Bug Summary

File: build/source/openmp/runtime/src/kmp_barrier.cpp
Warning: line 2515, column 7
Called function pointer is null (null dereference)
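
The analyzer's claim is that on at least one path the function pointer invoked at line 2515 (not shown in the excerpt below) can be null, so the indirect call dereferences a null pointer. A minimal sketch of the guard pattern such a report usually calls for, using a hypothetical combine() helper and a callback type modeled on the reduce callbacks in this file; it is illustrative only, not the flagged code:

typedef void (*reduce_fn_t)(void *lhs, void *rhs);

// Hedged sketch, not the runtime's code: check the callback before the
// indirect call so the null path reported by the analyzer cannot be taken.
static void combine(reduce_fn_t reduce, void *lhs, void *rhs) {
  if (reduce != nullptr)
    (*reduce)(lhs, rhs);
}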

Annotated Source Code

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name kmp_barrier.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -resource-dir /usr/lib/llvm-17/lib/clang/17 -D _DEBUG -D _GLIBCXX_ASSERTIONS -D _GNU_SOURCE -D _LIBCPP_ENABLE_ASSERTIONS -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -D omp_EXPORTS -I projects/openmp/runtime/src -I /build/source/openmp/runtime/src -I include -I /build/source/llvm/include -I /build/source/openmp/runtime/src/i18n -I /build/source/openmp/runtime/src/include -I /build/source/openmp/runtime/src/thirdparty/ittnotify -D _FORTIFY_SOURCE=2 -D NDEBUG -D _GNU_SOURCE -D _REENTRANT -D _FORTIFY_SOURCE=2 -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-17/lib/clang/17/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fmacro-prefix-map=/build/source/= -fcoverage-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fcoverage-prefix-map=/build/source/= -source-date-epoch 1683717183 -O2 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -Wno-misleading-indentation -Wno-extra -Wno-pedantic -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-frame-address -Wno-strict-aliasing -Wno-stringop-truncation -Wno-switch -Wno-uninitialized -Wno-cast-qual -std=c++17 -fdeprecated-macro -fdebug-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/= -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fno-rtti -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2023-05-10-133810-16478-1 -x c++ 
/build/source/openmp/runtime/src/kmp_barrier.cpp

1/*
2 * kmp_barrier.cpp
3 */
4
5//===----------------------------------------------------------------------===//
6//
7// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8// See https://llvm.org/LICENSE.txt for license information.
9// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10//
11//===----------------------------------------------------------------------===//
12
13#include "kmp_wait_release.h"
14#include "kmp_barrier.h"
15#include "kmp_itt.h"
16#include "kmp_os.h"
17#include "kmp_stats.h"
18#include "ompt-specific.h"
19// for distributed barrier
20#include "kmp_affinity.h"
21
22#if KMP_MIC
23#include <immintrin.h>
24#define USE_NGO_STORES 1
25#endif // KMP_MIC
26
27#if KMP_MIC && USE_NGO_STORES
28// ICV copying
29#define ngo_load(src) __m512d Vt = _mm512_load_pd((void *)(src))
30#define ngo_store_icvs(dst, src) _mm512_storenrngo_pd((void *)(dst), Vt)
31#define ngo_store_go(dst, src) _mm512_storenrngo_pd((void *)(dst), Vt)
32#define ngo_sync() __asm__ volatile("lock; addl $0,0(%%rsp)" ::: "memory")
33#else
34#define ngo_load(src) ((void)0)
35#define ngo_store_icvs(dst, src) copy_icvs((dst), (src))
36#define ngo_store_go(dst, src) KMP_MEMCPY((dst), (src), CACHE_LINE)
37#define ngo_sync() ((void)0)
38#endif /* KMP_MIC && USE_NGO_STORES */
39
40void __kmp_print_structure(void); // Forward declaration
41
42// ---------------------------- Barrier Algorithms ----------------------------
43// Distributed barrier
44
45// Compute how many threads to have polling each cache-line.
46// We want to limit the number of writes to IDEAL_GO_RESOLUTION.
47void distributedBarrier::computeVarsForN(size_t n) {
48 int nsockets = 1;
49 if (__kmp_topology) {
50 int socket_level = __kmp_topology->get_level(KMP_HW_SOCKET);
51 int core_level = __kmp_topology->get_level(KMP_HW_CORE);
52 int ncores_per_socket =
53 __kmp_topology->calculate_ratio(core_level, socket_level);
54 nsockets = __kmp_topology->get_count(socket_level);
55
56 if (nsockets <= 0)
57 nsockets = 1;
58 if (ncores_per_socket <= 0)
59 ncores_per_socket = 1;
60
61 threads_per_go = ncores_per_socket >> 1;
62 if (!fix_threads_per_go) {
63 // Minimize num_gos
64 if (threads_per_go > 4) {
65 if (KMP_OPTIMIZE_FOR_REDUCTIONS) {
66 threads_per_go = threads_per_go >> 1;
67 }
68 if (threads_per_go > 4 && nsockets == 1)
69 threads_per_go = threads_per_go >> 1;
70 }
71 }
72 if (threads_per_go == 0)
73 threads_per_go = 1;
74 fix_threads_per_go = true;
75 num_gos = n / threads_per_go;
76 if (n % threads_per_go)
77 num_gos++;
78 if (nsockets == 1 || num_gos == 1)
79 num_groups = 1;
80 else {
81 num_groups = num_gos / nsockets;
82 if (num_gos % nsockets)
83 num_groups++;
84 }
85 if (num_groups <= 0)
86 num_groups = 1;
87 gos_per_group = num_gos / num_groups;
88 if (num_gos % num_groups)
89 gos_per_group++;
90 threads_per_group = threads_per_go * gos_per_group;
91 } else {
92 num_gos = n / threads_per_go;
93 if (n % threads_per_go)
94 num_gos++;
95 if (num_gos == 1)
96 num_groups = 1;
97 else {
98 num_groups = num_gos / 2;
99 if (num_gos % 2)
100 num_groups++;
101 }
102 gos_per_group = num_gos / num_groups;
103 if (num_gos % num_groups)
104 gos_per_group++;
105 threads_per_group = threads_per_go * gos_per_group;
106 }
107}
108
109void distributedBarrier::computeGo(size_t n) {
110 // Minimize num_gos
111 for (num_gos = 1;; num_gos++)
112 if (IDEAL_CONTENTION * num_gos >= n)
113 break;
114 threads_per_go = n / num_gos;
115 if (n % num_gos)
116 threads_per_go++;
117 while (num_gos > MAX_GOS) {
118 threads_per_go++;
119 num_gos = n / threads_per_go;
120 if (n % threads_per_go)
121 num_gos++;
122 }
123 computeVarsForN(n);
124}
125
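For illustration only, a self-contained sketch of the sizing arithmetic in computeGo() above, with made-up stand-ins for the class constants (IDEAL_CONTENTION and MAX_GOS live in distributedBarrier and may have different values):

#include <cstddef>
#include <cstdio>

static const size_t kIdealContention = 16; // assumed value, not the real constant
static const size_t kMaxGos = 8;           // assumed value, not the real constant

int main() {
  size_t n = 100; // example thread count
  size_t num_gos, threads_per_go;
  // Smallest num_gos with kIdealContention * num_gos >= n, then round
  // threads_per_go up so num_gos * threads_per_go covers all n threads.
  for (num_gos = 1;; num_gos++)
    if (kIdealContention * num_gos >= n)
      break;
  threads_per_go = n / num_gos + (n % num_gos ? 1 : 0);
  while (num_gos > kMaxGos) {
    threads_per_go++;
    num_gos = n / threads_per_go + (n % threads_per_go ? 1 : 0);
  }
  std::printf("n=%zu -> num_gos=%zu, threads_per_go=%zu\n", n, num_gos,
              threads_per_go);
  return 0;
}
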
126// This function is to resize the barrier arrays when the new number of threads
127// exceeds max_threads, which is the current size of all the arrays
128void distributedBarrier::resize(size_t nthr) {
129 KMP_DEBUG_ASSERT(nthr > max_threads);
130
131 // expand to requested size * 2
132 max_threads = nthr * 2;
133
134 // allocate arrays to new max threads
135 for (int i = 0; i < MAX_ITERS; ++i) {
136 if (flags[i])
137 flags[i] = (flags_s *)KMP_INTERNAL_REALLOC(flags[i],
138 max_threads * sizeof(flags_s));
139 else
140 flags[i] = (flags_s *)KMP_INTERNAL_MALLOC(max_threads * sizeof(flags_s));
141 }
142
143 if (go)
144 go = (go_s *)KMP_INTERNAL_REALLOC(go, max_threads * sizeof(go_s));
145 else
146 go = (go_s *)KMP_INTERNAL_MALLOC(max_threads * sizeof(go_s));
147
148 if (iter)
149 iter = (iter_s *)KMP_INTERNAL_REALLOC(iter, max_threads * sizeof(iter_s));
150 else
151 iter = (iter_s *)KMP_INTERNAL_MALLOC(max_threads * sizeof(iter_s));
152
153 if (sleep)
154 sleep =
155 (sleep_s *)KMP_INTERNAL_REALLOC(sleep, max_threads * sizeof(sleep_s));
156 else
157 sleep = (sleep_s *)KMP_INTERNAL_MALLOC(max_threads * sizeof(sleep_s));
158}
159
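The growth policy in resize() above (double the requested thread count, then malloc or realloc each per-thread array to the new capacity) can be shown with a self-contained miniature; everything here is a simplified stand-in for illustration, not the runtime's types:

#include <cstddef>
#include <cstdlib>

// Simplified stand-in for one per-thread array managed like the ones above.
struct growable_array {
  unsigned char *data = nullptr;
  size_t capacity = 0; // plays the role of max_threads

  void ensure(size_t nthr) { // plays the role of resize(nthr)
    if (nthr <= capacity)
      return;              // still enough space
    capacity = nthr * 2;   // same grow-to-2x policy
    data = static_cast<unsigned char *>(
        data ? std::realloc(data, capacity) : std::malloc(capacity));
    // error handling omitted in this sketch
  }
};
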
160// This function is to set all the go flags that threads might be waiting
161// on, and when blocktime is not infinite, it should be followed by a wake-up
162// call to each thread
163kmp_uint64 distributedBarrier::go_release() {
164 kmp_uint64 next_go = iter[0].iter + distributedBarrier::MAX_ITERS;
165 for (size_t j = 0; j < num_gos; j++) {
166 go[j].go.store(next_go);
167 }
168 return next_go;
169}
170
171void distributedBarrier::go_reset() {
172 for (size_t j = 0; j < max_threads; ++j) {
173 for (size_t i = 0; i < distributedBarrier::MAX_ITERS; ++i) {
174 flags[i][j].stillNeed = 1;
175 }
176 go[j].go.store(0);
177 iter[j].iter = 0;
178 }
179}
180
181// This function inits/re-inits the distributed barrier for a particular number
182// of threads. If a resize of arrays is needed, it calls the resize function.
183void distributedBarrier::init(size_t nthr) {
184 size_t old_max = max_threads;
185 if (nthr > max_threads) { // need more space in arrays
186 resize(nthr);
187 }
188
189 for (size_t i = 0; i < max_threads; i++) {
190 for (size_t j = 0; j < distributedBarrier::MAX_ITERS; j++) {
191 flags[j][i].stillNeed = 1;
192 }
193 go[i].go.store(0);
194 iter[i].iter = 0;
195 if (i >= old_max)
196 sleep[i].sleep = false;
197 }
198
199 // Recalculate num_gos, etc. based on new nthr
200 computeVarsForN(nthr);
201
202 num_threads = nthr;
203
204 if (team_icvs == NULL)
205 team_icvs = __kmp_allocate(sizeof(kmp_internal_control_t));
206}
207
208// This function is used only when KMP_BLOCKTIME is not infinite.
209// static
210void __kmp_dist_barrier_wakeup(enum barrier_type bt, kmp_team_t *team,
211 size_t start, size_t stop, size_t inc,
212 size_t tid) {
213 KMP_DEBUG_ASSERT(__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME);
214 if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
215 return;
216
217 kmp_info_t **other_threads = team->t.t_threads;
218 for (size_t thr = start; thr < stop; thr += inc) {
219 KMP_DEBUG_ASSERT(other_threads[thr]);
220 int gtid = other_threads[thr]->th.th_info.ds.ds_gtid;
221 // Wake up worker regardless of if it appears to be sleeping or not
222 __kmp_atomic_resume_64(gtid, (kmp_atomic_flag_64<> *)NULL);
223 }
224}
225
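As the comments above note, when KMP_BLOCKTIME is finite a release of the go flags has to be paired with explicit wake-ups, because workers may have gone to sleep on their flags instead of spinning. A hedged sketch of that pairing, assuming a valid team and barrier set up elsewhere; the helper name release_and_wake is made up for illustration, while the call shapes are taken from this file:

// Sketch only: pair go_release() with per-thread wake-ups when blocktime
// is not infinite, mirroring how the release paths below use them.
static void release_and_wake(distributedBarrier *b, enum barrier_type bt,
                             kmp_team_t *team, size_t tid) {
  b->go_release(); // stamp the next go value on every go[] cache line
  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
    size_t nproc = team->t.t_nproc;
    // wake every other thread in the team in case it is sleeping
    __kmp_dist_barrier_wakeup(bt, team, /*start=*/tid + 1, /*stop=*/nproc,
                              /*inc=*/1, tid);
  }
}
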
226static void __kmp_dist_barrier_gather(
227 enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
228 void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
229 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_dist_gather);
230 kmp_team_t *team;
231 distributedBarrier *b;
232 kmp_info_t **other_threads;
233 kmp_uint64 my_current_iter, my_next_iter;
234 kmp_uint32 nproc;
235 bool group_leader;
236
237 team = this_thr->th.th_team;
238 nproc = this_thr->th.th_team_nproc;
239 other_threads = team->t.t_threads;
240 b = team->t.b;
241 my_current_iter = b->iter[tid].iter;
242 my_next_iter = (my_current_iter + 1) % distributedBarrier::MAX_ITERS;
243 group_leader = ((tid % b->threads_per_group) == 0);
244
245 KA_TRACE(20,
246 ("__kmp_dist_barrier_gather: T#%d(%d:%d) enter; barrier type %d\n",
247 gtid, team->t.t_id, tid, bt));
248
249#if USE_ITT_BUILD && USE_ITT_NOTIFY
250 // Barrier imbalance - save arrive time to the thread
251 if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
252 this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time =
253 __itt_get_timestamp();
254 }
255#endif
256
257 if (group_leader) {
258 // Start from the thread after the group leader
259 size_t group_start = tid + 1;
260 size_t group_end = tid + b->threads_per_group;
261 size_t threads_pending = 0;
262
263 if (group_end > nproc)
264 group_end = nproc;
265 do { // wait for threads in my group
266 threads_pending = 0;
267 // Check all the flags every time to avoid branch misspredict
268 for (size_t thr = group_start; thr < group_end; thr++) {
269 // Each thread uses a different cache line
270 threads_pending += b->flags[my_current_iter][thr].stillNeed;
271 }
272 // Execute tasks here
273 if (__kmp_tasking_mode != tskm_immediate_exec) {
274 kmp_task_team_t *task_team = this_thr->th.th_task_team;
275 if (task_team != NULL) {
276 if (TCR_SYNC_4(task_team->tt.tt_active)) {
277 if (KMP_TASKING_ENABLED(task_team)) {
278 int tasks_completed = FALSE;
279 __kmp_atomic_execute_tasks_64(
280 this_thr, gtid, (kmp_atomic_flag_64<> *)NULL, FALSE,
281 &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
282 } else
283 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
284 }
285 } else {
286 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
287 } // if
288 }
289 if (TCR_4(__kmp_global.g.g_done)) {
290 if (__kmp_global.g.g_abort)
291 __kmp_abort_thread();
292 break;
293 } else if (__kmp_tasking_mode != tskm_immediate_exec &&
294 this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) {
295 this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
296 }
297 } while (threads_pending > 0);
298
299 if (reduce) { // Perform reduction if needed
300 OMPT_REDUCTION_DECL(this_thr, gtid);
301 OMPT_REDUCTION_BEGIN;
302 // Group leader reduces all threads in group
303 for (size_t thr = group_start; thr < group_end; thr++) {
304 (*reduce)(this_thr->th.th_local.reduce_data,
305 other_threads[thr]->th.th_local.reduce_data);
306 }
307 OMPT_REDUCTION_END;
308 }
309
310 // Set flag for next iteration
311 b->flags[my_next_iter][tid].stillNeed = 1;
312 // Each thread uses a different cache line; resets stillNeed to 0 to
313 // indicate it has reached the barrier
314 b->flags[my_current_iter][tid].stillNeed = 0;
315
316 do { // wait for all group leaders
317 threads_pending = 0;
318 for (size_t thr = 0; thr < nproc; thr += b->threads_per_group) {
319 threads_pending += b->flags[my_current_iter][thr].stillNeed;
320 }
321 // Execute tasks here
322 if (__kmp_tasking_mode != tskm_immediate_exec) {
323 kmp_task_team_t *task_team = this_thr->th.th_task_team;
324 if (task_team != NULL) {
325 if (TCR_SYNC_4(task_team->tt.tt_active)) {
326 if (KMP_TASKING_ENABLED(task_team)) {
327 int tasks_completed = FALSE;
328 __kmp_atomic_execute_tasks_64(
329 this_thr, gtid, (kmp_atomic_flag_64<> *)NULL, FALSE,
330 &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
331 } else
332 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
333 }
334 } else {
335 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
336 } // if
337 }
338 if (TCR_4(__kmp_global.g.g_done)) {
339 if (__kmp_global.g.g_abort)
340 __kmp_abort_thread();
341 break;
342 } else if (__kmp_tasking_mode != tskm_immediate_exec &&
343 this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) {
344 this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
345 }
346 } while (threads_pending > 0);
347
348 if (reduce) { // Perform reduction if needed
349 if (KMP_MASTER_TID(tid)) { // Master reduces over group leaders
350 OMPT_REDUCTION_DECL(this_thr, gtid);
351 OMPT_REDUCTION_BEGIN;
352 for (size_t thr = b->threads_per_group; thr < nproc;
353 thr += b->threads_per_group) {
354 (*reduce)(this_thr->th.th_local.reduce_data,
355 other_threads[thr]->th.th_local.reduce_data);
356 }
357 OMPT_REDUCTION_END;
358 }
359 }
360 } else {
361 // Set flag for next iteration
362 b->flags[my_next_iter][tid].stillNeed = 1;
363 // Each thread uses a different cache line; resets stillNeed to 0 to
364 // indicate it has reached the barrier
365 b->flags[my_current_iter][tid].stillNeed = 0;
366 }
367
368 KMP_MFENCE();
369
370 KA_TRACE(20,
371 ("__kmp_dist_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
372 gtid, team->t.t_id, tid, bt));
373}
374
375static void __kmp_dist_barrier_release(
376 enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
377 int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
378 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_dist_release);
379 kmp_team_t *team;
380 distributedBarrier *b;
381 kmp_bstate_t *thr_bar;
382 kmp_uint64 my_current_iter, next_go;
383 size_t my_go_index;
384 bool group_leader;
385
386 KA_TRACE(20, ("__kmp_dist_barrier_release: T#%d(%d) enter; barrier type %d\n",
387 gtid, tid, bt));
388
389 thr_bar = &this_thr->th.th_bar[bt].bb;
390
391 if (!KMP_MASTER_TID(tid)) {
392 // workers and non-master group leaders need to check their presence in team
393 do {
394 if (this_thr->th.th_used_in_team.load() != 1 &&
395 this_thr->th.th_used_in_team.load() != 3) {
396 // Thread is not in use in a team. Wait on location in tid's thread
397 // struct. The 0 value tells anyone looking that this thread is spinning
398 // or sleeping until this location becomes 3 again; 3 is the transition
399 // state to get to 1 which is waiting on go and being in the team
400 kmp_flag_32<false, false> my_flag(&(this_thr->th.th_used_in_team), 3);
401 if (KMP_COMPARE_AND_STORE_ACQ32(&(this_thr->th.th_used_in_team), 2,
402 0) ||
403 this_thr->th.th_used_in_team.load() == 0) {
404 my_flag.wait(this_thr, true USE_ITT_BUILD_ARG(itt_sync_obj));
405 }
406#if USE_ITT_BUILD && USE_ITT_NOTIFY
407 if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
408 // In fork barrier where we could not get the object reliably
409 itt_sync_obj =
410 __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
411 // Cancel wait on previous parallel region...
412 __kmp_itt_task_starting(itt_sync_obj);
413
414 if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
415 return;
416
417 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
418 if (itt_sync_obj != NULL)
419 // Call prepare as early as possible for "new" barrier
420 __kmp_itt_task_finished(itt_sync_obj);
421 } else
422#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
423 if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
424 return;
425 }
426 if (this_thr->th.th_used_in_team.load() != 1 &&
427 this_thr->th.th_used_in_team.load() != 3) // spurious wake-up?
428 continue;
429 if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
430 return;
431
432 // At this point, the thread thinks it is in use in a team, or in
433 // transition to be used in a team, but it might have reached this barrier
434 // before it was marked unused by the team. Unused threads are awoken and
435 // shifted to wait on local thread struct elsewhere. It also might reach
436 // this point by being picked up for use by a different team. Either way,
437 // we need to update the tid.
438 tid = __kmp_tid_from_gtid(gtid);
439 team = this_thr->th.th_team;
440 KMP_DEBUG_ASSERT(tid >= 0);
441 KMP_DEBUG_ASSERT(team);
442 b = team->t.b;
443 my_current_iter = b->iter[tid].iter;
444 next_go = my_current_iter + distributedBarrier::MAX_ITERS;
445 my_go_index = tid / b->threads_per_go;
446 if (this_thr->th.th_used_in_team.load() == 3) {
447 KMP_COMPARE_AND_STORE_ACQ32(&(this_thr->th.th_used_in_team), 3, 1);
448 }
449 // Check if go flag is set
450 if (b->go[my_go_index].go.load() != next_go) {
451 // Wait on go flag on team
452 kmp_atomic_flag_64<false, true> my_flag(
453 &(b->go[my_go_index].go), next_go, &(b->sleep[tid].sleep));
454 my_flag.wait(this_thr, true USE_ITT_BUILD_ARG(itt_sync_obj));
455 KMP_DEBUG_ASSERT(my_current_iter == b->iter[tid].iter ||
456 b->iter[tid].iter == 0);
457 KMP_DEBUG_ASSERT(b->sleep[tid].sleep == false);
458 }
459
460 if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
461 return;
462 // At this point, the thread's go location was set. This means the primary
463 // thread is safely in the barrier, and so this thread's data is
464 // up-to-date, but we should check again that this thread is really in
465 // use in the team, as it could have been woken up for the purpose of
466 // changing team size, or reaping threads at shutdown.
467 if (this_thr->th.th_used_in_team.load() == 1)
468 break;
469 } while (1);
470
471 if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
472 return;
473
474 group_leader = ((tid % b->threads_per_group) == 0);
475 if (group_leader) {
476 // Tell all the threads in my group they can go!
477 for (size_t go_idx = my_go_index + 1;
478 go_idx < my_go_index + b->gos_per_group; go_idx++) {
479 b->go[go_idx].go.store(next_go);
480 }
481 // Fence added so that workers can see changes to go. sfence inadequate.
482 KMP_MFENCE();
483 }
484
485#if KMP_BARRIER_ICV_PUSH
486 if (propagate_icvs) { // copy ICVs to final dest
487 __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team,
488 tid, FALSE);
489 copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
490 (kmp_internal_control_t *)team->t.b->team_icvs);
491 copy_icvs(&thr_bar->th_fixed_icvs,
492 &team->t.t_implicit_task_taskdata[tid].td_icvs);
493 }
494#endif
495 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME && group_leader) {
496 // This thread is now awake and participating in the barrier;
497 // wake up the other threads in the group
498 size_t nproc = this_thr->th.th_team_nproc;
499 size_t group_end = tid + b->threads_per_group;
500 if (nproc < group_end)
501 group_end = nproc;
502 __kmp_dist_barrier_wakeup(bt, team, tid + 1, group_end, 1, tid);
503 }
504 } else { // Primary thread
505 team = this_thr->th.th_team;
506 b = team->t.b;
507 my_current_iter = b->iter[tid].iter;
508 next_go = my_current_iter + distributedBarrier::MAX_ITERS;
509#if KMP_BARRIER_ICV_PUSH
510 if (propagate_icvs) {
511 // primary thread has ICVs in final destination; copy
512 copy_icvs(&thr_bar->th_fixed_icvs,
513 &team->t.t_implicit_task_taskdata[tid].td_icvs);
514 }
515#endif
516 // Tell all the group leaders they can go!
517 for (size_t go_idx = 0; go_idx < b->num_gos; go_idx += b->gos_per_group) {
518 b->go[go_idx].go.store(next_go);
519 }
520
521 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
522 // Wake-up the group leaders
523 size_t nproc = this_thr->th.th_team_nproc;
524 __kmp_dist_barrier_wakeup(bt, team, tid + b->threads_per_group, nproc,
525 b->threads_per_group, tid);
526 }
527
528 // Tell all the threads in my group they can go!
529 for (size_t go_idx = 1; go_idx < b->gos_per_group; go_idx++) {
530 b->go[go_idx].go.store(next_go);
531 }
532
533 // Fence added so that workers can see changes to go. sfence inadequate.
534 KMP_MFENCE();
535
536 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
537 // Wake-up the other threads in my group
538 size_t nproc = this_thr->th.th_team_nproc;
539 size_t group_end = tid + b->threads_per_group;
540 if (nproc < group_end)
541 group_end = nproc;
542 __kmp_dist_barrier_wakeup(bt, team, tid + 1, group_end, 1, tid);
543 }
544 }
545 // Update to next iteration
546 KMP_ASSERT(my_current_iter == b->iter[tid].iter);
547 b->iter[tid].iter = (b->iter[tid].iter + 1) % distributedBarrier::MAX_ITERS;
548
549 KA_TRACE(
550 20, ("__kmp_dist_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
551 gtid, team->t.t_id, tid, bt));
552}
553
554// Linear Barrier
555template <bool cancellable = false>
556static bool __kmp_linear_barrier_gather_template(
557 enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
558 void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
559 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_linear_gather);
560 kmp_team_t *team = this_thr->th.th_team;
561 kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
562 kmp_info_t **other_threads = team->t.t_threads;
563
564 KA_TRACE(
565 20,
566 ("__kmp_linear_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
567 gtid, team->t.t_id, tid, bt));
568 KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);
569
570#if USE_ITT_BUILD && USE_ITT_NOTIFY
571 // Barrier imbalance - save arrive time to the thread
572 if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
573 this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time =
574 __itt_get_timestamp();
575 }
576#endif
577 // We now perform a linear reduction to signal that all of the threads have
578 // arrived.
579 if (!KMP_MASTER_TID(tid)) {
580 KA_TRACE(20,
581 ("__kmp_linear_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d)"
582 "arrived(%p): %llu => %llu\n",
583 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(0, team),
584 team->t.t_id, 0, &thr_bar->b_arrived, thr_bar->b_arrived,
585 thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));
586 // Mark arrival to primary thread
587 /* After performing this write, a worker thread may not assume that the team
588 is valid any more - it could be deallocated by the primary thread at any
589 time. */
590 kmp_flag_64<> flag(&thr_bar->b_arrived, other_threads[0]);
591 flag.release();
592 } else {
593 kmp_balign_team_t *team_bar = &team->t.t_bar[bt];
594 int nproc = this_thr->th.th_team_nproc;
595 int i;
596 // Don't have to worry about sleep bit here or atomic since team setting
597 kmp_uint64 new_state = team_bar->b_arrived + KMP_BARRIER_STATE_BUMP;
598
599 // Collect all the worker team member threads.
600 for (i = 1; i < nproc; ++i) {
601#if KMP_CACHE_MANAGE
602 // Prefetch next thread's arrived count
603 if (i + 1 < nproc)
604 KMP_CACHE_PREFETCH(&other_threads[i + 1]->th.th_bar[bt].bb.b_arrived);
605#endif /* KMP_CACHE_MANAGE */
606 KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) "
607 "arrived(%p) == %llu\n",
608 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(i, team),
609 team->t.t_id, i,
610 &other_threads[i]->th.th_bar[bt].bb.b_arrived, new_state));
611
612 // Wait for worker thread to arrive
613 if (cancellable) {
614 kmp_flag_64<true, false> flag(
615 &other_threads[i]->th.th_bar[bt].bb.b_arrived, new_state);
616 if (flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj)))
617 return true;
618 } else {
619 kmp_flag_64<> flag(&other_threads[i]->th.th_bar[bt].bb.b_arrived,
620 new_state);
621 flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
622 }
623#if USE_ITT_BUILD && USE_ITT_NOTIFY
624 // Barrier imbalance - write min of the thread time and the other thread
625 // time to the thread.
626 if (__kmp_forkjoin_frames_mode == 2) {
627 this_thr->th.th_bar_min_time = KMP_MIN(
628 this_thr->th.th_bar_min_time, other_threads[i]->th.th_bar_min_time);
629 }
630#endif
631 if (reduce) {
632 KA_TRACE(100,
633 ("__kmp_linear_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n",
634 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(i, team),
635 team->t.t_id, i));
636 OMPT_REDUCTION_DECL(this_thr, gtid);
637 OMPT_REDUCTION_BEGIN;
638 (*reduce)(this_thr->th.th_local.reduce_data,
639 other_threads[i]->th.th_local.reduce_data);
640 OMPT_REDUCTION_END;
641 }
642 }
643 // Don't have to worry about sleep bit here or atomic since team setting
644 team_bar->b_arrived = new_state;
645 KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) set team %d "
646 "arrived(%p) = %llu\n",
647 gtid, team->t.t_id, tid, team->t.t_id, &team_bar->b_arrived,
648 new_state));
649 }
650 KA_TRACE(
651 20,
652 ("__kmp_linear_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
653 gtid, team->t.t_id, tid, bt));
654 return false;
655}
656
657template <bool cancellable = false>
658static bool __kmp_linear_barrier_release_template(
659 enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
660 int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
661 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_linear_release);
662 kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
663 kmp_team_t *team;
664
665 if (KMP_MASTER_TID(tid)) {
666 unsigned int i;
667 kmp_uint32 nproc = this_thr->th.th_team_nproc;
668 kmp_info_t **other_threads;
669
670 team = __kmp_threads[gtid]->th.th_team;
671 KMP_DEBUG_ASSERT(team != NULL);
672 other_threads = team->t.t_threads;
673
674 KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d(%d:%d) primary enter for "
675 "barrier type %d\n",
676 gtid, team->t.t_id, tid, bt));
677
678 if (nproc > 1) {
679#if KMP_BARRIER_ICV_PUSH
680 {
681 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy);
682 if (propagate_icvs) {
683 ngo_load(&team->t.t_implicit_task_taskdata[0].td_icvs);
684 for (i = 1; i < nproc; ++i) {
685 __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[i],
686 team, i, FALSE);
687 ngo_store_icvs(&team->t.t_implicit_task_taskdata[i].td_icvs,
688 &team->t.t_implicit_task_taskdata[0].td_icvs);
689 }
690 ngo_sync();
691 }
692 }
693#endif // KMP_BARRIER_ICV_PUSH
694
695 // Now, release all of the worker threads
696 for (i = 1; i < nproc; ++i) {
697#if KMP_CACHE_MANAGE
698 // Prefetch next thread's go flag
699 if (i + 1 < nproc)
700 KMP_CACHE_PREFETCH(&other_threads[i + 1]->th.th_bar[bt].bb.b_go);
701#endif /* KMP_CACHE_MANAGE */
702 KA_TRACE(
703 20,
704 ("__kmp_linear_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d) "
705 "go(%p): %u => %u\n",
706 gtid, team->t.t_id, tid, other_threads[i]->th.th_info.ds.ds_gtid,
707 team->t.t_id, i, &other_threads[i]->th.th_bar[bt].bb.b_go,
708 other_threads[i]->th.th_bar[bt].bb.b_go,
709 other_threads[i]->th.th_bar[bt].bb.b_go + KMP_BARRIER_STATE_BUMP));
710 kmp_flag_64<> flag(&other_threads[i]->th.th_bar[bt].bb.b_go,
711 other_threads[i]);
712 flag.release();
713 }
714 }
715 } else { // Wait for the PRIMARY thread to release us
716 KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d wait go(%p) == %u\n",
717 gtid, &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
718 if (cancellable) {
719 kmp_flag_64<true, false> flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
720 if (flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj)))
721 return true;
722 } else {
723 kmp_flag_64<> flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
724 flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
725 }
726#if USE_ITT_BUILD && USE_ITT_NOTIFY
727 if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
728 // In a fork barrier; cannot get the object reliably (or ITTNOTIFY is
729 // disabled)
730 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
731 // Cancel wait on previous parallel region...
732 __kmp_itt_task_starting(itt_sync_obj);
733
734 if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
735 return false;
736
737 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
738 if (itt_sync_obj != NULL)
739 // Call prepare as early as possible for "new" barrier
740 __kmp_itt_task_finished(itt_sync_obj);
741 } else
742#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
743 // Early exit for reaping threads releasing forkjoin barrier
744 if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
745 return false;
746// The worker thread may now assume that the team is valid.
747#ifdef KMP_DEBUG
748 tid = __kmp_tid_from_gtid(gtid);
749 team = __kmp_threads[gtid]->th.th_team;
750#endif
751 KMP_DEBUG_ASSERT(team != NULL);
752 TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
753 KA_TRACE(20,
754 ("__kmp_linear_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
755 gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
756 KMP_MB(); // Flush all pending memory write invalidates.
757 }
758 KA_TRACE(
759 20,
760 ("__kmp_linear_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
761 gtid, team->t.t_id, tid, bt));
762 return false;
763}
764
765static void __kmp_linear_barrier_gather(
766 enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
767 void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
768 __kmp_linear_barrier_gather_template<false>(
769 bt, this_thr, gtid, tid, reduce USE_ITT_BUILD_ARG(itt_sync_obj));
770}
771
772static bool __kmp_linear_barrier_gather_cancellable(
773 enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
774 void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
775 return __kmp_linear_barrier_gather_template<true>(
776 bt, this_thr, gtid, tid, reduce USE_ITT_BUILD_ARG(itt_sync_obj));
777}
778
779static void __kmp_linear_barrier_release(
780 enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
781 int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
782 __kmp_linear_barrier_release_template<false>(
783 bt, this_thr, gtid, tid, propagate_icvs USE_ITT_BUILD_ARG(itt_sync_obj));
784}
785
786static bool __kmp_linear_barrier_release_cancellable(
787 enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
788 int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
789 return __kmp_linear_barrier_release_template<true>(
790 bt, this_thr, gtid, tid, propagate_icvs USE_ITT_BUILD_ARG(itt_sync_obj));
791}
792
793// Tree barrier
794static void __kmp_tree_barrier_gather(
795 enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
796 void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
797 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_tree_gather);
798 kmp_team_t *team = this_thr->th.th_team;
799 kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
800 kmp_info_t **other_threads = team->t.t_threads;
801 kmp_uint32 nproc = this_thr->th.th_team_nproc;
802 kmp_uint32 branch_bits = __kmp_barrier_gather_branch_bits[bt];
803 kmp_uint32 branch_factor = 1 << branch_bits;
804 kmp_uint32 child;
805 kmp_uint32 child_tid;
806 kmp_uint64 new_state = 0;
807
808 KA_TRACE(
809 20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
810 gtid, team->t.t_id, tid, bt));
811 KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);
812
813#if USE_ITT_BUILD && USE_ITT_NOTIFY
814 // Barrier imbalance - save arrive time to the thread
815 if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
816 this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time =
817 __itt_get_timestamp();
818 }
819#endif
820 // Perform tree gather to wait until all threads have arrived; reduce any
821 // required data as we go
822 child_tid = (tid << branch_bits) + 1;
823 if (child_tid < nproc) {
824 // Parent threads wait for all their children to arrive
825 new_state = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
826 child = 1;
827 do {
828 kmp_info_t *child_thr = other_threads[child_tid];
829 kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
830#if KMP_CACHE_MANAGE
831 // Prefetch next thread's arrived count
832 if (child + 1 <= branch_factor && child_tid + 1 < nproc)
833 KMP_CACHE_PREFETCH(
834 &other_threads[child_tid + 1]->th.th_bar[bt].bb.b_arrived);
835#endif /* KMP_CACHE_MANAGE */
836 KA_TRACE(20,
837 ("__kmp_tree_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) "
838 "arrived(%p) == %llu\n",
839 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
840 team->t.t_id, child_tid, &child_bar->b_arrived, new_state));
841 // Wait for child to arrive
842 kmp_flag_64<> flag(&child_bar->b_arrived, new_state);
843 flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
844#if USE_ITT_BUILD && USE_ITT_NOTIFY
845 // Barrier imbalance - write min of the thread time and a child time to
846 // the thread.
847 if (__kmp_forkjoin_frames_mode == 2) {
848 this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time,
849 child_thr->th.th_bar_min_time);
850 }
851#endif
852 if (reduce) {
853 KA_TRACE(100,
854 ("__kmp_tree_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n",
855 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
856 team->t.t_id, child_tid));
857 OMPT_REDUCTION_DECL(this_thr, gtid);
858 OMPT_REDUCTION_BEGIN;
859 (*reduce)(this_thr->th.th_local.reduce_data,
860 child_thr->th.th_local.reduce_data);
861 OMPT_REDUCTION_END;
862 }
863 child++;
864 child_tid++;
865 } while (child <= branch_factor && child_tid < nproc);
866 }
867
868   if (!KMP_MASTER_TID(tid)) { // Worker threads
869     kmp_int32 parent_tid = (tid - 1) >> branch_bits;
870
871     KA_TRACE(20,
872              ("__kmp_tree_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
873               "arrived(%p): %llu => %llu\n",
874               gtid, team->t.t_id, tid, __kmp_gtid_from_tid(parent_tid, team),
875               team->t.t_id, parent_tid, &thr_bar->b_arrived, thr_bar->b_arrived,
876               thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));
877
878 // Mark arrival to parent thread
879 /* After performing this write, a worker thread may not assume that the team
880 is valid any more - it could be deallocated by the primary thread at any
881 time. */
882 kmp_flag_64<> flag(&thr_bar->b_arrived, other_threads[parent_tid]);
883 flag.release();
884 } else {
885 // Need to update the team arrived pointer if we are the primary thread
886 if (nproc > 1) // New value was already computed above
887 team->t.t_bar[bt].b_arrived = new_state;
888 else
889       team->t.t_bar[bt].b_arrived += KMP_BARRIER_STATE_BUMP;
890     KA_TRACE(20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) set team %d "
891                   "arrived(%p) = %llu\n",
892                   gtid, team->t.t_id, tid, team->t.t_id,
893                   &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived));
894   }
895   KA_TRACE(20,
896            ("__kmp_tree_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
897             gtid, team->t.t_id, tid, bt));
898}
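In the gather above, the tree shape is encoded purely in index arithmetic: a thread's first child sits at (tid << branch_bits) + 1 and its parent at (tid - 1) >> branch_bits. The following standalone sketch (not part of the runtime; print_tree_neighbors is a made-up name used only for illustration) prints those neighbors for a small team so the traversal order can be checked by hand.

#include <cstdio>

// Illustrative only: mirrors the index math of __kmp_tree_barrier_gather.
static void print_tree_neighbors(int nproc, int branch_bits) {
  int branch_factor = 1 << branch_bits;
  for (int tid = 0; tid < nproc; ++tid) {
    int parent = (tid == 0) ? -1 : (tid - 1) >> branch_bits;
    std::printf("tid %d: parent %d, children:", tid, parent);
    int child_tid = (tid << branch_bits) + 1;
    for (int child = 1; child <= branch_factor && child_tid < nproc;
         ++child, ++child_tid)
      std::printf(" %d", child_tid);
    std::printf("\n");
  }
}

int main() { print_tree_neighbors(8, 2); } // e.g. 8 threads, branch factor 4

With nproc = 8 and branch_bits = 2, thread 0 gathers from threads 1-4 and thread 1 gathers from 5-7, matching the loop bounds (child <= branch_factor && child_tid < nproc) in the code above.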
899
900static void __kmp_tree_barrier_release(
901 enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
902     int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
903   KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_tree_release);
904 kmp_team_t *team;
905 kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
906 kmp_uint32 nproc;
907 kmp_uint32 branch_bits = __kmp_barrier_release_branch_bits[bt];
908 kmp_uint32 branch_factor = 1 << branch_bits;
909 kmp_uint32 child;
910 kmp_uint32 child_tid;
911
912 // Perform a tree release for all of the threads that have been gathered
913   if (!KMP_MASTER_TID(
9
Taking true branch
914           tid)) { // Handle fork barrier workers who aren't part of a team yet
915     KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d wait go(%p) == %u\n", gtid,
10
Assuming 'kmp_a_debug' is < 20
11
Taking false branch
916                   &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
917     // Wait for parent thread to release us
918     kmp_flag_64<> flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
919     flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
12
Calling 'kmp_flag_64::wait'
17
Returning from 'kmp_flag_64::wait'
920#if USE_ITT_BUILD && USE_ITT_NOTIFY
921     if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
18
Assuming '__kmp_itt_sync_create_ptr__3_0' is null
922 // In fork barrier where we could not get the object reliably (or
923 // ITTNOTIFY is disabled)
924 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
925 // Cancel wait on previous parallel region...
926 __kmp_itt_task_starting(itt_sync_obj);
927
928       if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
929 return;
930
931 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
932       if (itt_sync_obj != NULL)
933 // Call prepare as early as possible for "new" barrier
934 __kmp_itt_task_finished(itt_sync_obj);
935 } else
936#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
937 // Early exit for reaping threads releasing forkjoin barrier
938     if (bt
18.1
'bt' is equal to bs_forkjoin_barrier
        == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
19
Assuming field 'g_done' is not equal to 0
20
Taking true branch
939 return;
940
941 // The worker thread may now assume that the team is valid.
942 team = __kmp_threads[gtid]->th.th_team;
943     KMP_DEBUG_ASSERT(team != NULL);
944     tid = __kmp_tid_from_gtid(gtid);
945
946     TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
947     KA_TRACE(20,
948              ("__kmp_tree_barrier_release: T#%d(%d:%d) set go(%p) = %u\n", gtid,
949               team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
950 KMP_MB(); // Flush all pending memory write invalidates.
951 } else {
952 team = __kmp_threads[gtid]->th.th_team;
953     KMP_DEBUG_ASSERT(team != NULL);
954     KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d(%d:%d) primary enter for "
955                   "barrier type %d\n",
956                   gtid, team->t.t_id, tid, bt));
957 }
958 nproc = this_thr->th.th_team_nproc;
959 child_tid = (tid << branch_bits) + 1;
960
961 if (child_tid < nproc) {
962 kmp_info_t **other_threads = team->t.t_threads;
963 child = 1;
964 // Parent threads release all their children
965 do {
966 kmp_info_t *child_thr = other_threads[child_tid];
967 kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
968#if KMP_CACHE_MANAGE
969 // Prefetch next thread's go count
970 if (child + 1 <= branch_factor && child_tid + 1 < nproc)
971 KMP_CACHE_PREFETCH(
972 &other_threads[child_tid + 1]->th.th_bar[bt].bb.b_go);
973#endif /* KMP_CACHE_MANAGE */
974
975#if KMP_BARRIER_ICV_PUSH
976       {
977         KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy);
978         if (propagate_icvs) {
979           __kmp_init_implicit_task(team->t.t_ident,
980                                    team->t.t_threads[child_tid], team,
981                                    child_tid, FALSE);
982 copy_icvs(&team->t.t_implicit_task_taskdata[child_tid].td_icvs,
983 &team->t.t_implicit_task_taskdata[0].td_icvs);
984 }
985 }
986#endif // KMP_BARRIER_ICV_PUSH
987       KA_TRACE(20,
988                ("__kmp_tree_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)"
989                 "go(%p): %u => %u\n",
990                 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
991                 team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,
992                 child_bar->b_go + KMP_BARRIER_STATE_BUMP));
993 // Release child from barrier
994 kmp_flag_64<> flag(&child_bar->b_go, child_thr);
995 flag.release();
996 child++;
997 child_tid++;
998 } while (child <= branch_factor && child_tid < nproc);
999 }
1000   KA_TRACE(
1001       20, ("__kmp_tree_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
1002            gtid, team->t.t_id, tid, bt));
1003}
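The release path hinges on the b_go handshake seen above: each worker waits in flag.wait() until its own b_go reaches KMP_BARRIER_STATE_BUMP, re-arms it to KMP_INIT_BARRIER_STATE, and then forwards the release to its children. A minimal model of that handshake, using std::atomic instead of kmp_flag_64 and with made-up names (go_flag, wait_go, release_go), is sketched below; it illustrates the protocol only and is not the runtime's implementation:

#include <atomic>
#include <thread>

constexpr unsigned INIT_BARRIER_STATE = 0;      // analogous to KMP_INIT_BARRIER_STATE
constexpr unsigned BARRIER_STATE_BUMP = 1 << 2; // analogous to KMP_BARRIER_STATE_BUMP

struct go_flag {
  std::atomic<unsigned> b_go{INIT_BARRIER_STATE};
};

// Worker side: block until the parent bumps b_go, then re-arm it.
inline void wait_go(go_flag &f) {
  while (f.b_go.load(std::memory_order_acquire) < BARRIER_STATE_BUMP)
    std::this_thread::yield();
  f.b_go.store(INIT_BARRIER_STATE, std::memory_order_relaxed);
}

// Parent side: release one child by bumping its b_go.
inline void release_go(go_flag &f) {
  f.b_go.fetch_add(BARRIER_STATE_BUMP, std::memory_order_release);
}

The real kmp_flag_64 adds sleep/wake support and the ITT instrumentation (the itt_sync_obj plumbing that matters for this report), but the ordering contract is the same: the release store must publish everything the parent did before the child resumes.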
1004
1005// Hyper Barrier
1006static void __kmp_hyper_barrier_gather(
1007 enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
1008     void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
1009   KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hyper_gather);
1010 kmp_team_t *team = this_thr->th.th_team;
1011 kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
1012 kmp_info_t **other_threads = team->t.t_threads;
1013   kmp_uint64 new_state = KMP_BARRIER_UNUSED_STATE;
1014 kmp_uint32 num_threads = this_thr->th.th_team_nproc;
1015 kmp_uint32 branch_bits = __kmp_barrier_gather_branch_bits[bt];
1016 kmp_uint32 branch_factor = 1 << branch_bits;
1017 kmp_uint32 offset;
1018 kmp_uint32 level;
1019
1020   KA_TRACE(
1021       20,
1022       ("__kmp_hyper_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
1023        gtid, team->t.t_id, tid, bt));
1024   KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);
1025
1026#if USE_ITT_BUILD && USE_ITT_NOTIFY
1027   // Barrier imbalance - save arrive time to the thread
1028   if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
1029     this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time =
1030         __itt_get_timestamp();
1031 }
1032#endif
1033 /* Perform a hypercube-embedded tree gather to wait until all of the threads
1034 have arrived, and reduce any required data as we go. */
1035 kmp_flag_64<> p_flag(&thr_bar->b_arrived);
1036 for (level = 0, offset = 1; offset < num_threads;
1037 level += branch_bits, offset <<= branch_bits) {
1038 kmp_uint32 child;
1039 kmp_uint32 child_tid;
1040
1041 if (((tid >> level) & (branch_factor - 1)) != 0) {
1042 kmp_int32 parent_tid = tid & ~((1 << (level + branch_bits)) - 1);
1043
1044 KMP_MB(); // Synchronize parent and child threads.
1045       KA_TRACE(20,
1046                ("__kmp_hyper_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
1047                 "arrived(%p): %llu => %llu\n",
1048                 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(parent_tid, team),
1049                 team->t.t_id, parent_tid, &thr_bar->b_arrived,
1050                 thr_bar->b_arrived,
1051                 thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));
1052 // Mark arrival to parent thread
1053 /* After performing this write (in the last iteration of the enclosing for
1054 loop), a worker thread may not assume that the team is valid any more
1055 - it could be deallocated by the primary thread at any time. */
1056 p_flag.set_waiter(other_threads[parent_tid]);
1057 p_flag.release();
1058 break;
1059 }
1060
1061 // Parent threads wait for children to arrive
1062     if (new_state == KMP_BARRIER_UNUSED_STATE)
1063       new_state = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
1064 for (child = 1, child_tid = tid + (1 << level);
1065 child < branch_factor && child_tid < num_threads;
1066 child++, child_tid += (1 << level)) {
1067 kmp_info_t *child_thr = other_threads[child_tid];
1068 kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
1069#if KMP_CACHE_MANAGE
1070 kmp_uint32 next_child_tid = child_tid + (1 << level);
1071 // Prefetch next thread's arrived count
1072 if (child + 1 < branch_factor && next_child_tid < num_threads)
1073 KMP_CACHE_PREFETCH(
1074 &other_threads[next_child_tid]->th.th_bar[bt].bb.b_arrived);
1075#endif /* KMP_CACHE_MANAGE */
1076       KA_TRACE(20,
1077                ("__kmp_hyper_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) "
1078                 "arrived(%p) == %llu\n",
1079                 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
1080                 team->t.t_id, child_tid, &child_bar->b_arrived, new_state));
1081 // Wait for child to arrive
1082 kmp_flag_64<> c_flag(&child_bar->b_arrived, new_state);
1083       c_flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
1084 KMP_MB(); // Synchronize parent and child threads.
1085#if USE_ITT_BUILD && USE_ITT_NOTIFY
1086 // Barrier imbalance - write min of the thread time and a child time to
1087 // the thread.
1088 if (__kmp_forkjoin_frames_mode == 2) {
1089         this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time,
1090                                                 child_thr->th.th_bar_min_time);
1091 }
1092#endif
1093 if (reduce) {
1094         KA_TRACE(100,
1095                  ("__kmp_hyper_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n",
1096                   gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
1097                   team->t.t_id, child_tid));
1098         OMPT_REDUCTION_DECL(this_thr, gtid);
1099         OMPT_REDUCTION_BEGIN;
1100         (*reduce)(this_thr->th.th_local.reduce_data,
1101                   child_thr->th.th_local.reduce_data);
1102         OMPT_REDUCTION_END;
1103 }
1104 }
1105 }
1106
1107   if (KMP_MASTER_TID(tid)) {
1108     // Need to update the team arrived pointer if we are the primary thread
1109     if (new_state == KMP_BARRIER_UNUSED_STATE)
1110       team->t.t_bar[bt].b_arrived += KMP_BARRIER_STATE_BUMP;
1111 else
1112 team->t.t_bar[bt].b_arrived = new_state;
1113     KA_TRACE(20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) set team %d "
1114                   "arrived(%p) = %llu\n",
1115                   gtid, team->t.t_id, tid, team->t.t_id,
1116                   &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived));
1117   }
1118   KA_TRACE(
1119       20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
1120            gtid, team->t.t_id, tid, bt));
1121}
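The hypercube gather above pairs threads by bit patterns of tid rather than by a fixed tree: at level L a thread drops out as soon as ((tid >> L) & (branch_factor - 1)) != 0, signalling the parent tid & ~((1 << (L + branch_bits)) - 1), and otherwise it collects children at tid + (child << L). The small sketch below (illustrative only; enumerate_hyper_gather is not a runtime function) prints who waits for whom at each level so the pattern can be checked against the loop bounds in the code:

#include <cstdio>

// Illustrative only: mirrors the index math of __kmp_hyper_barrier_gather.
static void enumerate_hyper_gather(unsigned num_threads, unsigned branch_bits) {
  unsigned branch_factor = 1u << branch_bits;
  for (unsigned tid = 0; tid < num_threads; ++tid) {
    for (unsigned level = 0, offset = 1; offset < num_threads;
         level += branch_bits, offset <<= branch_bits) {
      if (((tid >> level) & (branch_factor - 1)) != 0) {
        unsigned parent = tid & ~((1u << (level + branch_bits)) - 1);
        std::printf("level %u: T#%u signals parent T#%u\n", level, tid, parent);
        break; // same early exit as the runtime loop
      }
      for (unsigned child = 1, child_tid = tid + (1u << level);
           child < branch_factor && child_tid < num_threads;
           ++child, child_tid += (1u << level))
        std::printf("level %u: T#%u waits for T#%u\n", level, tid, child_tid);
    }
  }
}

int main() { enumerate_hyper_gather(8, 1); } // 8 threads, pairwise hypercube

With branch_bits = 1 and eight threads this reproduces the classic dissemination pattern: odd tids signal their even neighbor at level 0, tids 2 and 6 signal 0 and 4 at level 1, and 4 signals 0 at level 2, after which only the primary thread remains.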
1122
1123// The reverse versions seem to beat the forward versions overall
1124#define KMP_REVERSE_HYPER_BAR
1125static void __kmp_hyper_barrier_release(
1126 enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
1127     int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
1128   KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hyper_release);
1129 kmp_team_t *team;
1130 kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
1131 kmp_info_t **other_threads;
1132 kmp_uint32 num_threads;
1133 kmp_uint32 branch_bits = __kmp_barrier_release_branch_bits[bt];
1134 kmp_uint32 branch_factor = 1 << branch_bits;
1135 kmp_uint32 child;
1136 kmp_uint32 child_tid;
1137 kmp_uint32 offset;
1138 kmp_uint32 level;
1139
1140 /* Perform a hypercube-embedded tree release for all of the threads that have
1141 been gathered. If KMP_REVERSE_HYPER_BAR is defined (default) the threads
1142 are released in the reverse order of the corresponding gather, otherwise
1143 threads are released in the same order. */
1144   if (KMP_MASTER_TID(tid)) { // primary thread
1145     team = __kmp_threads[gtid]->th.th_team;
1146     KMP_DEBUG_ASSERT(team != NULL);
1147     KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d(%d:%d) primary enter for "
1148                   "barrier type %d\n",
1149                   gtid, team->t.t_id, tid, bt));
1150#if KMP_BARRIER_ICV_PUSH
1151 if (propagate_icvs) { // primary already has ICVs in final destination; copy
1152 copy_icvs(&thr_bar->th_fixed_icvs,
1153 &team->t.t_implicit_task_taskdata[tid].td_icvs);
1154 }
1155#endif
1156 } else { // Handle fork barrier workers who aren't part of a team yet
1157     KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d wait go(%p) == %u\n", gtid,
1158                   &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
1159     // Wait for parent thread to release us
1160     kmp_flag_64<> flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
1161     flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
1162#if USE_ITT_BUILD && USE_ITT_NOTIFY
1163     if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
1164 // In fork barrier where we could not get the object reliably
1165 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
1166 // Cancel wait on previous parallel region...
1167 __kmp_itt_task_starting(itt_sync_obj);
1168
1169       if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
1170 return;
1171
1172 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
1173       if (itt_sync_obj != NULL)
1174 // Call prepare as early as possible for "new" barrier
1175 __kmp_itt_task_finished(itt_sync_obj);
1176 } else
1177#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1178 // Early exit for reaping threads releasing forkjoin barrier
1179     if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
1180 return;
1181
1182 // The worker thread may now assume that the team is valid.
1183 team = __kmp_threads[gtid]->th.th_team;
1184     KMP_DEBUG_ASSERT(team != NULL);
1185     tid = __kmp_tid_from_gtid(gtid);
1186
1187     TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
1188     KA_TRACE(20,
1189              ("__kmp_hyper_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
1190               gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
1191 KMP_MB(); // Flush all pending memory write invalidates.
1192 }
1193 num_threads = this_thr->th.th_team_nproc;
1194 other_threads = team->t.t_threads;
1195
1196#ifdef KMP_REVERSE_HYPER_BAR
1197 // Count up to correct level for parent
1198 for (level = 0, offset = 1;
1199 offset < num_threads && (((tid >> level) & (branch_factor - 1)) == 0);
1200 level += branch_bits, offset <<= branch_bits)
1201 ;
1202
1203 // Now go down from there
1204 for (level -= branch_bits, offset >>= branch_bits; offset != 0;
1205 level -= branch_bits, offset >>= branch_bits)
1206#else
1207 // Go down the tree, level by level
1208 for (level = 0, offset = 1; offset < num_threads;
1209 level += branch_bits, offset <<= branch_bits)
1210#endif // KMP_REVERSE_HYPER_BAR
1211 {
1212#ifdef KMP_REVERSE_HYPER_BAR
1213 /* Now go in reverse order through the children, highest to lowest.
1214 Initial setting of child is conservative here. */
1215 child = num_threads >> ((level == 0) ? level : level - 1);
1216 for (child = (child < branch_factor - 1) ? child : branch_factor - 1,
1217 child_tid = tid + (child << level);
1218 child >= 1; child--, child_tid -= (1 << level))
1219#else
1220 if (((tid >> level) & (branch_factor - 1)) != 0)
1221 // No need to go lower than this, since this is the level parent would be
1222 // notified
1223 break;
1224 // Iterate through children on this level of the tree
1225 for (child = 1, child_tid = tid + (1 << level);
1226 child < branch_factor && child_tid < num_threads;
1227 child++, child_tid += (1 << level))
1228#endif // KMP_REVERSE_HYPER_BAR
1229 {
1230 if (child_tid >= num_threads)
1231 continue; // Child doesn't exist so keep going
1232 else {
1233 kmp_info_t *child_thr = other_threads[child_tid];
1234 kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
1235#if KMP_CACHE_MANAGE
1236 kmp_uint32 next_child_tid = child_tid - (1 << level);
1237// Prefetch next thread's go count
1238#ifdef KMP_REVERSE_HYPER_BAR
1239 if (child - 1 >= 1 && next_child_tid < num_threads)
1240#else
1241 if (child + 1 < branch_factor && next_child_tid < num_threads)
1242#endif // KMP_REVERSE_HYPER_BAR
1243 KMP_CACHE_PREFETCH(
1244 &other_threads[next_child_tid]->th.th_bar[bt].bb.b_go);
1245#endif /* KMP_CACHE_MANAGE */
1246
1247#if KMP_BARRIER_ICV_PUSH
1248 if (propagate_icvs) // push my fixed ICVs to my child
1249 copy_icvs(&child_bar->th_fixed_icvs, &thr_bar->th_fixed_icvs);
1250#endif // KMP_BARRIER_ICV_PUSH
1251
1252         KA_TRACE(
1253             20,
1254             ("__kmp_hyper_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)"
1255              "go(%p): %u => %u\n",
1256              gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
1257              team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,
1258              child_bar->b_go + KMP_BARRIER_STATE_BUMP));
1259 // Release child from barrier
1260 kmp_flag_64<> flag(&child_bar->b_go, child_thr);
1261 flag.release();
1262 }
1263 }
1264 }
1265#if KMP_BARRIER_ICV_PUSH
1266   if (propagate_icvs &&
1267       !KMP_MASTER_TID(tid)) { // copy ICVs locally to final dest
1268     __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid,
1269                              FALSE);
1270 copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
1271 &thr_bar->th_fixed_icvs);
1272 }
1273#endif
1274   KA_TRACE(
1275       20,
1276       ("__kmp_hyper_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
1277        gtid, team->t.t_id, tid, bt));
1278}
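Under KMP_REVERSE_HYPER_BAR the release walks the same hypercube as the gather, but top-down and with children visited from highest tid to lowest, so the last thread to check in is the first to be woken. The loop pair at lines 1198-1205 first counts how many low-order digits of tid (in base branch_factor) are zero, which is exactly the number of release levels this thread owns, and then iterates those levels downward. A hedged, standalone restatement of that level computation (owned_levels is a made-up name, not a runtime function):

#include <cstdio>

// Illustrative only: how many release levels a thread owns in the reverse
// hyper barrier, i.e. the number of trailing zero digits of tid in base
// branch_factor (the primary thread owns them all).
static unsigned owned_levels(unsigned tid, unsigned num_threads,
                             unsigned branch_bits) {
  unsigned branch_factor = 1u << branch_bits;
  unsigned level = 0, offset = 1;
  while (offset < num_threads && ((tid >> level) & (branch_factor - 1)) == 0) {
    level += branch_bits;
    offset <<= branch_bits;
  }
  return level / branch_bits;
}

int main() {
  for (unsigned tid = 0; tid < 8; ++tid)
    std::printf("T#%u owns %u release level(s)\n", tid, owned_levels(tid, 8, 1));
}

For eight threads with branch_bits = 1 this reports 3 levels for T#0, 2 for T#4, 1 for T#2 and T#6, and 0 for the odd tids, which is the reverse of the gather edges enumerated in the earlier sketch.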
1279
1280// Hierarchical Barrier
1281
1282// Initialize thread barrier data
1283/* Initializes/re-initializes the hierarchical barrier data stored on a thread.
1284 Performs the minimum amount of initialization required based on how the team
1285 has changed. Returns true if leaf children will require both on-core and
1286 traditional wake-up mechanisms. For example, if the team size increases,
1287 threads already in the team will respond to on-core wakeup on their parent
1288 thread, but threads newly added to the team will only be listening on the
1289 their local b_go. */
1290static bool __kmp_init_hierarchical_barrier_thread(enum barrier_type bt,
1291 kmp_bstate_t *thr_bar,
1292 kmp_uint32 nproc, int gtid,
1293 int tid, kmp_team_t *team) {
1294 // Checks to determine if (re-)initialization is needed
1295   bool uninitialized = thr_bar->team == NULL;
1296 bool team_changed = team != thr_bar->team;
1297 bool team_sz_changed = nproc != thr_bar->nproc;
1298 bool tid_changed = tid != thr_bar->old_tid;
1299 bool retval = false;
1300
1301 if (uninitialized || team_sz_changed) {
1302 __kmp_get_hierarchy(nproc, thr_bar);
1303 }
1304
1305 if (uninitialized || team_sz_changed || tid_changed) {
1306 thr_bar->my_level = thr_bar->depth - 1; // default for primary thread
1307 thr_bar->parent_tid = -1; // default for primary thread
1308     if (!KMP_MASTER_TID(tid)) {
1309 // if not primary thread, find parent thread in hierarchy
1310 kmp_uint32 d = 0;
1311 while (d < thr_bar->depth) { // find parent based on level of thread in
1312 // hierarchy, and note level
1313 kmp_uint32 rem;
1314 if (d == thr_bar->depth - 2) { // reached level right below the primary
1315 thr_bar->parent_tid = 0;
1316 thr_bar->my_level = d;
1317 break;
1318 } else if ((rem = tid % thr_bar->skip_per_level[d + 1]) != 0) {
1319 // TODO: can we make the above op faster?
1320 // thread is not a subtree root at next level, so this is max
1321 thr_bar->parent_tid = tid - rem;
1322 thr_bar->my_level = d;
1323 break;
1324 }
1325 ++d;
1326 }
1327 }
1328 __kmp_type_convert(7 - ((tid - thr_bar->parent_tid) /
1329 (thr_bar->skip_per_level[thr_bar->my_level])),
1330 &(thr_bar->offset));
1331 thr_bar->old_tid = tid;
1332     thr_bar->wait_flag = KMP_BARRIER_NOT_WAITING;
1333 thr_bar->team = team;
1334 thr_bar->parent_bar =
1335 &team->t.t_threads[thr_bar->parent_tid]->th.th_bar[bt].bb;
1336 }
1337 if (uninitialized || team_changed || tid_changed) {
1338 thr_bar->team = team;
1339 thr_bar->parent_bar =
1340 &team->t.t_threads[thr_bar->parent_tid]->th.th_bar[bt].bb;
1341 retval = true;
1342 }
1343 if (uninitialized || team_sz_changed || tid_changed) {
1344 thr_bar->nproc = nproc;
1345 thr_bar->leaf_kids = thr_bar->base_leaf_kids;
1346 if (thr_bar->my_level == 0)
1347 thr_bar->leaf_kids = 0;
1348 if (thr_bar->leaf_kids && (kmp_uint32)tid + thr_bar->leaf_kids + 1 > nproc)
1349 __kmp_type_convert(nproc - tid - 1, &(thr_bar->leaf_kids));
1350 thr_bar->leaf_state = 0;
1351 for (int i = 0; i < thr_bar->leaf_kids; ++i)
1352 ((char *)&(thr_bar->leaf_state))[7 - i] = 1;
1353 }
1354 return retval;
1355}
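The leaf_state built at lines 1350-1352 packs one flag byte per leaf child into a single 64-bit word: child i owns byte 7 - i, which is what lets a parent wait for all of its leaves with a single kmp_flag_64 wait on b_arrived | leaf_state in the gather below. A quick illustration of the mask construction (build_leaf_state is a made-up helper, shown only to make the byte layout visible):

#include <cstdint>
#include <cstdio>

// Illustrative only: reproduce the byte-per-leaf encoding of leaf_state.
static std::uint64_t build_leaf_state(int leaf_kids) {
  std::uint64_t leaf_state = 0;
  for (int i = 0; i < leaf_kids; ++i)
    reinterpret_cast<char *>(&leaf_state)[7 - i] = 1;
  return leaf_state;
}

int main() {
  // With 3 leaf kids, bytes 7, 6 and 5 are set, so on a little-endian
  // machine the value prints as 0x0101010000000000.
  std::printf("leaf_state = 0x%016llx\n",
              (unsigned long long)build_leaf_state(3));
}

Each leaf later targets "its" byte through the kmp_flag_oncore offset (thr_bar->offset is derived from 7 - (tid - parent_tid)/skip at line 1328), and the parent clears all leaf bytes at once with the KMP_TEST_THEN_AND64 on ~leaf_state seen in the gather.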
1356
1357static void __kmp_hierarchical_barrier_gather(
1358 enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
1359     void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
1360   KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hier_gather);
1361 kmp_team_t *team = this_thr->th.th_team;
1362 kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
1363 kmp_uint32 nproc = this_thr->th.th_team_nproc;
1364 kmp_info_t **other_threads = team->t.t_threads;
1365 kmp_uint64 new_state = 0;
1366
1367 int level = team->t.t_level;
1368 if (other_threads[0]
1369 ->th.th_teams_microtask) // are we inside the teams construct?
1370 if (this_thr->th.th_teams_size.nteams > 1)
1371 ++level; // level was not increased in teams construct for team_of_masters
1372 if (level == 1)
1373 thr_bar->use_oncore_barrier = 1;
1374 else
1375 thr_bar->use_oncore_barrier = 0; // Do not use oncore barrier when nested
1376
1377   KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) enter for "
1378                 "barrier type %d\n",
1379                 gtid, team->t.t_id, tid, bt));
1380   KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);
1381
1382#if USE_ITT_BUILD && USE_ITT_NOTIFY
1383   // Barrier imbalance - save arrive time to the thread
1384   if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
1385     this_thr->th.th_bar_arrive_time = __itt_get_timestamp();
1386 }
1387#endif
1388
1389 (void)__kmp_init_hierarchical_barrier_thread(bt, thr_bar, nproc, gtid, tid,
1390 team);
1391
1392 if (thr_bar->my_level) { // not a leaf (my_level==0 means leaf)
1393 kmp_int32 child_tid;
1394 new_state =
1395         (kmp_uint64)team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
1396     if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
1397         thr_bar->use_oncore_barrier) {
1398 if (thr_bar->leaf_kids) {
1399 // First, wait for leaf children to check-in on my b_arrived flag
1400 kmp_uint64 leaf_state =
1401             KMP_MASTER_TID(tid)
1402                 ? thr_bar->b_arrived | thr_bar->leaf_state
1403                 : team->t.t_bar[bt].b_arrived | thr_bar->leaf_state;
1404         KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) waiting "
1405                       "for leaf kids\n",
1406                       gtid, team->t.t_id, tid));
1407         kmp_flag_64<> flag(&thr_bar->b_arrived, leaf_state);
1408         flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
1409 if (reduce) {
1410           OMPT_REDUCTION_DECL(this_thr, gtid);
1411           OMPT_REDUCTION_BEGIN;
1412 for (child_tid = tid + 1; child_tid <= tid + thr_bar->leaf_kids;
1413 ++child_tid) {
1414             KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += "
1415                            "T#%d(%d:%d)\n",
1416                            gtid, team->t.t_id, tid,
1417                            __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
1418                            child_tid));
1419 (*reduce)(this_thr->th.th_local.reduce_data,
1420 other_threads[child_tid]->th.th_local.reduce_data);
1421 }
1422           OMPT_REDUCTION_END;
1423         }
1424         // clear leaf_state bits
1425         KMP_TEST_THEN_AND64(&thr_bar->b_arrived, ~(thr_bar->leaf_state));
1426 }
1427 // Next, wait for higher level children on each child's b_arrived flag
1428 for (kmp_uint32 d = 1; d < thr_bar->my_level;
1429 ++d) { // gather lowest level threads first, but skip 0
1430 kmp_uint32 last = tid + thr_bar->skip_per_level[d + 1],
1431 skip = thr_bar->skip_per_level[d];
1432 if (last > nproc)
1433 last = nproc;
1434 for (child_tid = tid + skip; child_tid < (int)last; child_tid += skip) {
1435 kmp_info_t *child_thr = other_threads[child_tid];
1436 kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
1437         KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait "
1438                       "T#%d(%d:%d) "
1439                       "arrived(%p) == %llu\n",
1440                       gtid, team->t.t_id, tid,
1441                       __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
1442                       child_tid, &child_bar->b_arrived, new_state));
1443         kmp_flag_64<> flag(&child_bar->b_arrived, new_state);
1444         flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
1445 if (reduce) {
1446           KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += "
1447                          "T#%d(%d:%d)\n",
1448                          gtid, team->t.t_id, tid,
1449                          __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
1450                          child_tid));
1451 (*reduce)(this_thr->th.th_local.reduce_data,
1452 child_thr->th.th_local.reduce_data);
1453 }
1454 }
1455 }
1456 } else { // Blocktime is not infinite
1457 for (kmp_uint32 d = 0; d < thr_bar->my_level;
1458 ++d) { // Gather lowest level threads first
1459 kmp_uint32 last = tid + thr_bar->skip_per_level[d + 1],
1460 skip = thr_bar->skip_per_level[d];
1461 if (last > nproc)
1462 last = nproc;
1463 for (child_tid = tid + skip; child_tid < (int)last; child_tid += skip) {
1464 kmp_info_t *child_thr = other_threads[child_tid];
1465 kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
1466         KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait "
1467                       "T#%d(%d:%d) "
1468                       "arrived(%p) == %llu\n",
1469                       gtid, team->t.t_id, tid,
1470                       __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
1471                       child_tid, &child_bar->b_arrived, new_state));
1472         kmp_flag_64<> flag(&child_bar->b_arrived, new_state);
1473         flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
1474 if (reduce) {
1475           KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += "
1476                          "T#%d(%d:%d)\n",
1477                          gtid, team->t.t_id, tid,
1478                          __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
1479                          child_tid));
1480 (*reduce)(this_thr->th.th_local.reduce_data,
1481 child_thr->th.th_local.reduce_data);
1482 }
1483 }
1484 }
1485 }
1486 }
1487 // All subordinates are gathered; now release parent if not primary thread
1488
1489   if (!KMP_MASTER_TID(tid)) { // worker threads release parent in hierarchy
1490     KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) releasing"
1491                   " T#%d(%d:%d) arrived(%p): %llu => %llu\n",
1492                   gtid, team->t.t_id, tid,
1493                   __kmp_gtid_from_tid(thr_bar->parent_tid, team), team->t.t_id,
1494                   thr_bar->parent_tid, &thr_bar->b_arrived, thr_bar->b_arrived,
1495                   thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));
1496 /* Mark arrival to parent: After performing this write, a worker thread may
1497 not assume that the team is valid any more - it could be deallocated by
1498 the primary thread at any time. */
1499 if (thr_bar->my_level || __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME(2147483647) ||
1500 !thr_bar->use_oncore_barrier) { // Parent is waiting on my b_arrived
1501 // flag; release it
1502 kmp_flag_64<> flag(&thr_bar->b_arrived,
1503 other_threads[thr_bar->parent_tid]);
1504 flag.release();
1505 } else {
1506 // Leaf does special release on "offset" bits of parent's b_arrived flag
1507 thr_bar->b_arrived = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP(1 << 2);
1508 kmp_flag_oncore flag(&thr_bar->parent_bar->b_arrived,
1509 thr_bar->offset + 1);
1510 flag.set_waiter(other_threads[thr_bar->parent_tid]);
1511 flag.release();
1512 }
1513 } else { // Primary thread needs to update the team's b_arrived value
1514 team->t.t_bar[bt].b_arrived = new_state;
1515 KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) set team %d "
1516 "arrived(%p) = %llu\n",
1517 gtid, team->t.t_id, tid, team->t.t_id,
1518 &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived));
1519 }
1520 // Is the team access below unsafe or just technically invalid?
1521 KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) exit for "
1522 "barrier type %d\n",
1523 gtid, team->t.t_id, tid, bt));
1524}
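
The gather loop above folds each child's partial result into the parent's buffer through the (*reduce)(this_thr->th.th_local.reduce_data, child_thr->th.th_local.reduce_data) call at lines 1480-1481. Any callback matching the void (*)(void *, void *) signature handed down from __kmp_barrier will do; the sketch below is illustrative only (the function name and the plain int payload are hypothetical, and in practice the callback is supplied by the compiler-generated reduction code).

// Illustrative sketch, not part of kmp_barrier.cpp: a reduce callback folds
// the child's partial result (rhs) into the parent's buffer (lhs) in place.
static void example_sum_reduce(void *lhs_data, void *rhs_data) {
  int *lhs = static_cast<int *>(lhs_data);              // parent's reduce_data
  const int *rhs = static_cast<const int *>(rhs_data);  // child's reduce_data
  *lhs += *rhs; // accumulate the child's partial sum into the parent
}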
1525
1526static void __kmp_hierarchical_barrier_release(
1527 enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
1528 int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj), void *itt_sync_obj) {
1529 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hier_release)((void)0);
1530 kmp_team_t *team;
1531 kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
1532 kmp_uint32 nproc;
1533 bool team_change = false; // indicates on-core barrier shouldn't be used
1534
1535 if (KMP_MASTER_TID(tid)(0 == (tid))) {
1536 team = __kmp_threads[gtid]->th.th_team;
1537 KMP_DEBUG_ASSERT(team != NULL)if (!(team != __null)) { __kmp_debug_assert("team != __null",
"openmp/runtime/src/kmp_barrier.cpp", 1537); }
;
1538 KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) primary "
1539 "entered barrier type %d\n",
1540 gtid, team->t.t_id, tid, bt));
1541 } else { // Worker threads
1542 // Wait for parent thread to release me
1543 if (!thr_bar->use_oncore_barrier ||
1544 __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME(2147483647) || thr_bar->my_level != 0 ||
1545 thr_bar->team == NULL__null) {
1546 // Use traditional method of waiting on my own b_go flag
1547 thr_bar->wait_flag = KMP_BARRIER_OWN_FLAG1;
1548 kmp_flag_64<> flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP(1 << 2));
1549 flag.wait(this_thr, TRUE(!0) USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
1550 TCW_8(thr_bar->b_go,(thr_bar->b_go) = (0)
1551 KMP_INIT_BARRIER_STATE)(thr_bar->b_go) = (0); // Reset my b_go flag for next time
1552 } else { // Thread barrier data is initialized, this is a leaf, blocktime is
1553 // infinite, not nested
1554 // Wait on my "offset" bits on parent's b_go flag
1555 thr_bar->wait_flag = KMP_BARRIER_PARENT_FLAG2;
1556 kmp_flag_oncore flag(&thr_bar->parent_bar->b_go, KMP_BARRIER_STATE_BUMP(1 << 2),
1557 thr_bar->offset + 1, bt,
1558 this_thr USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
1559 flag.wait(this_thr, TRUE(!0));
1560 if (thr_bar->wait_flag ==
1561 KMP_BARRIER_SWITCHING4) { // Thread was switched to own b_go
1562 TCW_8(thr_bar->b_go,(thr_bar->b_go) = (0)
1563 KMP_INIT_BARRIER_STATE)(thr_bar->b_go) = (0); // Reset my b_go flag for next time
1564 } else { // Reset my bits on parent's b_go flag
1565 (RCAST(volatile char *,
1566 &(thr_bar->parent_bar->b_go)))[thr_bar->offset + 1] = 0;
1567 }
1568 }
1569 thr_bar->wait_flag = KMP_BARRIER_NOT_WAITING0;
1570 // Early exit for reaping threads releasing forkjoin barrier
1571 if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done))
1572 return;
1573 // The worker thread may now assume that the team is valid.
1574 team = __kmp_threads[gtid]->th.th_team;
1575 KMP_DEBUG_ASSERT(team != NULL)if (!(team != __null)) { __kmp_debug_assert("team != __null",
"openmp/runtime/src/kmp_barrier.cpp", 1575); }
;
1576 tid = __kmp_tid_from_gtid(gtid);
1577
1578 KA_TRACE(
1579 20,
1580 ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
1581 gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
1582 KMP_MB(); // Flush all pending memory write invalidates.
1583 }
1584
1585 nproc = this_thr->th.th_team_nproc;
1586 int level = team->t.t_level;
1587 if (team->t.t_threads[0]
1588 ->th.th_teams_microtask) { // are we inside the teams construct?
1589 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
1590 this_thr->th.th_teams_level == level)
1591 ++level; // level was not increased in teams construct for team_of_workers
1592 if (this_thr->th.th_teams_size.nteams > 1)
1593 ++level; // level was not increased in teams construct for team_of_masters
1594 }
1595 if (level == 1)
1596 thr_bar->use_oncore_barrier = 1;
1597 else
1598 thr_bar->use_oncore_barrier = 0; // Do not use oncore barrier when nested
1599
1600 // If the team size has increased, we still communicate with old leaves via
1601 // oncore barrier.
1602 unsigned short int old_leaf_kids = thr_bar->leaf_kids;
1603 kmp_uint64 old_leaf_state = thr_bar->leaf_state;
1604 team_change = __kmp_init_hierarchical_barrier_thread(bt, thr_bar, nproc, gtid,
1605 tid, team);
1606 // But if the entire team changes, we won't use oncore barrier at all
1607 if (team_change)
1608 old_leaf_kids = 0;
1609
1610#if KMP_BARRIER_ICV_PUSH1
1611 if (propagate_icvs) {
1612 __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid,
1613 FALSE0);
1614 if (KMP_MASTER_TID(
1615 tid)) { // primary already has copy in final destination; copy
1616 copy_icvs(&thr_bar->th_fixed_icvs,
1617 &team->t.t_implicit_task_taskdata[tid].td_icvs);
1618 } else if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME(2147483647) &&
1619 thr_bar->use_oncore_barrier) { // optimization for inf blocktime
1620 if (!thr_bar->my_level) // I'm a leaf in the hierarchy (my_level==0)
1621 // leaves (on-core children) pull parent's fixed ICVs directly to local
1622 // ICV store
1623 copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
1624 &thr_bar->parent_bar->th_fixed_icvs);
1625 // non-leaves will get ICVs piggybacked with b_go via NGO store
1626 } else { // blocktime is not infinite; pull ICVs from parent's fixed ICVs
1627 if (thr_bar->my_level) // not a leaf; copy ICVs to my fixed ICVs child can
1628 // access
1629 copy_icvs(&thr_bar->th_fixed_icvs, &thr_bar->parent_bar->th_fixed_icvs);
1630 else // leaves copy parent's fixed ICVs directly to local ICV store
1631 copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
1632 &thr_bar->parent_bar->th_fixed_icvs);
1633 }
1634 }
1635#endif // KMP_BARRIER_ICV_PUSH
1636
1637 // Now, release my children
1638 if (thr_bar->my_level) { // not a leaf
1639 kmp_int32 child_tid;
1640 kmp_uint32 last;
1641 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME(2147483647) &&
1642 thr_bar->use_oncore_barrier) {
1643 if (KMP_MASTER_TID(tid)(0 == (tid))) { // do a flat release
1644 // Set local b_go to bump children via NGO store of the cache line
1645 // containing ICVs and b_go.
1646 thr_bar->b_go = KMP_BARRIER_STATE_BUMP(1 << 2);
1647 // Use ngo stores if available; b_go piggybacks in the last 8 bytes of
1648 // the cache line
1649 ngo_load(&thr_bar->th_fixed_icvs)((void)0);
1650 // This loops over all the threads skipping only the leaf nodes in the
1651 // hierarchy
1652 for (child_tid = thr_bar->skip_per_level[1]; child_tid < (int)nproc;
1653 child_tid += thr_bar->skip_per_level[1]) {
1654 kmp_bstate_t *child_bar =
1655 &team->t.t_threads[child_tid]->th.th_bar[bt].bb;
1656 KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) "
1657 "releasing T#%d(%d:%d)"
1658 " go(%p): %u => %u\n",
1659 gtid, team->t.t_id, tid,
1660 __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
1661 child_tid, &child_bar->b_go, child_bar->b_go,
1662 child_bar->b_go + KMP_BARRIER_STATE_BUMP));
1663 // Use ngo store (if available) to both store ICVs and release child
1664 // via child's b_go
1665 ngo_store_go(&child_bar->th_fixed_icvs, &thr_bar->th_fixed_icvs)memcpy((&child_bar->th_fixed_icvs), (&thr_bar->
th_fixed_icvs), 64)
;
1666 }
1667 ngo_sync()((void)0);
1668 }
1669 TCW_8(thr_bar->b_go,(thr_bar->b_go) = (0)
1670 KMP_INIT_BARRIER_STATE)(thr_bar->b_go) = (0); // Reset my b_go flag for next time
1671 // Now, release leaf children
1672 if (thr_bar->leaf_kids) { // if there are any
1673 // We test team_change on the off-chance that the level 1 team changed.
1674 if (team_change ||
1675 old_leaf_kids < thr_bar->leaf_kids) { // some old, some new
1676 if (old_leaf_kids) { // release old leaf kids
1677 thr_bar->b_go |= old_leaf_state;
1678 }
1679 // Release new leaf kids
1680 last = tid + thr_bar->skip_per_level[1];
1681 if (last > nproc)
1682 last = nproc;
1683 for (child_tid = tid + 1 + old_leaf_kids; child_tid < (int)last;
1684 ++child_tid) { // skip_per_level[0]=1
1685 kmp_info_t *child_thr = team->t.t_threads[child_tid];
1686 kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
1687 KA_TRACE(
1688 20,
1689 ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) releasing"
1690 " T#%d(%d:%d) go(%p): %u => %u\n",
1691 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
1692 team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,
1693 child_bar->b_go + KMP_BARRIER_STATE_BUMP));
1694 // Release child using child's b_go flag
1695 kmp_flag_64<> flag(&child_bar->b_go, child_thr);
1696 flag.release();
1697 }
1698 } else { // Release all children at once with leaf_state bits on my own
1699 // b_go flag
1700 thr_bar->b_go |= thr_bar->leaf_state;
1701 }
1702 }
1703 } else { // Blocktime is not infinite; do a simple hierarchical release
1704 for (int d = thr_bar->my_level - 1; d >= 0;
1705 --d) { // Release highest level threads first
1706 last = tid + thr_bar->skip_per_level[d + 1];
1707 kmp_uint32 skip = thr_bar->skip_per_level[d];
1708 if (last > nproc)
1709 last = nproc;
1710 for (child_tid = tid + skip; child_tid < (int)last; child_tid += skip) {
1711 kmp_info_t *child_thr = team->t.t_threads[child_tid];
1712 kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
1713 KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) "
1714 "releasing T#%d(%d:%d) go(%p): %u => %u\n",
1715 gtid, team->t.t_id, tid,
1716 __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
1717 child_tid, &child_bar->b_go, child_bar->b_go,
1718 child_bar->b_go + KMP_BARRIER_STATE_BUMP));
1719 // Release child using child's b_go flag
1720 kmp_flag_64<> flag(&child_bar->b_go, child_thr);
1721 flag.release();
1722 }
1723 }
1724 }
1725#if KMP_BARRIER_ICV_PUSH1
1726 if (propagate_icvs && !KMP_MASTER_TID(tid)(0 == (tid)))
1727 // non-leaves copy ICVs from fixed ICVs to local dest
1728 copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
1729 &thr_bar->th_fixed_icvs);
1730#endif // KMP_BARRIER_ICV_PUSH
1731 }
1732 KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) exit for "
1733 "barrier type %d\n",
1734 gtid, team->t.t_id, tid, bt));
1735}
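
Both release loops above walk descendants using thr_bar->skip_per_level: at depth d a non-leaf thread releases the threads at tid + skip_per_level[d], tid + 2 * skip_per_level[d], ... up to (but not including) tid + skip_per_level[d + 1], clamped to nproc, with skip_per_level[0] == 1 for the leaves (see the comment at line 1684). The standalone sketch below replays that traversal; the table values, tid and my_level are made-up examples, not the runtime's configuration.

// Standalone sketch of the skip_per_level traversal used above; the table,
// tid and my_level below are hypothetical example values.
#include <cstdio>

int main() {
  const int nproc = 16;
  // skip_per_level[d] is the size of a subtree rooted at level d; here each
  // level-1 parent owns 4 leaves and each level-2 parent owns 2 level-1
  // subtrees.
  const int skip_per_level[] = {1, 4, 8, 16};
  const int tid = 0;      // this thread's id within the team
  const int my_level = 2; // this thread's level in the hierarchy (0 = leaf)
  for (int d = my_level - 1; d >= 0; --d) { // release highest level first
    int last = tid + skip_per_level[d + 1];
    if (last > nproc)
      last = nproc;
    for (int child = tid + skip_per_level[d]; child < last;
         child += skip_per_level[d])
      std::printf("level %d: release child tid %d\n", d, child);
  }
  return 0; // prints child 4 at level 1, then children 1, 2, 3 at level 0
}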
1736
1737// End of Barrier Algorithms
1738
1739// type traits for cancellable value
1740// if cancellable is true, then is_cancellable is a normal boolean variable
1741// if cancellable is false, then is_cancellable is a compile time constant
1742template <bool cancellable> struct is_cancellable {};
1743template <> struct is_cancellable<true> {
1744 bool value;
1745 is_cancellable() : value(false) {}
1746 is_cancellable(bool b) : value(b) {}
1747 is_cancellable &operator=(bool b) {
1748 value = b;
1749 return *this;
1750 }
1751 operator bool() const { return value; }
1752};
1753template <> struct is_cancellable<false> {
1754 is_cancellable &operator=(bool b) { return *this; }
1755 constexpr operator bool() const { return false; }
1756};
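
The two specializations above let __kmp_barrier_template below track cancellation in a real bool only when it is instantiated with cancellable = true; with cancellable = false the flag converts to a constant false, so the cancellation branches can be folded away at compile time. The toy below is a standalone illustration of that idea, not runtime code; barrier_like is a hypothetical stand-in for the template.

// Standalone toy showing the intent of is_cancellable<>; not runtime code.
#include <cstdio>

template <bool cancellable> struct toy_cancellable {};
template <> struct toy_cancellable<true> {
  bool value = false;                       // a real runtime flag
  toy_cancellable &operator=(bool b) { value = b; return *this; }
  operator bool() const { return value; }
};
template <> struct toy_cancellable<false> {
  toy_cancellable &operator=(bool) { return *this; } // assignment is a no-op
  constexpr operator bool() const { return false; }  // compile-time constant
};

template <bool cancellable> int barrier_like(bool request_cancel) {
  toy_cancellable<cancellable> cancelled;
  cancelled = request_cancel; // discarded when cancellable == false
  if (cancelled)              // constant-folded away when cancellable == false
    return 1;                 // "cancelled"
  return 0;                   // "completed normally"
}

int main() {
  std::printf("%d %d\n", barrier_like<true>(true), barrier_like<false>(true));
  return 0; // prints "1 0"
}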
1757
1758// Internal function to do a barrier.
1759/* If is_split is true, do a split barrier, otherwise, do a plain barrier
1760 If reduce is non-NULL, do a split reduction barrier, otherwise, do a split
1761 barrier
1762 When cancellable = false,
1763 Returns 0 if primary thread, 1 if worker thread.
1764 When cancellable = true
1765 Returns 0 if not cancelled, 1 if cancelled. */
1766template <bool cancellable = false>
1767static int __kmp_barrier_template(enum barrier_type bt, int gtid, int is_split,
1768 size_t reduce_size, void *reduce_data,
1769 void (*reduce)(void *, void *)) {
1770 KMP_TIME_PARTITIONED_BLOCK(OMP_plain_barrier)((void)0);
1771 KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER)((void)0);
1772 int tid = __kmp_tid_from_gtid(gtid);
1773 kmp_info_t *this_thr = __kmp_threads[gtid];
1774 kmp_team_t *team = this_thr->th.th_team;
1775 int status = 0;
1776 is_cancellable<cancellable> cancelled;
1777#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
1778 ompt_data_t *my_task_data;
1779 ompt_data_t *my_parallel_data;
1780 void *return_address;
1781 ompt_sync_region_t barrier_kind;
1782#endif
1783
1784 KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) has arrived\n", gtid,
1785 __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid)));
1786
1787#if OMPT_SUPPORT1
1788 if (ompt_enabled.enabled) {
1789#if OMPT_OPTIONAL1
1790 my_task_data = OMPT_CUR_TASK_DATA(this_thr)(&(this_thr->th.th_current_task->ompt_task_info.task_data
))
;
1791 my_parallel_data = OMPT_CUR_TEAM_DATA(this_thr)(&(this_thr->th.th_team->t.ompt_team_info.parallel_data
))
;
1792 return_address = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
1793 barrier_kind = __ompt_get_barrier_kind(bt, this_thr);
1794 if (ompt_enabled.ompt_callback_sync_region) {
1795 ompt_callbacks.ompt_callback(ompt_callback_sync_region)ompt_callback_sync_region_callback(
1796 barrier_kind, ompt_scope_begin, my_parallel_data, my_task_data,
1797 return_address);
1798 }
1799 if (ompt_enabled.ompt_callback_sync_region_wait) {
1800 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)ompt_callback_sync_region_wait_callback(
1801 barrier_kind, ompt_scope_begin, my_parallel_data, my_task_data,
1802 return_address);
1803 }
1804#endif
1805 // It is OK to report the barrier state after the barrier begin callback.
1806 // According to the OMPT specification, a compliant implementation may
1807 // even delay reporting this state until the barrier begins to wait.
1808 this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier;
1809 }
1810#endif
1811
1812 if (!team->t.t_serialized) {
1813#if USE_ITT_BUILD1
1814 // This value will be used in itt notify events below.
1815 void *itt_sync_obj = NULL__null;
1816#if USE_ITT_NOTIFY1
1817 if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0)
1818 itt_sync_obj = __kmp_itt_barrier_object(gtid, bt, 1);
1819#endif
1820#endif /* USE_ITT_BUILD */
1821 if (__kmp_tasking_mode == tskm_extra_barrier) {
1822 __kmp_tasking_barrier(team, this_thr, gtid);
1823 KA_TRACE(15,
1824 ("__kmp_barrier: T#%d(%d:%d) past tasking barrier\n", gtid,
1825 __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid)));
1826 }
1827
1828 /* Copy the blocktime info to the thread, where __kmp_wait_template() can
1829 access it when the team struct is not guaranteed to exist. */
1830 // See note about the corresponding code in __kmp_join_barrier() being
1831 // performance-critical.
1832 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME(2147483647)) {
1833#if KMP_USE_MONITOR
1834 this_thr->th.th_team_bt_intervals =
1835 team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
1836 this_thr->th.th_team_bt_set =
1837 team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
1838#else
1839 this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL(team, tid)((((team)->t.t_threads[(tid)]->th.th_current_task->td_icvs
.bt_set) ? ((team)->t.t_threads[(tid)]->th.th_current_task
->td_icvs.blocktime) : __kmp_dflt_blocktime) * __kmp_ticks_per_msec
)
;
1840#endif
1841 }
1842
1843#if USE_ITT_BUILD1
1844 if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0)
1845 __kmp_itt_barrier_starting(gtid, itt_sync_obj);
1846#endif /* USE_ITT_BUILD */
1847#if USE_DEBUGGER0
1848 // Let the debugger know: the thread arrived to the barrier and waiting.
1849 if (KMP_MASTER_TID(tid)(0 == (tid))) { // Primary thread counter stored in team struct
1850 team->t.t_bar[bt].b_master_arrived += 1;
1851 } else {
1852 this_thr->th.th_bar[bt].bb.b_worker_arrived += 1;
1853 } // if
1854#endif /* USE_DEBUGGER */
1855 if (reduce != NULL__null) {
1856 // KMP_DEBUG_ASSERT( is_split == TRUE ); // #C69956
1857 this_thr->th.th_local.reduce_data = reduce_data;
1858 }
1859
1860 if (KMP_MASTER_TID(tid)(0 == (tid)) && __kmp_tasking_mode != tskm_immediate_exec)
1861 // use 0 to only setup the current team if nthreads > 1
1862 __kmp_task_team_setup(this_thr, team, 0);
1863
1864 if (cancellable) {
1865 cancelled = __kmp_linear_barrier_gather_cancellable(
1866 bt, this_thr, gtid, tid, reduce USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
1867 } else {
1868 switch (__kmp_barrier_gather_pattern[bt]) {
1869 case bp_dist_bar: {
1870 __kmp_dist_barrier_gather(bt, this_thr, gtid, tid,
1871 reduce USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
1872 break;
1873 }
1874 case bp_hyper_bar: {
1875 // don't set branch bits to 0; use linear
1876 KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt])if (!(__kmp_barrier_gather_branch_bits[bt])) { __kmp_debug_assert
("__kmp_barrier_gather_branch_bits[bt]", "openmp/runtime/src/kmp_barrier.cpp"
, 1876); }
;
1877 __kmp_hyper_barrier_gather(bt, this_thr, gtid, tid,
1878 reduce USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
1879 break;
1880 }
1881 case bp_hierarchical_bar: {
1882 __kmp_hierarchical_barrier_gather(
1883 bt, this_thr, gtid, tid, reduce USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
1884 break;
1885 }
1886 case bp_tree_bar: {
1887 // don't set branch bits to 0; use linear
1888 KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt])if (!(__kmp_barrier_gather_branch_bits[bt])) { __kmp_debug_assert
("__kmp_barrier_gather_branch_bits[bt]", "openmp/runtime/src/kmp_barrier.cpp"
, 1888); }
;
1889 __kmp_tree_barrier_gather(bt, this_thr, gtid, tid,
1890 reduce USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
1891 break;
1892 }
1893 default: {
1894 __kmp_linear_barrier_gather(bt, this_thr, gtid, tid,
1895 reduce USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
1896 }
1897 }
1898 }
1899
1900 KMP_MB();
1901
1902 if (KMP_MASTER_TID(tid)(0 == (tid))) {
1903 status = 0;
1904 if (__kmp_tasking_mode != tskm_immediate_exec && !cancelled) {
1905 __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
1906 }
1907#if USE_DEBUGGER0
1908 // Let the debugger know: All threads are arrived and starting leaving the
1909 // barrier.
1910 team->t.t_bar[bt].b_team_arrived += 1;
1911#endif
1912
1913 if (__kmp_omp_cancellation) {
1914 kmp_int32 cancel_request = KMP_ATOMIC_LD_RLX(&team->t.t_cancel_request)(&team->t.t_cancel_request)->load(std::memory_order_relaxed
)
;
1915 // Reset cancellation flag for worksharing constructs
1916 if (cancel_request == cancel_loop ||
1917 cancel_request == cancel_sections) {
1918 KMP_ATOMIC_ST_RLX(&team->t.t_cancel_request, cancel_noreq)(&team->t.t_cancel_request)->store(cancel_noreq, std
::memory_order_relaxed)
;
1919 }
1920 }
1921#if USE_ITT_BUILD1
1922 /* TODO: In case of split reduction barrier, primary thread may send
1923 acquired event early, before the final summation into the shared
1924 variable is done (final summation can be a long operation for array
1925 reductions). */
1926 if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0)
1927 __kmp_itt_barrier_middle(gtid, itt_sync_obj);
1928#endif /* USE_ITT_BUILD */
1929#if USE_ITT_BUILD1 && USE_ITT_NOTIFY1
1930 // Barrier - report frame end (only if active_level == 1)
1931 if ((__itt_frame_submit_v3_ptr__kmp_itt_frame_submit_v3_ptr__3_0 || KMP_ITT_DEBUG0) &&
1932 __kmp_forkjoin_frames_mode &&
1933 (this_thr->th.th_teams_microtask == NULL__null || // either not in teams
1934 this_thr->th.th_teams_size.nteams == 1) && // or inside single team
1935 team->t.t_active_level == 1) {
1936 ident_t *loc = __kmp_threads[gtid]->th.th_ident;
1937 kmp_uint64 cur_time = __itt_get_timestamp(!__kmp_itt_get_timestamp_ptr__3_0) ? 0 : __kmp_itt_get_timestamp_ptr__3_0();
1938 kmp_info_t **other_threads = team->t.t_threads;
1939 int nproc = this_thr->th.th_team_nproc;
1940 int i;
1941 switch (__kmp_forkjoin_frames_mode) {
1942 case 1:
1943 __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0,
1944 loc, nproc);
1945 this_thr->th.th_frame_time = cur_time;
1946 break;
1947 case 2: // AC 2015-01-19: currently does not work for hierarchical (to
1948 // be fixed)
1949 __kmp_itt_frame_submit(gtid, this_thr->th.th_bar_min_time, cur_time,
1950 1, loc, nproc);
1951 break;
1952 case 3:
1953 if (__itt_metadata_add_ptr__kmp_itt_metadata_add_ptr__3_0) {
1954 // Initialize with primary thread's wait time
1955 kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time;
1956 // Set arrive time to zero to be able to check it in
1957 // __kmp_invoke_task(); the same is done inside the loop below
1958 this_thr->th.th_bar_arrive_time = 0;
1959 for (i = 1; i < nproc; ++i) {
1960 delta += (cur_time - other_threads[i]->th.th_bar_arrive_time);
1961 other_threads[i]->th.th_bar_arrive_time = 0;
1962 }
1963 __kmp_itt_metadata_imbalance(gtid, this_thr->th.th_frame_time,
1964 cur_time, delta,
1965 (kmp_uint64)(reduce != NULL__null));
1966 }
1967 __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0,
1968 loc, nproc);
1969 this_thr->th.th_frame_time = cur_time;
1970 break;
1971 }
1972 }
1973#endif /* USE_ITT_BUILD */
1974 } else {
1975 status = 1;
1976#if USE_ITT_BUILD1
1977 if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0)
1978 __kmp_itt_barrier_middle(gtid, itt_sync_obj);
1979#endif /* USE_ITT_BUILD */
1980 }
1981 if ((status == 1 || !is_split) && !cancelled) {
1982 if (cancellable) {
1983 cancelled = __kmp_linear_barrier_release_cancellable(
1984 bt, this_thr, gtid, tid, FALSE0 USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
1985 } else {
1986 switch (__kmp_barrier_release_pattern[bt]) {
1987 case bp_dist_bar: {
1988 KMP_ASSERT(__kmp_barrier_release_branch_bits[bt])if (!(__kmp_barrier_release_branch_bits[bt])) { __kmp_debug_assert
("__kmp_barrier_release_branch_bits[bt]", "openmp/runtime/src/kmp_barrier.cpp"
, 1988); }
;
1989 __kmp_dist_barrier_release(bt, this_thr, gtid, tid,
1990 FALSE0 USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
1991 break;
1992 }
1993 case bp_hyper_bar: {
1994 KMP_ASSERT(__kmp_barrier_release_branch_bits[bt])if (!(__kmp_barrier_release_branch_bits[bt])) { __kmp_debug_assert
("__kmp_barrier_release_branch_bits[bt]", "openmp/runtime/src/kmp_barrier.cpp"
, 1994); }
;
1995 __kmp_hyper_barrier_release(bt, this_thr, gtid, tid,
1996 FALSE0 USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
1997 break;
1998 }
1999 case bp_hierarchical_bar: {
2000 __kmp_hierarchical_barrier_release(
2001 bt, this_thr, gtid, tid, FALSE0 USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
2002 break;
2003 }
2004 case bp_tree_bar: {
2005 KMP_ASSERT(__kmp_barrier_release_branch_bits[bt])if (!(__kmp_barrier_release_branch_bits[bt])) { __kmp_debug_assert
("__kmp_barrier_release_branch_bits[bt]", "openmp/runtime/src/kmp_barrier.cpp"
, 2005); }
;
2006 __kmp_tree_barrier_release(bt, this_thr, gtid, tid,
2007 FALSE0 USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
2008 break;
2009 }
2010 default: {
2011 __kmp_linear_barrier_release(bt, this_thr, gtid, tid,
2012 FALSE0 USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
2013 }
2014 }
2015 }
2016 if (__kmp_tasking_mode != tskm_immediate_exec && !cancelled) {
2017 __kmp_task_team_sync(this_thr, team);
2018 }
2019 }
2020
2021#if USE_ITT_BUILD1
2022 /* GEH: TODO: Move this under if-condition above and also include in
2023 __kmp_end_split_barrier(). This will more accurately represent the actual
2024 release time of the threads for split barriers. */
2025 if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0)
2026 __kmp_itt_barrier_finished(gtid, itt_sync_obj);
2027#endif /* USE_ITT_BUILD */
2028 } else { // Team is serialized.
2029 status = 0;
2030 if (__kmp_tasking_mode != tskm_immediate_exec) {
2031 if (this_thr->th.th_task_team != NULL__null) {
2032#if USE_ITT_NOTIFY1
2033 void *itt_sync_obj = NULL__null;
2034 if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0) {
2035 itt_sync_obj = __kmp_itt_barrier_object(gtid, bt, 1);
2036 __kmp_itt_barrier_starting(gtid, itt_sync_obj);
2037 }
2038#endif
2039
2040 KMP_DEBUG_ASSERT(
2041 this_thr->th.th_task_team->tt.tt_found_proxy_tasks == TRUE ||
2042 this_thr->th.th_task_team->tt.tt_hidden_helper_task_encountered ==
2043 TRUE);
2044 __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
2045 __kmp_task_team_setup(this_thr, team, 0);
2046
2047#if USE_ITT_BUILD1
2048 if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0)
2049 __kmp_itt_barrier_finished(gtid, itt_sync_obj);
2050#endif /* USE_ITT_BUILD */
2051 }
2052 }
2053 }
2054 KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) is leaving with return value %d\n",
2055 gtid, __kmp_team_from_gtid(gtid)->t.t_id,
2056 __kmp_tid_from_gtid(gtid), status));
2057
2058#if OMPT_SUPPORT1
2059 if (ompt_enabled.enabled) {
2060#if OMPT_OPTIONAL1
2061 if (ompt_enabled.ompt_callback_sync_region_wait) {
2062 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)ompt_callback_sync_region_wait_callback(
2063 barrier_kind, ompt_scope_end, my_parallel_data, my_task_data,
2064 return_address);
2065 }
2066 if (ompt_enabled.ompt_callback_sync_region) {
2067 ompt_callbacks.ompt_callback(ompt_callback_sync_region)ompt_callback_sync_region_callback(
2068 barrier_kind, ompt_scope_end, my_parallel_data, my_task_data,
2069 return_address);
2070 }
2071#endif
2072 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
2073 }
2074#endif
2075
2076 if (cancellable)
2077 return (int)cancelled;
2078 return status;
2079}
2080
2081// Returns 0 if primary thread, 1 if worker thread.
2082int __kmp_barrier(enum barrier_type bt, int gtid, int is_split,
2083 size_t reduce_size, void *reduce_data,
2084 void (*reduce)(void *, void *)) {
2085 return __kmp_barrier_template<>(bt, gtid, is_split, reduce_size, reduce_data,
2086 reduce);
2087}
2088
2089#if defined(KMP_GOMP_COMPAT)
2090// Returns 1 if cancelled, 0 otherwise
2091int __kmp_barrier_gomp_cancel(int gtid) {
2092 if (__kmp_omp_cancellation) {
2093 int cancelled = __kmp_barrier_template<true>(bs_plain_barrier, gtid, FALSE0,
2094 0, NULL__null, NULL__null);
2095 if (cancelled) {
2096 int tid = __kmp_tid_from_gtid(gtid);
2097 kmp_info_t *this_thr = __kmp_threads[gtid];
2098 if (KMP_MASTER_TID(tid)(0 == (tid))) {
2099 // Primary thread does not need to revert anything
2100 } else {
2101 // Workers need to revert their private b_arrived flag
2102 this_thr->th.th_bar[bs_plain_barrier].bb.b_arrived -=
2103 KMP_BARRIER_STATE_BUMP(1 << 2);
2104 }
2105 }
2106 return cancelled;
2107 }
2108 __kmp_barrier(bs_plain_barrier, gtid, FALSE0, 0, NULL__null, NULL__null);
2109 return FALSE0;
2110}
2111#endif
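
The worker-side revert above (subtracting KMP_BARRIER_STATE_BUMP from b_arrived) works because b_arrived advances by a fixed bump per barrier while the low bits of the same word are reserved for flag bits such as KMP_BARRIER_SLEEP_STATE, which the fork-barrier assertion at lines 2437-2438 masks off. The sketch below assumes the values (1 << 2) for the bump, as the expansions throughout this listing show, and (1 << 0) for the sleep bit; it is standalone, hypothetical code, not part of the runtime.

// Arithmetic sketch of the b_arrived bookkeeping, assuming the bump is
// (1 << 2) and bit 0 is a sleep flag, as the macro expansions above suggest.
#include <cassert>
#include <cstdint>

int main() {
  const uint64_t BUMP = 1u << 2;   // per-barrier increment of b_arrived
  const uint64_t SLEEP = 1u << 0;  // flag bit packed into the same word
  uint64_t b_arrived = 0;          // KMP_INIT_BARRIER_STATE

  b_arrived += BUMP;               // worker arrives at a barrier
  b_arrived |= SLEEP;              // worker sleeps while waiting
  // Flag bits never collide with the counter, so masking them off
  // recovers the arrival count.
  assert((b_arrived & ~SLEEP) / BUMP == 1);

  b_arrived &= ~SLEEP;             // worker is woken
  b_arrived -= BUMP;               // barrier was cancelled: revert the
                                   // arrival, as __kmp_barrier_gomp_cancel
                                   // does above
  assert(b_arrived == 0);
  return 0;
}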
2112
2113void __kmp_end_split_barrier(enum barrier_type bt, int gtid) {
2114 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_end_split_barrier)((void)0);
2115 KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER)((void)0);
2116 KMP_DEBUG_ASSERT(bt < bs_last_barrier)if (!(bt < bs_last_barrier)) { __kmp_debug_assert("bt < bs_last_barrier"
, "openmp/runtime/src/kmp_barrier.cpp", 2116); }
;
2117 int tid = __kmp_tid_from_gtid(gtid);
2118 kmp_info_t *this_thr = __kmp_threads[gtid];
2119 kmp_team_t *team = this_thr->th.th_team;
2120
2121 if (!team->t.t_serialized) {
2122 if (KMP_MASTER_GTID(gtid)(0 == __kmp_tid_from_gtid((gtid)))) {
2123 switch (__kmp_barrier_release_pattern[bt]) {
2124 case bp_dist_bar: {
2125 __kmp_dist_barrier_release(bt, this_thr, gtid, tid,
2126 FALSE0 USE_ITT_BUILD_ARG(NULL), __null);
2127 break;
2128 }
2129 case bp_hyper_bar: {
2130 KMP_ASSERT(__kmp_barrier_release_branch_bits[bt])if (!(__kmp_barrier_release_branch_bits[bt])) { __kmp_debug_assert
("__kmp_barrier_release_branch_bits[bt]", "openmp/runtime/src/kmp_barrier.cpp"
, 2130); }
;
2131 __kmp_hyper_barrier_release(bt, this_thr, gtid, tid,
2132 FALSE0 USE_ITT_BUILD_ARG(NULL), __null);
2133 break;
2134 }
2135 case bp_hierarchical_bar: {
2136 __kmp_hierarchical_barrier_release(bt, this_thr, gtid, tid,
2137 FALSE0 USE_ITT_BUILD_ARG(NULL), __null);
2138 break;
2139 }
2140 case bp_tree_bar: {
2141 KMP_ASSERT(__kmp_barrier_release_branch_bits[bt])if (!(__kmp_barrier_release_branch_bits[bt])) { __kmp_debug_assert
("__kmp_barrier_release_branch_bits[bt]", "openmp/runtime/src/kmp_barrier.cpp"
, 2141); }
;
2142 __kmp_tree_barrier_release(bt, this_thr, gtid, tid,
2143 FALSE0 USE_ITT_BUILD_ARG(NULL), __null);
2144 break;
2145 }
2146 default: {
2147 __kmp_linear_barrier_release(bt, this_thr, gtid, tid,
2148 FALSE0 USE_ITT_BUILD_ARG(NULL), __null);
2149 }
2150 }
2151 if (__kmp_tasking_mode != tskm_immediate_exec) {
2152 __kmp_task_team_sync(this_thr, team);
2153 } // if
2154 }
2155 }
2156}
2157
2158void __kmp_join_barrier(int gtid) {
2159 KMP_TIME_PARTITIONED_BLOCK(OMP_join_barrier)((void)0);
2160 KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER)((void)0);
2161
2162 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid])if (!(__kmp_threads && __kmp_threads[gtid])) { __kmp_debug_assert
("__kmp_threads && __kmp_threads[gtid]", "openmp/runtime/src/kmp_barrier.cpp"
, 2162); }
;
2163
2164 kmp_info_t *this_thr = __kmp_threads[gtid];
2165 kmp_team_t *team;
2166 int tid;
2167#ifdef KMP_DEBUG1
2168 int team_id;
2169#endif /* KMP_DEBUG */
2170#if USE_ITT_BUILD1
2171 void *itt_sync_obj = NULL__null;
2172#if USE_ITT_NOTIFY1
2173 if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0) // Don't call routine without need
2174 // Get object created at fork_barrier
2175 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
2176#endif
2177#endif /* USE_ITT_BUILD */
2178#if ((USE_ITT_BUILD1 && USE_ITT_NOTIFY1) || defined KMP_DEBUG1)
2179 int nproc = this_thr->th.th_team_nproc;
2180#endif
2181 KMP_MB();
2182
2183 // Get current info
2184 team = this_thr->th.th_team;
2185 KMP_DEBUG_ASSERT(nproc == team->t.t_nproc)if (!(nproc == team->t.t_nproc)) { __kmp_debug_assert("nproc == team->t.t_nproc"
, "openmp/runtime/src/kmp_barrier.cpp", 2185); }
;
2186 tid = __kmp_tid_from_gtid(gtid);
2187#ifdef KMP_DEBUG1
2188 team_id = team->t.t_id;
2189 kmp_info_t *master_thread = this_thr->th.th_team_master;
2190 if (master_thread != team->t.t_threads[0]) {
2191 __kmp_print_structure();
2192 }
2193#endif /* KMP_DEBUG */
2194 KMP_DEBUG_ASSERT(master_thread == team->t.t_threads[0])if (!(master_thread == team->t.t_threads[0])) { __kmp_debug_assert
("master_thread == team->t.t_threads[0]", "openmp/runtime/src/kmp_barrier.cpp"
, 2194); }
;
2195 KMP_MB();
2196
2197 // Verify state
2198 KMP_DEBUG_ASSERT(TCR_PTR(this_thr->th.th_team))if (!(((void *)(this_thr->th.th_team)))) { __kmp_debug_assert
("((void *)(this_thr->th.th_team))", "openmp/runtime/src/kmp_barrier.cpp"
, 2198); }
;
2199 KMP_DEBUG_ASSERT(TCR_PTR(this_thr->th.th_root))if (!(((void *)(this_thr->th.th_root)))) { __kmp_debug_assert
("((void *)(this_thr->th.th_root))", "openmp/runtime/src/kmp_barrier.cpp"
, 2199); }
;
2200 KMP_DEBUG_ASSERT(this_thr == team->t.t_threads[tid])if (!(this_thr == team->t.t_threads[tid])) { __kmp_debug_assert
("this_thr == team->t.t_threads[tid]", "openmp/runtime/src/kmp_barrier.cpp"
, 2200); }
;
2201 KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) arrived at join barrier\n",
2202 gtid, team_id, tid));
2203
2204#if OMPT_SUPPORT1
2205 if (ompt_enabled.enabled) {
2206#if OMPT_OPTIONAL1
2207 ompt_data_t *my_task_data;
2208 ompt_data_t *my_parallel_data;
2209 void *codeptr = NULL__null;
2210 int ds_tid = this_thr->th.th_info.ds.ds_tid;
2211 if (KMP_MASTER_TID(ds_tid)(0 == (ds_tid)) &&
2212 (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)ompt_callback_sync_region_wait_callback ||
2213 ompt_callbacks.ompt_callback(ompt_callback_sync_region)ompt_callback_sync_region_callback))
2214 codeptr = team->t.ompt_team_info.master_return_address;
2215 my_task_data = OMPT_CUR_TASK_DATA(this_thr)(&(this_thr->th.th_current_task->ompt_task_info.task_data
))
;
2216 my_parallel_data = OMPT_CUR_TEAM_DATA(this_thr)(&(this_thr->th.th_team->t.ompt_team_info.parallel_data
))
;
2217 if (ompt_enabled.ompt_callback_sync_region) {
2218 ompt_callbacks.ompt_callback(ompt_callback_sync_region)ompt_callback_sync_region_callback(
2219 ompt_sync_region_barrier_implicit, ompt_scope_begin, my_parallel_data,
2220 my_task_data, codeptr);
2221 }
2222 if (ompt_enabled.ompt_callback_sync_region_wait) {
2223 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)ompt_callback_sync_region_wait_callback(
2224 ompt_sync_region_barrier_implicit, ompt_scope_begin, my_parallel_data,
2225 my_task_data, codeptr);
2226 }
2227 if (!KMP_MASTER_TID(ds_tid)(0 == (ds_tid)))
2228 this_thr->th.ompt_thread_info.task_data = *OMPT_CUR_TASK_DATA(this_thr)(&(this_thr->th.th_current_task->ompt_task_info.task_data
))
;
2229#endif
2230 this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier_implicit;
2231 }
2232#endif
2233
2234 if (__kmp_tasking_mode == tskm_extra_barrier) {
2235 __kmp_tasking_barrier(team, this_thr, gtid);
2236 KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) past tasking barrier\n",
2237 gtid, team_id, tid));
2238 }
2239#ifdef KMP_DEBUG1
2240 if (__kmp_tasking_mode != tskm_immediate_exec) {
2241 KA_TRACE(20, ("__kmp_join_barrier: T#%d, old team = %d, old task_team = "
2242 "%p, th_task_team = %p\n",
2243 __kmp_gtid_from_thread(this_thr), team_id,
2244 team->t.t_task_team[this_thr->th.th_task_state],
2245 this_thr->th.th_task_team));
2246 if (this_thr->th.th_task_team)
2247 KMP_DEBUG_ASSERT(this_thr->th.th_task_team ==
2248 team->t.t_task_team[this_thr->th.th_task_state]);
2249 }
2250#endif /* KMP_DEBUG */
2251
2252 /* Copy the blocktime info to the thread, where __kmp_wait_template() can
2253 access it when the team struct is not guaranteed to exist. Doing these
2254 loads causes a cache miss that slows down EPCC parallel by 2x. As a workaround,
2255 we do not perform the copy if blocktime=infinite, since the values are not
2256 used by __kmp_wait_template() in that case. */
2257 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME(2147483647)) {
2258#if KMP_USE_MONITOR
2259 this_thr->th.th_team_bt_intervals =
2260 team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
2261 this_thr->th.th_team_bt_set =
2262 team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
2263#else
2264 this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL(team, tid)((((team)->t.t_threads[(tid)]->th.th_current_task->td_icvs
.bt_set) ? ((team)->t.t_threads[(tid)]->th.th_current_task
->td_icvs.blocktime) : __kmp_dflt_blocktime) * __kmp_ticks_per_msec
)
;
2265#endif
2266 }
2267
2268#if USE_ITT_BUILD1
2269 if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0)
2270 __kmp_itt_barrier_starting(gtid, itt_sync_obj);
2271#endif /* USE_ITT_BUILD */
2272
2273 switch (__kmp_barrier_gather_pattern[bs_forkjoin_barrier]) {
2274 case bp_dist_bar: {
2275 __kmp_dist_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
2276 NULL__null USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
2277 break;
2278 }
2279 case bp_hyper_bar: {
2280 KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier])if (!(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]))
{ __kmp_debug_assert("__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]"
, "openmp/runtime/src/kmp_barrier.cpp", 2280); }
;
2281 __kmp_hyper_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
2282 NULL__null USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
2283 break;
2284 }
2285 case bp_hierarchical_bar: {
2286 __kmp_hierarchical_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
2287 NULL__null USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
2288 break;
2289 }
2290 case bp_tree_bar: {
2291 KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier])if (!(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]))
{ __kmp_debug_assert("__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]"
, "openmp/runtime/src/kmp_barrier.cpp", 2291); }
;
2292 __kmp_tree_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
2293 NULL__null USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
2294 break;
2295 }
2296 default: {
2297 __kmp_linear_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
2298 NULL__null USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
2299 }
2300 }
2301
2302 /* From this point on, the team data structure may be deallocated at any time
2303 by the primary thread - it is unsafe to reference it in any of the worker
2304 threads. Any per-team data items that need to be referenced before the
2305 end of the barrier should be moved to the kmp_task_team_t structs. */
2306 if (KMP_MASTER_TID(tid)(0 == (tid))) {
2307 if (__kmp_tasking_mode != tskm_immediate_exec) {
2308 __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
2309 }
2310 if (__kmp_display_affinity) {
2311 KMP_CHECK_UPDATE(team->t.t_display_affinity, 0)if ((team->t.t_display_affinity) != (0)) (team->t.t_display_affinity
) = (0)
;
2312 }
2313#if KMP_STATS_ENABLED0
2314 // Have primary thread flag the workers to indicate they are now waiting for
2315 // next parallel region. Also wake them up so they switch their timers to
2316 // idle.
2317 for (int i = 0; i < team->t.t_nproc; ++i) {
2318 kmp_info_t *team_thread = team->t.t_threads[i];
2319 if (team_thread == this_thr)
2320 continue;
2321 team_thread->th.th_stats->setIdleFlag();
2322 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME(2147483647) &&
2323 team_thread->th.th_sleep_loc != NULL__null)
2324 __kmp_null_resume_wrapper(team_thread);
2325 }
2326#endif
2327#if USE_ITT_BUILD1
2328 if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0)
2329 __kmp_itt_barrier_middle(gtid, itt_sync_obj);
2330#endif /* USE_ITT_BUILD */
2331
2332#if USE_ITT_BUILD1 && USE_ITT_NOTIFY1
2333 // Join barrier - report frame end
2334 if ((__itt_frame_submit_v3_ptr__kmp_itt_frame_submit_v3_ptr__3_0 || KMP_ITT_DEBUG0) &&
2335 __kmp_forkjoin_frames_mode &&
2336 (this_thr->th.th_teams_microtask == NULL__null || // either not in teams
2337 this_thr->th.th_teams_size.nteams == 1) && // or inside single team
2338 team->t.t_active_level == 1) {
2339 kmp_uint64 cur_time = __itt_get_timestamp(!__kmp_itt_get_timestamp_ptr__3_0) ? 0 : __kmp_itt_get_timestamp_ptr__3_0();
2340 ident_t *loc = team->t.t_ident;
2341 kmp_info_t **other_threads = team->t.t_threads;
2342 switch (__kmp_forkjoin_frames_mode) {
2343 case 1:
2344 __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0,
2345 loc, nproc);
2346 break;
2347 case 2:
2348 __kmp_itt_frame_submit(gtid, this_thr->th.th_bar_min_time, cur_time, 1,
2349 loc, nproc);
2350 break;
2351 case 3:
2352 if (__itt_metadata_add_ptr__kmp_itt_metadata_add_ptr__3_0) {
2353 // Initialize with primary thread's wait time
2354 kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time;
2355 // Set arrive time to zero to be able to check it in
2356 // __kmp_invoke_task(); the same is done inside the loop below
2357 this_thr->th.th_bar_arrive_time = 0;
2358 for (int i = 1; i < nproc; ++i) {
2359 delta += (cur_time - other_threads[i]->th.th_bar_arrive_time);
2360 other_threads[i]->th.th_bar_arrive_time = 0;
2361 }
2362 __kmp_itt_metadata_imbalance(gtid, this_thr->th.th_frame_time,
2363 cur_time, delta, 0);
2364 }
2365 __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0,
2366 loc, nproc);
2367 this_thr->th.th_frame_time = cur_time;
2368 break;
2369 }
2370 }
2371#endif /* USE_ITT_BUILD */
2372 }
2373#if USE_ITT_BUILD1
2374 else {
2375 if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0)
2376 __kmp_itt_barrier_middle(gtid, itt_sync_obj);
2377 }
2378#endif /* USE_ITT_BUILD */
2379
2380#if KMP_DEBUG1
2381 if (KMP_MASTER_TID(tid)(0 == (tid))) {
2382 KA_TRACE(
2383 15,
2384 ("__kmp_join_barrier: T#%d(%d:%d) says all %d team threads arrived\n",
2385 gtid, team_id, tid, nproc));
2386 }
2387#endif /* KMP_DEBUG */
2388
2389 // TODO now, mark worker threads as done so they may be disbanded
2390 KMP_MB(); // Flush all pending memory write invalidates.
2391 KA_TRACE(10,
2392 ("__kmp_join_barrier: T#%d(%d:%d) leaving\n", gtid, team_id, tid));
2393
2394}
2395
2396// TODO release worker threads' fork barriers as we are ready instead of all at
2397// once
2398void __kmp_fork_barrier(int gtid, int tid) {
2399 KMP_TIME_PARTITIONED_BLOCK(OMP_fork_barrier)((void)0);
2400 KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER)((void)0);
2401 kmp_info_t *this_thr = __kmp_threads[gtid];
2402 kmp_team_t *team = (tid == 0) ? this_thr->th.th_team : NULL__null;
1
Assuming 'tid' is not equal to 0
2
'?' condition is false
2403#if USE_ITT_BUILD1
2404 void *itt_sync_obj = NULL__null;
2405#endif /* USE_ITT_BUILD */
2406 if (team)
2.1
'team' is null
3
Taking false branch
2407
2408 KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d:%d) has arrived\n", gtid,
2409 (team != NULL) ? team->t.t_id : -1, tid));
2410
2411 // th_team pointer only valid for primary thread here
2412 if (KMP_MASTER_TID(tid)(0 == (tid))) {
4
Taking false branch
2413#if USE_ITT_BUILD1 && USE_ITT_NOTIFY1
2414 if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0) {
2415 // Create itt barrier object
2416 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 1);
2417 __kmp_itt_barrier_middle(gtid, itt_sync_obj); // Call acquired/releasing
2418 }
2419#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
2420
2421#ifdef KMP_DEBUG1
2422 KMP_DEBUG_ASSERT(team)if (!(team)) { __kmp_debug_assert("team", "openmp/runtime/src/kmp_barrier.cpp"
, 2422); }
;
2423 kmp_info_t **other_threads = team->t.t_threads;
2424 int i;
2425
2426 // Verify state
2427 KMP_MB();
2428
2429 for (i = 1; i < team->t.t_nproc; ++i) {
2430 KA_TRACE(500,if (kmp_a_debug >= 500) { __kmp_debug_printf ("__kmp_fork_barrier: T#%d(%d:0) checking T#%d(%d:%d) fork go "
"== %u.\n", gtid, team->t.t_id, other_threads[i]->th.th_info
.ds.ds_gtid, team->t.t_id, other_threads[i]->th.th_info
.ds.ds_tid, other_threads[i]->th.th_bar[bs_forkjoin_barrier
].bb.b_go); }
2431 ("__kmp_fork_barrier: T#%d(%d:0) checking T#%d(%d:%d) fork go "if (kmp_a_debug >= 500) { __kmp_debug_printf ("__kmp_fork_barrier: T#%d(%d:0) checking T#%d(%d:%d) fork go "
"== %u.\n", gtid, team->t.t_id, other_threads[i]->th.th_info
.ds.ds_gtid, team->t.t_id, other_threads[i]->th.th_info
.ds.ds_tid, other_threads[i]->th.th_bar[bs_forkjoin_barrier
].bb.b_go); }
2432 "== %u.\n",if (kmp_a_debug >= 500) { __kmp_debug_printf ("__kmp_fork_barrier: T#%d(%d:0) checking T#%d(%d:%d) fork go "
"== %u.\n", gtid, team->t.t_id, other_threads[i]->th.th_info
.ds.ds_gtid, team->t.t_id, other_threads[i]->th.th_info
.ds.ds_tid, other_threads[i]->th.th_bar[bs_forkjoin_barrier
].bb.b_go); }
2433 gtid, team->t.t_id, other_threads[i]->th.th_info.ds.ds_gtid,if (kmp_a_debug >= 500) { __kmp_debug_printf ("__kmp_fork_barrier: T#%d(%d:0) checking T#%d(%d:%d) fork go "
"== %u.\n", gtid, team->t.t_id, other_threads[i]->th.th_info
.ds.ds_gtid, team->t.t_id, other_threads[i]->th.th_info
.ds.ds_tid, other_threads[i]->th.th_bar[bs_forkjoin_barrier
].bb.b_go); }
2434 team->t.t_id, other_threads[i]->th.th_info.ds.ds_tid,if (kmp_a_debug >= 500) { __kmp_debug_printf ("__kmp_fork_barrier: T#%d(%d:0) checking T#%d(%d:%d) fork go "
"== %u.\n", gtid, team->t.t_id, other_threads[i]->th.th_info
.ds.ds_gtid, team->t.t_id, other_threads[i]->th.th_info
.ds.ds_tid, other_threads[i]->th.th_bar[bs_forkjoin_barrier
].bb.b_go); }
2435 other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go))if (kmp_a_debug >= 500) { __kmp_debug_printf ("__kmp_fork_barrier: T#%d(%d:0) checking T#%d(%d:%d) fork go "
"== %u.\n", gtid, team->t.t_id, other_threads[i]->th.th_info
.ds.ds_gtid, team->t.t_id, other_threads[i]->th.th_info
.ds.ds_tid, other_threads[i]->th.th_bar[bs_forkjoin_barrier
].bb.b_go); }
;
2436 KMP_DEBUG_ASSERT(if (!(((other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb
.b_go) & ~((1 << 0))) == 0)) { __kmp_debug_assert("((other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go) & ~((1 << 0))) == 0"
, "openmp/runtime/src/kmp_barrier.cpp", 2438); }
2437 (TCR_4(other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go) &if (!(((other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb
.b_go) & ~((1 << 0))) == 0)) { __kmp_debug_assert("((other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go) & ~((1 << 0))) == 0"
, "openmp/runtime/src/kmp_barrier.cpp", 2438); }
2438 ~(KMP_BARRIER_SLEEP_STATE)) == KMP_INIT_BARRIER_STATE)if (!(((other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb
.b_go) & ~((1 << 0))) == 0)) { __kmp_debug_assert("((other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go) & ~((1 << 0))) == 0"
, "openmp/runtime/src/kmp_barrier.cpp", 2438); }
;
2439 KMP_DEBUG_ASSERT(other_threads[i]->th.th_team == team)if (!(other_threads[i]->th.th_team == team)) { __kmp_debug_assert
("other_threads[i]->th.th_team == team", "openmp/runtime/src/kmp_barrier.cpp"
, 2439); }
;
2440 }
2441#endif
2442
2443 if (__kmp_tasking_mode != tskm_immediate_exec) {
2444 // 0 indicates setup current task team if nthreads > 1
2445 __kmp_task_team_setup(this_thr, team, 0);
2446 }
2447
2448 /* The primary thread may have changed its blocktime between join barrier
2449 and fork barrier. Copy the blocktime info to the thread, where
2450 __kmp_wait_template() can access it when the team struct is not
2451 guaranteed to exist. */
2452 // See note about the corresponding code in __kmp_join_barrier() being
2453 // performance-critical
2454 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME(2147483647)) {
2455#if KMP_USE_MONITOR
2456 this_thr->th.th_team_bt_intervals =
2457 team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
2458 this_thr->th.th_team_bt_set =
2459 team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
2460#else
2461 this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL(team, tid)((((team)->t.t_threads[(tid)]->th.th_current_task->td_icvs
.bt_set) ? ((team)->t.t_threads[(tid)]->th.th_current_task
->td_icvs.blocktime) : __kmp_dflt_blocktime) * __kmp_ticks_per_msec
)
;
2462#endif
2463 }
2464 } // primary thread
2465
2466 switch (__kmp_barrier_release_pattern[bs_forkjoin_barrier]) {
5
Control jumps to 'case bp_tree_bar:' at line 2483
2467 case bp_dist_bar: {
2468 __kmp_dist_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
2469 TRUE(!0) USE_ITT_BUILD_ARG(NULL), __null);
2470 break;
2471 }
2472 case bp_hyper_bar: {
2473 KMP_ASSERT(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier])if (!(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier])
) { __kmp_debug_assert("__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]"
, "openmp/runtime/src/kmp_barrier.cpp", 2473); }
;
2474 __kmp_hyper_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
2475 TRUE(!0) USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
2476 break;
2477 }
2478 case bp_hierarchical_bar: {
2479 __kmp_hierarchical_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
2480 TRUE(!0) USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
2481 break;
2482 }
2483 case bp_tree_bar: {
2484 KMP_ASSERT(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier])if (!(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier])
) { __kmp_debug_assert("__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]"
, "openmp/runtime/src/kmp_barrier.cpp", 2484); }
;
6
Assuming the condition is false
7
Taking false branch
2485 __kmp_tree_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
8
Calling '__kmp_tree_barrier_release'
21
Returning from '__kmp_tree_barrier_release'
2486 TRUE(!0) USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
2487 break;
2488 }
2489 default: {
2490 __kmp_linear_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
2491 TRUE(!0) USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
2492 }
2493 }
2494
2495#if OMPT_SUPPORT1
2496 if (ompt_enabled.enabled &&
22
Assuming field 'enabled' is not equal to 0
24
Taking true branch
2497 this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) {
23
Assuming field 'state' is equal to ompt_state_wait_barrier_implicit
2498 int ds_tid = this_thr->th.th_info.ds.ds_tid;
2499 ompt_data_t *task_data = (team
24.1
'team' is null
)
25
'?' condition is false
2500 ? OMPT_CUR_TASK_DATA(this_thr)(&(this_thr->th.th_current_task->ompt_task_info.task_data
))
2501 : &(this_thr->th.ompt_thread_info.task_data);
2502 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
2503#if OMPT_OPTIONAL1
2504 void *codeptr = NULL__null;
2505 if (KMP_MASTER_TID(ds_tid)(0 == (ds_tid)) &&
26
Assuming 'ds_tid' is equal to 0
29
Assuming pointer value is null
30
Taking false branch
2506 (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)ompt_callback_sync_region_wait_callback ||
27
Assuming field 'ompt_callback_sync_region_wait_callback' is null
2507 ompt_callbacks.ompt_callback(ompt_callback_sync_region)ompt_callback_sync_region_callback))
28
Assuming field 'ompt_callback_sync_region_callback' is null
2508 codeptr = team ? team->t.ompt_team_info.master_return_address : NULL__null;
2509 if (ompt_enabled.ompt_callback_sync_region_wait) {
31
Assuming field 'ompt_callback_sync_region_wait' is 0
32
Taking false branch
2510 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)ompt_callback_sync_region_wait_callback(
2511 ompt_sync_region_barrier_implicit, ompt_scope_end, NULL__null, task_data,
2512 codeptr);
2513 }
2514 if (ompt_enabled.ompt_callback_sync_region) {
33
Assuming field 'ompt_callback_sync_region' is not equal to 0
34
Taking true branch
2515 ompt_callbacks.ompt_callback(ompt_callback_sync_region)ompt_callback_sync_region_callback(
35
Called function pointer is null (null dereference)
2516 ompt_sync_region_barrier_implicit, ompt_scope_end, NULL__null, task_data,
2517 codeptr);
2518 }
2519#endif
2520 if (!KMP_MASTER_TID(ds_tid)(0 == (ds_tid)) && ompt_enabled.ompt_callback_implicit_task) {
2521 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)ompt_callback_implicit_task_callback(
2522 ompt_scope_end, NULL__null, task_data, 0, ds_tid,
2523 ompt_task_implicit); // TODO: Can this be ompt_task_initial?
2524 }
2525 }
2526#endif
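The path above is the crux of the report: step 16 records that ompt_callbacks.ompt_callback_sync_region_callback was observed to be null, step 33 nonetheless assumes the ompt_enabled.ompt_callback_sync_region field is nonzero, and step 35 then calls through the null function pointer at line 2515. Below is a minimal standalone sketch of that pattern together with a pointer-level guard that satisfies the checker; the type and function names are hypothetical stand-ins, not the runtime's actual OMPT plumbing, and the guard is shown only to illustrate the diagnostic, not as the upstream fix.

#include <cstdio>

// Hypothetical stand-ins for the OMPT callback table and enablement flags.
typedef void (*sync_region_cb_t)(int kind, int endpoint, void *parallel_data,
                                 void *task_data, const void *codeptr);

struct callback_table { sync_region_cb_t sync_region_callback = nullptr; };
struct enabled_flags { int sync_region = 1; }; // bit set, pointer not yet registered

static callback_table g_callbacks;
static enabled_flags g_enabled;

void emit_sync_region_end(void *task_data, const void *codeptr) {
  // Testing only g_enabled.sync_region reproduces the flagged pattern: the
  // enablement bit can be set while the callback pointer is still null.
  // Guarding on the pointer itself removes the null-dereference path.
  if (g_enabled.sync_region && g_callbacks.sync_region_callback != nullptr)
    g_callbacks.sync_region_callback(/*kind=*/1, /*endpoint=*/2, nullptr,
                                     task_data, codeptr);
}

int main() {
  emit_sync_region_end(nullptr, nullptr); // pointer is null, so nothing is called
  std::puts("no null function pointer was invoked");
  return 0;
}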
2527
2528 // Early exit for reaping threads releasing forkjoin barrier
2529 if (TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done)) {
2530 this_thr->th.th_task_team = NULL__null;
2531
2532#if USE_ITT_BUILD1 && USE_ITT_NOTIFY1
2533 if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0) {
2534 if (!KMP_MASTER_TID(tid)(0 == (tid))) {
2535 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
2536 if (itt_sync_obj)
2537 __kmp_itt_barrier_finished(gtid, itt_sync_obj);
2538 }
2539 }
2540#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
2541 KA_TRACE(10, ("__kmp_fork_barrier: T#%d is leaving early\n", gtid))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_fork_barrier: T#%d is leaving early\n"
, gtid); }
;
2542 return;
2543 }
2544
2545 /* We can now assume that a valid team structure has been allocated by the
2546 primary thread and propagated to all worker threads. The current thread,
2547 however, may not be part of the team, so we can't blindly assume that the
2548 team pointer is non-null. */
2549 team = (kmp_team_t *)TCR_PTR(this_thr->th.th_team)((void *)(this_thr->th.th_team));
2550 KMP_DEBUG_ASSERT(team != NULL)if (!(team != __null)) { __kmp_debug_assert("team != __null",
"openmp/runtime/src/kmp_barrier.cpp", 2550); }
;
2551 tid = __kmp_tid_from_gtid(gtid);
2552
2553#if KMP_BARRIER_ICV_PULL
2554 /* Primary thread's copy of the ICVs was set up on the implicit taskdata in
2555 __kmp_reinitialize_team. __kmp_fork_call() assumes the primary thread's
2556 implicit task has this data before this function is called. We cannot
2557 modify __kmp_fork_call() to look at the fixed ICVs in the primary thread's
2558 thread struct, because it is not always the case that the threads arrays
2559 have been allocated when __kmp_fork_call() is executed. */
2560 {
2561 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy)((void)0);
2562 if (!KMP_MASTER_TID(tid)(0 == (tid))) { // primary thread already has ICVs
2563 // Copy the initial ICVs from the primary thread's thread struct to the
2564 // implicit task for this tid.
2565 KA_TRACE(10,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_fork_barrier: T#%d(%d) is PULLing ICVs\n"
, gtid, tid); }
2566 ("__kmp_fork_barrier: T#%d(%d) is PULLing ICVs\n", gtid, tid))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_fork_barrier: T#%d(%d) is PULLing ICVs\n"
, gtid, tid); }
;
2567 __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team,
2568 tid, FALSE0);
2569 copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
2570 &team->t.t_threads[0]
2571 ->th.th_bar[bs_forkjoin_barrier]
2572 .bb.th_fixed_icvs);
2573 }
2574 }
2575#endif // KMP_BARRIER_ICV_PULL
2576
2577 if (__kmp_tasking_mode != tskm_immediate_exec) {
2578 __kmp_task_team_sync(this_thr, team);
2579 }
2580
2581#if KMP_AFFINITY_SUPPORTED1
2582 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
2583 if (proc_bind == proc_bind_intel) {
2584 // Call dynamic affinity settings
2585 if (__kmp_affinity.type == affinity_balanced && team->t.t_size_changed) {
2586 __kmp_balanced_affinity(this_thr, team->t.t_nproc);
2587 }
2588 } else if (proc_bind != proc_bind_false) {
2589 if (this_thr->th.th_new_place == this_thr->th.th_current_place) {
2590 KA_TRACE(100, ("__kmp_fork_barrier: T#%d already in correct place %d\n",if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_fork_barrier: T#%d already in correct place %d\n"
, __kmp_gtid_from_thread(this_thr), this_thr->th.th_current_place
); }
2591 __kmp_gtid_from_thread(this_thr),if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_fork_barrier: T#%d already in correct place %d\n"
, __kmp_gtid_from_thread(this_thr), this_thr->th.th_current_place
); }
2592 this_thr->th.th_current_place))if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_fork_barrier: T#%d already in correct place %d\n"
, __kmp_gtid_from_thread(this_thr), this_thr->th.th_current_place
); }
;
2593 } else {
2594 __kmp_affinity_set_place(gtid);
2595 }
2596 }
2597#endif // KMP_AFFINITY_SUPPORTED
2598 // Perform the display affinity functionality
2599 if (__kmp_display_affinity) {
2600 if (team->t.t_display_affinity
2601#if KMP_AFFINITY_SUPPORTED1
2602 || (__kmp_affinity.type == affinity_balanced && team->t.t_size_changed)
2603#endif
2604 ) {
2605 // NULL means use the affinity-format-var ICV
2606 __kmp_aux_display_affinity(gtid, NULL__null);
2607 this_thr->th.th_prev_num_threads = team->t.t_nproc;
2608 this_thr->th.th_prev_level = team->t.t_level;
2609 }
2610 }
2611 if (!KMP_MASTER_TID(tid)(0 == (tid)))
2612 KMP_CHECK_UPDATE(this_thr->th.th_def_allocator, team->t.t_def_allocator)if ((this_thr->th.th_def_allocator) != (team->t.t_def_allocator
)) (this_thr->th.th_def_allocator) = (team->t.t_def_allocator
)
;
2613
2614#if USE_ITT_BUILD1 && USE_ITT_NOTIFY1
2615 if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0) {
2616 if (!KMP_MASTER_TID(tid)(0 == (tid))) {
2617 // Get correct barrier object
2618 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
2619 __kmp_itt_barrier_finished(gtid, itt_sync_obj); // Workers call acquired
2620 } // (prepare called inside barrier_release)
2621 }
2622#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
2623 KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d:%d) is leaving\n", gtid,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_fork_barrier: T#%d(%d:%d) is leaving\n"
, gtid, team->t.t_id, tid); }
2624 team->t.t_id, tid))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_fork_barrier: T#%d(%d:%d) is leaving\n"
, gtid, team->t.t_id, tid); }
;
2625}
2626
2627void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
2628 kmp_internal_control_t *new_icvs, ident_t *loc) {
2629 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_setup_icv_copy)((void)0);
2630
2631 KMP_DEBUG_ASSERT(team && new_nproc && new_icvs)if (!(team && new_nproc && new_icvs)) { __kmp_debug_assert
("team && new_nproc && new_icvs", "openmp/runtime/src/kmp_barrier.cpp"
, 2631); }
;
2632 KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc)if (!((!(__kmp_init_parallel)) || new_icvs->nproc)) { __kmp_debug_assert
("(!(__kmp_init_parallel)) || new_icvs->nproc", "openmp/runtime/src/kmp_barrier.cpp"
, 2632); }
;
2633
2634/* Primary thread's copy of the ICVs was set up on the implicit taskdata in
2635 __kmp_reinitialize_team. __kmp_fork_call() assumes the primary thread's
2636 implicit task has this data before this function is called. */
2637#if KMP_BARRIER_ICV_PULL
2638 /* Copy ICVs to primary thread's thread structure into th_fixed_icvs (which
2639 remains untouched), where all of the worker threads can access them and
2640 make their own copies after the barrier. */
2641 KMP_DEBUG_ASSERT(team->t.t_threads[0])if (!(team->t.t_threads[0])) { __kmp_debug_assert("team->t.t_threads[0]"
, "openmp/runtime/src/kmp_barrier.cpp", 2641); }
; // The threads arrays should be
2642 // allocated at this point
2643 copy_icvs(
2644 &team->t.t_threads[0]->th.th_bar[bs_forkjoin_barrier].bb.th_fixed_icvs,
2645 new_icvs);
2646 KF_TRACE(10, ("__kmp_setup_icv_copy: PULL: T#%d this_thread=%p team=%p\n", 0,if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_setup_icv_copy: PULL: T#%d this_thread=%p team=%p\n"
, 0, team->t.t_threads[0], team); }
2647 team->t.t_threads[0], team))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_setup_icv_copy: PULL: T#%d this_thread=%p team=%p\n"
, 0, team->t.t_threads[0], team); }
;
2648#elif KMP_BARRIER_ICV_PUSH1
2649 // The ICVs will be propagated in the fork barrier, so nothing needs to be
2650 // done here.
2651 KF_TRACE(10, ("__kmp_setup_icv_copy: PUSH: T#%d this_thread=%p team=%p\n", 0,if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_setup_icv_copy: PUSH: T#%d this_thread=%p team=%p\n"
, 0, team->t.t_threads[0], team); }
2652 team->t.t_threads[0], team))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_setup_icv_copy: PUSH: T#%d this_thread=%p team=%p\n"
, 0, team->t.t_threads[0], team); }
;
2653#else
2654 // Copy the ICVs to each of the non-primary threads. This takes O(nthreads)
2655 // time.
2656 ngo_load(new_icvs)((void)0);
2657 KMP_DEBUG_ASSERT(team->t.t_threads[0])if (!(team->t.t_threads[0])) { __kmp_debug_assert("team->t.t_threads[0]"
, "openmp/runtime/src/kmp_barrier.cpp", 2657); }
; // The threads arrays should be
2658 // allocated at this point
2659 for (int f = 1; f < new_nproc; ++f) { // Skip the primary thread
2660 // TODO: GEH - pass in better source location info since usually NULL here
2661 KF_TRACE(10, ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n",if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n"
, f, team->t.t_threads[f], team); }
2662 f, team->t.t_threads[f], team))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n"
, f, team->t.t_threads[f], team); }
;
2663 __kmp_init_implicit_task(loc, team->t.t_threads[f], team, f, FALSE0);
2664 ngo_store_icvs(&team->t.t_implicit_task_taskdata[f].td_icvs, new_icvs)copy_icvs((&team->t.t_implicit_task_taskdata[f].td_icvs
), (new_icvs))
;
2665 KF_TRACE(10, ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n",if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n"
, f, team->t.t_threads[f], team); }
2666 f, team->t.t_threads[f], team))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n"
, f, team->t.t_threads[f], team); }
;
2667 }
2668 ngo_sync()((void)0);
2669#endif // KMP_BARRIER_ICV_PULL
2670}

/build/source/openmp/runtime/src/kmp_wait_release.h

1/*
2 * kmp_wait_release.h -- Wait/Release implementation
3 */
4
5//===----------------------------------------------------------------------===//
6//
7// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8// See https://llvm.org/LICENSE.txt for license information.
9// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef KMP_WAIT_RELEASE_H
14#define KMP_WAIT_RELEASE_H
15
16#include "kmp.h"
17#include "kmp_itt.h"
18#include "kmp_stats.h"
19#if OMPT_SUPPORT1
20#include "ompt-specific.h"
21#endif
22
23/*!
24@defgroup WAIT_RELEASE Wait/Release operations
25
26The definitions and functions here implement the lowest level thread
27synchronizations of suspending a thread and awaking it. They are used to build
28higher level operations such as barriers and fork/join.
29*/
30
31/*!
32@ingroup WAIT_RELEASE
33@{
34*/
35
36struct flag_properties {
37 unsigned int type : 16;
38 unsigned int reserved : 16;
39};
40
41template <enum flag_type FlagType> struct flag_traits {};
42
43template <> struct flag_traits<flag32> {
44 typedef kmp_uint32 flag_t;
45 static const flag_type t = flag32;
46 static inline flag_t tcr(flag_t f) { return TCR_4(f)(f); }
47 static inline flag_t test_then_add4(volatile flag_t *f) {
48 return KMP_TEST_THEN_ADD4_32(RCAST(volatile kmp_int32 *, f))__sync_fetch_and_add((volatile kmp_int32 *)(reinterpret_cast<
volatile kmp_int32 *>(f)), 4)
;
49 }
50 static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
51 return KMP_TEST_THEN_OR32(f, v)__sync_fetch_and_or((volatile kmp_uint32 *)(f), (kmp_uint32)(
v))
;
52 }
53 static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
54 return KMP_TEST_THEN_AND32(f, v)__sync_fetch_and_and((volatile kmp_uint32 *)(f), (kmp_uint32)
(v))
;
55 }
56};
57
58template <> struct flag_traits<atomic_flag64> {
59 typedef kmp_uint64 flag_t;
60 static const flag_type t = atomic_flag64;
61 static inline flag_t tcr(flag_t f) { return TCR_8(f)(f); }
62 static inline flag_t test_then_add4(volatile flag_t *f) {
63 return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f))__sync_fetch_and_add((volatile kmp_int64 *)(reinterpret_cast<
volatile kmp_int64 *>(f)), 4LL)
;
64 }
65 static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
66 return KMP_TEST_THEN_OR64(f, v)__sync_fetch_and_or((volatile kmp_uint64 *)(f), (kmp_uint64)(
v))
;
67 }
68 static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
69 return KMP_TEST_THEN_AND64(f, v)__sync_fetch_and_and((volatile kmp_uint64 *)(f), (kmp_uint64)
(v))
;
70 }
71};
72
73template <> struct flag_traits<flag64> {
74 typedef kmp_uint64 flag_t;
75 static const flag_type t = flag64;
76 static inline flag_t tcr(flag_t f) { return TCR_8(f)(f); }
77 static inline flag_t test_then_add4(volatile flag_t *f) {
78 return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f))__sync_fetch_and_add((volatile kmp_int64 *)(reinterpret_cast<
volatile kmp_int64 *>(f)), 4LL)
;
79 }
80 static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
81 return KMP_TEST_THEN_OR64(f, v)__sync_fetch_and_or((volatile kmp_uint64 *)(f), (kmp_uint64)(
v))
;
82 }
83 static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
84 return KMP_TEST_THEN_AND64(f, v)__sync_fetch_and_and((volatile kmp_uint64 *)(f), (kmp_uint64)
(v))
;
85 }
86};
87
88template <> struct flag_traits<flag_oncore> {
89 typedef kmp_uint64 flag_t;
90 static const flag_type t = flag_oncore;
91 static inline flag_t tcr(flag_t f) { return TCR_8(f)(f); }
92 static inline flag_t test_then_add4(volatile flag_t *f) {
93 return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f))__sync_fetch_and_add((volatile kmp_int64 *)(reinterpret_cast<
volatile kmp_int64 *>(f)), 4LL)
;
94 }
95 static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
96 return KMP_TEST_THEN_OR64(f, v)__sync_fetch_and_or((volatile kmp_uint64 *)(f), (kmp_uint64)(
v))
;
97 }
98 static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
99 return KMP_TEST_THEN_AND64(f, v)__sync_fetch_and_and((volatile kmp_uint64 *)(f), (kmp_uint64)
(v))
;
100 }
101};
102
103/*! Base class for all flags */
104template <flag_type FlagType> class kmp_flag {
105protected:
106 flag_properties t; /**< "Type" of the flag in loc */
107 kmp_info_t *waiting_threads[1]; /**< Threads sleeping on this thread. */
108 kmp_uint32 num_waiting_threads; /**< Num threads sleeping on this thread. */
109 std::atomic<bool> *sleepLoc;
110
111public:
112 typedef flag_traits<FlagType> traits_type;
113 kmp_flag() : t({FlagType, 0U}), num_waiting_threads(0), sleepLoc(nullptr) {}
114 kmp_flag(int nwaiters)
115 : t({FlagType, 0U}), num_waiting_threads(nwaiters), sleepLoc(nullptr) {}
116 kmp_flag(std::atomic<bool> *sloc)
117 : t({FlagType, 0U}), num_waiting_threads(0), sleepLoc(sloc) {}
118 /*! @result the flag_type */
119 flag_type get_type() { return (flag_type)(t.type); }
120
121 /*! param i in index into waiting_threads
122 * @result the thread that is waiting at index i */
123 kmp_info_t *get_waiter(kmp_uint32 i) {
124 KMP_DEBUG_ASSERT(i < num_waiting_threads)if (!(i < num_waiting_threads)) { __kmp_debug_assert("i < num_waiting_threads"
, "openmp/runtime/src/kmp_wait_release.h", 124); }
;
125 return waiting_threads[i];
126 }
127 /*! @result num_waiting_threads */
128 kmp_uint32 get_num_waiters() { return num_waiting_threads; }
129 /*! @param thr in the thread which is now waiting
130 * Insert a waiting thread at index 0. */
131 void set_waiter(kmp_info_t *thr) {
132 waiting_threads[0] = thr;
133 num_waiting_threads = 1;
134 }
135 enum barrier_type get_bt() { return bs_last_barrier; }
136};
137
138/*! Base class for wait/release volatile flag */
139template <typename PtrType, flag_type FlagType, bool Sleepable>
140class kmp_flag_native : public kmp_flag<FlagType> {
141protected:
142 volatile PtrType *loc;
143 PtrType checker; /**< When flag==checker, it has been released. */
144 typedef flag_traits<FlagType> traits_type;
145
146public:
147 typedef PtrType flag_t;
148 kmp_flag_native(volatile PtrType *p) : kmp_flag<FlagType>(), loc(p) {}
149 kmp_flag_native(volatile PtrType *p, kmp_info_t *thr)
150 : kmp_flag<FlagType>(1), loc(p) {
151 this->waiting_threads[0] = thr;
152 }
153 kmp_flag_native(volatile PtrType *p, PtrType c)
154 : kmp_flag<FlagType>(), loc(p), checker(c) {}
155 kmp_flag_native(volatile PtrType *p, PtrType c, std::atomic<bool> *sloc)
156 : kmp_flag<FlagType>(sloc), loc(p), checker(c) {}
157 virtual ~kmp_flag_native() {}
158 void *operator new(size_t size) { return __kmp_allocate(size)___kmp_allocate((size), "openmp/runtime/src/kmp_wait_release.h"
, 158)
; }
159 void operator delete(void *p) { __kmp_free(p)___kmp_free((p), "openmp/runtime/src/kmp_wait_release.h", 159
)
; }
160 volatile PtrType *get() { return loc; }
161 void *get_void_p() { return RCAST(void *, CCAST(PtrType *, loc))reinterpret_cast<void *>(const_cast<PtrType *>(loc
))
; }
162 void set(volatile PtrType *new_loc) { loc = new_loc; }
163 PtrType load() { return *loc; }
164 void store(PtrType val) { *loc = val; }
165 /*! @result true if the flag object has been released. */
166 virtual bool done_check() {
167 if (Sleepable && !(this->sleepLoc))
168 return (traits_type::tcr(*(this->get())) & ~KMP_BARRIER_SLEEP_STATE(1 << 0)) ==
169 checker;
170 else
171 return traits_type::tcr(*(this->get())) == checker;
172 }
173 /*! @param old_loc in old value of flag
174 * @result true if the flag's old value indicates it was released. */
175 virtual bool done_check_val(PtrType old_loc) { return old_loc == checker; }
176 /*! @result true if the flag object is not yet released.
177 * Used in __kmp_wait_template like:
178 * @code
179 * while (flag.notdone_check()) { pause(); }
180 * @endcode */
181 virtual bool notdone_check() {
182 return traits_type::tcr(*(this->get())) != checker;
183 }
184 /*! @result Actual flag value before release was applied.
185 * Trigger all waiting threads to run by modifying flag to release state. */
186 void internal_release() {
187 (void)traits_type::test_then_add4((volatile PtrType *)this->get());
188 }
189 /*! @result Actual flag value before sleep bit(s) set.
190 * Notes that there is at least one thread sleeping on the flag by setting
191 * sleep bit(s). */
192 PtrType set_sleeping() {
193 if (this->sleepLoc) {
194 this->sleepLoc->store(true);
195 return *(this->get());
196 }
197 return traits_type::test_then_or((volatile PtrType *)this->get(),
198 KMP_BARRIER_SLEEP_STATE(1 << 0));
199 }
200 /*! @result Actual flag value before sleep bit(s) cleared.
201 * Notes that there are no longer threads sleeping on the flag by clearing
202 * sleep bit(s). */
203 void unset_sleeping() {
204 if (this->sleepLoc) {
205 this->sleepLoc->store(false);
206 return;
207 }
208 traits_type::test_then_and((volatile PtrType *)this->get(),
209 ~KMP_BARRIER_SLEEP_STATE(1 << 0));
210 }
211 /*! @param old_loc in old value of flag
212 * Test if there are threads sleeping on the flag's old value in old_loc. */
213 bool is_sleeping_val(PtrType old_loc) {
214 if (this->sleepLoc)
215 return this->sleepLoc->load();
216 return old_loc & KMP_BARRIER_SLEEP_STATE(1 << 0);
217 }
218 /*! Test whether there are threads sleeping on the flag. */
219 bool is_sleeping() {
220 if (this->sleepLoc)
221 return this->sleepLoc->load();
222 return is_sleeping_val(*(this->get()));
223 }
224 bool is_any_sleeping() {
225 if (this->sleepLoc)
226 return this->sleepLoc->load();
227 return is_sleeping_val(*(this->get()));
228 }
229 kmp_uint8 *get_stolen() { return NULL__null; }
230};
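The done_check(), set_sleeping() and is_sleeping_val() members above all hinge on one encoding: bit 0 of the flag word (KMP_BARRIER_SLEEP_STATE) records that a waiter has gone to sleep, so a sleep-aware done check masks that bit out before comparing against the checker. A small self-contained illustration of that encoding with plain integers rather than the kmp_flag classes:

#include <cstdint>
#include <cstdio>

constexpr std::uint64_t kSleepBit = 1u << 0; // mirrors KMP_BARRIER_SLEEP_STATE

// Sleep-aware done check: ignore the sleep bit when comparing to the checker.
bool done_check(std::uint64_t flag_word, std::uint64_t checker) {
  return (flag_word & ~kSleepBit) == checker;
}

int main() {
  std::uint64_t released = 4;                 // value the waiter is checking for
  std::uint64_t released_with_sleeper = released | kSleepBit;
  std::printf("plain: %d  with sleep bit: %d\n",
              (int)done_check(released, 4),
              (int)done_check(released_with_sleeper, 4));
  // Both print 1: a recorded sleeper does not hide the release from the waiter.
  return 0;
}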
231
232/*! Base class for wait/release atomic flag */
233template <typename PtrType, flag_type FlagType, bool Sleepable>
234class kmp_flag_atomic : public kmp_flag<FlagType> {
235protected:
236 std::atomic<PtrType> *loc; /**< Pointer to flag location to wait on */
237 PtrType checker; /**< Flag == checker means it has been released. */
238public:
239 typedef flag_traits<FlagType> traits_type;
240 typedef PtrType flag_t;
241 kmp_flag_atomic(std::atomic<PtrType> *p) : kmp_flag<FlagType>(), loc(p) {}
242 kmp_flag_atomic(std::atomic<PtrType> *p, kmp_info_t *thr)
243 : kmp_flag<FlagType>(1), loc(p) {
244 this->waiting_threads[0] = thr;
245 }
246 kmp_flag_atomic(std::atomic<PtrType> *p, PtrType c)
247 : kmp_flag<FlagType>(), loc(p), checker(c) {}
248 kmp_flag_atomic(std::atomic<PtrType> *p, PtrType c, std::atomic<bool> *sloc)
249 : kmp_flag<FlagType>(sloc), loc(p), checker(c) {}
250 /*! @result the pointer to the actual flag */
251 std::atomic<PtrType> *get() { return loc; }
252 /*! @result void* pointer to the actual flag */
253 void *get_void_p() { return RCAST(void *, loc)reinterpret_cast<void *>(loc); }
254 /*! @param new_loc in set loc to point at new_loc */
255 void set(std::atomic<PtrType> *new_loc) { loc = new_loc; }
256 /*! @result flag value */
257 PtrType load() { return loc->load(std::memory_order_acquire); }
258 /*! @param val the new flag value to be stored */
259 void store(PtrType val) { loc->store(val, std::memory_order_release); }
260 /*! @result true if the flag object has been released. */
261 bool done_check() {
262 if (Sleepable && !(this->sleepLoc))
263 return (this->load() & ~KMP_BARRIER_SLEEP_STATE(1 << 0)) == checker;
264 else
265 return this->load() == checker;
266 }
267 /*! @param old_loc in old value of flag
268 * @result true if the flag's old value indicates it was released. */
269 bool done_check_val(PtrType old_loc) { return old_loc == checker; }
270 /*! @result true if the flag object is not yet released.
271 * Used in __kmp_wait_template like:
272 * @code
273 * while (flag.notdone_check()) { pause(); }
274 * @endcode */
275 bool notdone_check() { return this->load() != checker; }
276 /*! @result Actual flag value before release was applied.
277 * Trigger all waiting threads to run by modifying flag to release state. */
278 void internal_release() { KMP_ATOMIC_ADD(this->get(), 4)(this->get())->fetch_add(4, std::memory_order_acq_rel); }
279 /*! @result Actual flag value before sleep bit(s) set.
280 * Notes that there is at least one thread sleeping on the flag by setting
281 * sleep bit(s). */
282 PtrType set_sleeping() {
283 if (this->sleepLoc) {
284 this->sleepLoc->store(true);
285 return *(this->get());
286 }
287 return KMP_ATOMIC_OR(this->get(), KMP_BARRIER_SLEEP_STATE)(this->get())->fetch_or((1 << 0), std::memory_order_acq_rel
)
;
288 }
289 /*! @result Actual flag value before sleep bit(s) cleared.
290 * Notes that there are no longer threads sleeping on the flag by clearing
291 * sleep bit(s). */
292 void unset_sleeping() {
293 if (this->sleepLoc) {
294 this->sleepLoc->store(false);
295 return;
296 }
297 KMP_ATOMIC_AND(this->get(), ~KMP_BARRIER_SLEEP_STATE)(this->get())->fetch_and(~(1 << 0), std::memory_order_acq_rel
)
;
298 }
299 /*! @param old_loc in old value of flag
300 * Test whether there are threads sleeping on flag's old value in old_loc. */
301 bool is_sleeping_val(PtrType old_loc) {
302 if (this->sleepLoc)
303 return this->sleepLoc->load();
304 return old_loc & KMP_BARRIER_SLEEP_STATE(1 << 0);
305 }
306 /*! Test whether there are threads sleeping on the flag. */
307 bool is_sleeping() {
308 if (this->sleepLoc)
309 return this->sleepLoc->load();
310 return is_sleeping_val(this->load());
311 }
312 bool is_any_sleeping() {
313 if (this->sleepLoc)
314 return this->sleepLoc->load();
315 return is_sleeping_val(this->load());
316 }
317 kmp_uint8 *get_stolen() { return NULL__null; }
318};
319
320#if OMPT_SUPPORT1
321OMPT_NOINLINE__attribute__((noinline))
322static void __ompt_implicit_task_end(kmp_info_t *this_thr,
323 ompt_state_t ompt_state,
324 ompt_data_t *tId) {
325 int ds_tid = this_thr->th.th_info.ds.ds_tid;
326 if (ompt_state == ompt_state_wait_barrier_implicit) {
327 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
328#if OMPT_OPTIONAL1
329 void *codeptr = NULL__null;
330 if (ompt_enabled.ompt_callback_sync_region_wait) {
331 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)ompt_callback_sync_region_wait_callback(
332 ompt_sync_region_barrier_implicit, ompt_scope_end, NULL__null, tId,
333 codeptr);
334 }
335 if (ompt_enabled.ompt_callback_sync_region) {
336 ompt_callbacks.ompt_callback(ompt_callback_sync_region)ompt_callback_sync_region_callback(
337 ompt_sync_region_barrier_implicit, ompt_scope_end, NULL__null, tId,
338 codeptr);
339 }
340#endif
341 if (!KMP_MASTER_TID(ds_tid)(0 == (ds_tid))) {
342 if (ompt_enabled.ompt_callback_implicit_task) {
343 int flags = this_thr->th.ompt_thread_info.parallel_flags;
344 flags = (flags & ompt_parallel_league) ? ompt_task_initial
345 : ompt_task_implicit;
346 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)ompt_callback_implicit_task_callback(
347 ompt_scope_end, NULL__null, tId, 0, ds_tid, flags);
348 }
349 // return to idle state
350 this_thr->th.ompt_thread_info.state = ompt_state_idle;
351 } else {
352 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
353 }
354 }
355}
356#endif
357
358/* Spin wait loop that first does pause/yield, then sleep. A thread that calls
359 __kmp_wait_* must make certain that another thread calls __kmp_release
360 to wake it back up to prevent deadlocks!
361
362 NOTE: We may not belong to a team at this point. */
363template <class C, bool final_spin, bool Cancellable = false,
364 bool Sleepable = true>
365static inline bool
366__kmp_wait_template(kmp_info_t *this_thr,
367 C *flag USE_ITT_BUILD_ARG(void *itt_sync_obj), void *itt_sync_obj) {
368#if USE_ITT_BUILD1 && USE_ITT_NOTIFY1
369 volatile void *spin = flag->get();
370#endif
371 kmp_uint32 spins;
372 int th_gtid;
373 int tasks_completed = FALSE0;
374#if !KMP_USE_MONITOR
375 kmp_uint64 poll_count;
376 kmp_uint64 hibernate_goal;
377#else
378 kmp_uint32 hibernate;
379#endif
380 kmp_uint64 time;
381
382 KMP_FSYNC_SPIN_INIT(spin, NULL)int sync_iters = 0; if (__kmp_itt_fsync_prepare_ptr__3_0) { if
(spin == __null) { spin = __null; } } __asm__ __volatile__("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 "
::"i"(0x4376) : "%ebx")
;
383 if (flag->done_check()) {
384 KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin))do { __asm__ __volatile__("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 "
::"i"(0x4377) : "%ebx"); if (sync_iters >= __kmp_itt_prepare_delay
) { (!__kmp_itt_fsync_acquired_ptr__3_0) ? (void)0 : __kmp_itt_fsync_acquired_ptr__3_0
((void *)((void *)const_cast<void *>(spin))); } } while
(0)
;
385 return false;
386 }
387 th_gtid = this_thr->th.th_info.ds.ds_gtid;
388 if (Cancellable) {
389 kmp_team_t *team = this_thr->th.th_team;
390 if (team && team->t.t_cancel_request == cancel_parallel)
391 return true;
392 }
393#if KMP_OS_UNIX1
394 if (final_spin)
395 KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true)(&this_thr->th.th_blocking)->store(true, std::memory_order_release
)
;
396#endif
397 KA_TRACE(20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n"
, th_gtid, flag); }
398 ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n"
, th_gtid, flag); }
;
399#if KMP_STATS_ENABLED0
400 stats_state_e thread_state = KMP_GET_THREAD_STATE()((void)0);
401#endif
402
403/* OMPT Behavior:
404THIS function is called from
405 __kmp_barrier (2 times) (implicit or explicit barrier in parallel regions)
406 these have join / fork behavior
407
408 In these cases, we don't change the state or trigger events in THIS
409function.
410 Events are triggered in the calling code (__kmp_barrier):
411
412 state := ompt_state_overhead
413 barrier-begin
414 barrier-wait-begin
415 state := ompt_state_wait_barrier
416 call join-barrier-implementation (finally arrive here)
417 {}
418 call fork-barrier-implementation (finally arrive here)
419 {}
420 state := ompt_state_overhead
421 barrier-wait-end
422 barrier-end
423 state := ompt_state_work_parallel
424
425
426 __kmp_fork_barrier (after thread creation, before executing implicit task)
427 call fork-barrier-implementation (finally arrive here)
428 {} // worker arrive here with state = ompt_state_idle
429
430
431 __kmp_join_barrier (implicit barrier at end of parallel region)
432 state := ompt_state_barrier_implicit
433 barrier-begin
434 barrier-wait-begin
435 call join-barrier-implementation (finally arrive here
436final_spin=FALSE)
437 {
438 }
439 __kmp_fork_barrier (implicit barrier at end of parallel region)
440 call fork-barrier-implementation (finally arrive here final_spin=TRUE)
441
442 Worker after task-team is finished:
443 barrier-wait-end
444 barrier-end
445 implicit-task-end
446 idle-begin
447 state := ompt_state_idle
448
449 Before leaving, if state = ompt_state_idle
450 idle-end
451 state := ompt_state_overhead
452*/
453#if OMPT_SUPPORT1
454 ompt_state_t ompt_entry_state;
455 ompt_data_t *tId;
456 if (ompt_enabled.enabled) {
457 ompt_entry_state = this_thr->th.ompt_thread_info.state;
458 if (!final_spin || ompt_entry_state != ompt_state_wait_barrier_implicit ||
459 KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)(0 == (this_thr->th.th_info.ds.ds_tid))) {
460 ompt_lw_taskteam_t *team = NULL__null;
461 if (this_thr->th.th_team)
462 team = this_thr->th.th_team->t.ompt_serialized_team_info;
463 if (team) {
464 tId = &(team->ompt_task_info.task_data);
465 } else {
466 tId = OMPT_CUR_TASK_DATA(this_thr)(&(this_thr->th.th_current_task->ompt_task_info.task_data
))
;
467 }
468 } else {
469 tId = &(this_thr->th.ompt_thread_info.task_data);
470 }
471 if (final_spin && (__kmp_tasking_mode == tskm_immediate_exec ||
472 this_thr->th.th_task_team == NULL__null)) {
473 // implicit task is done. Either no taskqueue, or task-team finished
474 __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
475 }
476 }
477#endif
478
479 KMP_INIT_YIELD(spins){ (spins) = __kmp_yield_init; }; // Setup for waiting
480 KMP_INIT_BACKOFF(time){ (time) = __kmp_pause_init; };
481
482 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME(2147483647) ||
483 __kmp_pause_status == kmp_soft_paused) {
484#if KMP_USE_MONITOR
485// The worker threads cannot rely on the team struct existing at this point.
486// Use the bt values cached in the thread struct instead.
487#ifdef KMP_ADJUST_BLOCKTIME1
488 if (__kmp_pause_status == kmp_soft_paused ||
489 (__kmp_zero_bt && !this_thr->th.th_team_bt_set))
490 // Force immediate suspend if not set by user and more threads than
491 // available procs
492 hibernate = 0;
493 else
494 hibernate = this_thr->th.th_team_bt_intervals;
495#else
496 hibernate = this_thr->th.th_team_bt_intervals;
497#endif /* KMP_ADJUST_BLOCKTIME */
498
499 /* If the blocktime is nonzero, we want to make sure that we spin wait for
500 the entirety of the specified #intervals, plus up to one interval more.
 501 This increment makes certain that this thread doesn't go to sleep too
502 soon. */
503 if (hibernate != 0)
504 hibernate++;
505
506 // Add in the current time value.
507 hibernate += TCR_4(__kmp_global.g.g_time.dt.t_value)(__kmp_global.g.g_time.dt.t_value);
508 KF_TRACE(20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",if (kmp_f_debug >= 20) { __kmp_debug_printf ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n"
, th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate, hibernate
- __kmp_global.g.g_time.dt.t_value); }
509 th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate,if (kmp_f_debug >= 20) { __kmp_debug_printf ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n"
, th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate, hibernate
- __kmp_global.g.g_time.dt.t_value); }
510 hibernate - __kmp_global.g.g_time.dt.t_value))if (kmp_f_debug >= 20) { __kmp_debug_printf ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n"
, th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate, hibernate
- __kmp_global.g.g_time.dt.t_value); }
;
511#else
512 if (__kmp_pause_status == kmp_soft_paused) {
513 // Force immediate suspend
514 hibernate_goal = KMP_NOW()__kmp_hardware_timestamp();
515 } else
516 hibernate_goal = KMP_NOW()__kmp_hardware_timestamp() + this_thr->th.th_team_bt_intervals;
517 poll_count = 0;
518 (void)poll_count;
519#endif // KMP_USE_MONITOR
520 }
521
522 KMP_MB();
523
524 // Main wait spin loop
525 while (flag->notdone_check()) {
526 kmp_task_team_t *task_team = NULL__null;
527 if (__kmp_tasking_mode != tskm_immediate_exec) {
528 task_team = this_thr->th.th_task_team;
529 /* If the thread's task team pointer is NULL, it means one of 3 things:
530 1) A newly-created thread is first being released by
531 __kmp_fork_barrier(), and its task team has not been set up yet.
532 2) All tasks have been executed to completion.
533 3) Tasking is off for this region. This could be because we are in a
534 serialized region (perhaps the outer one), or else tasking was manually
535 disabled (KMP_TASKING=0). */
536 if (task_team != NULL__null) {
537 if (TCR_SYNC_4(task_team->tt.tt_active)(task_team->tt.tt_active)) {
538 if (KMP_TASKING_ENABLED(task_team)((!0) == ((task_team)->tt.tt_found_tasks))) {
539 flag->execute_tasks(
540 this_thr, th_gtid, final_spin,
541 &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj, 0);
542 } else
543 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP1;
544 } else {
545 KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid))if (!(!(0 == (this_thr->th.th_info.ds.ds_tid)))) { __kmp_debug_assert
("!(0 == (this_thr->th.th_info.ds.ds_tid))", "openmp/runtime/src/kmp_wait_release.h"
, 545); }
;
546#if OMPT_SUPPORT1
 547 // task-team is done now, other cases should be caught above
548 if (final_spin && ompt_enabled.enabled)
549 __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
550#endif
551 this_thr->th.th_task_team = NULL__null;
552 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP1;
553 }
554 } else {
555 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP1;
556 } // if
557 } // if
558
559 KMP_FSYNC_SPIN_PREPARE(CCAST(void *, spin))do { if (__kmp_itt_fsync_prepare_ptr__3_0 && sync_iters
< __kmp_itt_prepare_delay) { ++sync_iters; if (sync_iters
>= __kmp_itt_prepare_delay) { (!__kmp_itt_fsync_prepare_ptr__3_0
) ? (void)0 : __kmp_itt_fsync_prepare_ptr__3_0((void *)((void
*)const_cast<void *>(spin))); } } } while (0)
;
560 if (TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done)) {
561 if (__kmp_global.g.g_abort)
562 __kmp_abort_thread();
563 break;
564 }
565
566 // If we are oversubscribed, or have waited a bit (and
567 // KMP_LIBRARY=throughput), then yield
568 KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time){ if (__kmp_tpause_enabled) { if (((__kmp_nth) > (__kmp_avail_proc
? __kmp_avail_proc : __kmp_xproc))) { __kmp_tpause(0, (time)
); } else { __kmp_tpause(__kmp_tpause_hint, (time)); } (time)
= (time << 1 | 1) & ((kmp_uint64)0xFFFF); } else {
__kmp_x86_pause(); if ((((__kmp_use_yield == 1 || __kmp_use_yield
== 2) && (((__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc
: __kmp_xproc)))))) { __kmp_yield(); } else if (__kmp_use_yield
== 1) { (spins) -= 2; if (!(spins)) { __kmp_yield(); (spins)
= __kmp_yield_next; } } } }
;
569
570#if KMP_STATS_ENABLED0
571 // Check if thread has been signalled to idle state
572 // This indicates that the logical "join-barrier" has finished
573 if (this_thr->th.th_stats->isIdle() &&
574 KMP_GET_THREAD_STATE()((void)0) == FORK_JOIN_BARRIER) {
575 KMP_SET_THREAD_STATE(IDLE)((void)0);
576 KMP_PUSH_PARTITIONED_TIMER(OMP_idle)((void)0);
577 }
578#endif
579 // Check if the barrier surrounding this wait loop has been cancelled
580 if (Cancellable) {
581 kmp_team_t *team = this_thr->th.th_team;
582 if (team && team->t.t_cancel_request == cancel_parallel)
583 break;
584 }
585
586 // For hidden helper thread, if task_team is nullptr, it means the main
587 // thread has not released the barrier. We cannot wait here because once the
588 // main thread releases all children barriers, all hidden helper threads are
 589 // still sleeping. The follow-up configuration, such as task team sync,
 590 // would then be skipped, leaving this thread without a task team. Usually
 591 // that is harmless. In one corner case, however, when the first task
 592 // encountered is an untied task, the check in __kmp_task_alloc will crash
 593 // because it uses the task team pointer without checking whether it is
 594 // nullptr, presumably relying on the assumption that the task team has
 595 // been set up.
596 if (task_team && KMP_HIDDEN_HELPER_WORKER_THREAD(th_gtid)((th_gtid) > 1 && (th_gtid) <= __kmp_hidden_helper_threads_num
)
&&
597 !TCR_4(__kmp_hidden_helper_team_done)(__kmp_hidden_helper_team_done)) {
598 // If there is still hidden helper tasks to be executed, the hidden helper
599 // thread will not enter a waiting status.
600 if (KMP_ATOMIC_LD_ACQ(&__kmp_unexecuted_hidden_helper_tasks)(&__kmp_unexecuted_hidden_helper_tasks)->load(std::memory_order_acquire
)
== 0) {
601 __kmp_hidden_helper_worker_thread_wait();
602 }
603 continue;
604 }
605
606 // Don't suspend if KMP_BLOCKTIME is set to "infinite"
607 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME(2147483647) &&
608 __kmp_pause_status != kmp_soft_paused)
609 continue;
610
611 // Don't suspend if there is a likelihood of new tasks being spawned.
612 if (task_team != NULL__null && TCR_4(task_team->tt.tt_found_tasks)(task_team->tt.tt_found_tasks) &&
613 !__kmp_wpolicy_passive)
614 continue;
615
616#if KMP_USE_MONITOR
617 // If we have waited a bit more, fall asleep
618 if (TCR_4(__kmp_global.g.g_time.dt.t_value)(__kmp_global.g.g_time.dt.t_value) < hibernate)
619 continue;
620#else
621 if (KMP_BLOCKING(hibernate_goal, poll_count++)((hibernate_goal) > __kmp_hardware_timestamp()))
622 continue;
623#endif
624 // Don't suspend if wait loop designated non-sleepable
625 // in template parameters
626 if (!Sleepable)
627 continue;
628
629#if KMP_HAVE_MWAIT((0 || 1) && (1 || 0) && !0) || KMP_HAVE_UMWAIT((0 || 1) && (1 || 0) && !0)
630 if (__kmp_mwait_enabled || __kmp_umwait_enabled) {
631 KF_TRACE(50, ("__kmp_wait_sleep: T#%d using monitor/mwait\n", th_gtid))if (kmp_f_debug >= 50) { __kmp_debug_printf ("__kmp_wait_sleep: T#%d using monitor/mwait\n"
, th_gtid); }
;
632 flag->mwait(th_gtid);
633 } else {
634#endif
635 KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid))if (kmp_f_debug >= 50) { __kmp_debug_printf ("__kmp_wait_sleep: T#%d suspend time reached\n"
, th_gtid); }
;
636#if KMP_OS_UNIX1
637 if (final_spin)
638 KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false)(&this_thr->th.th_blocking)->store(false, std::memory_order_release
)
;
639#endif
640 flag->suspend(th_gtid);
641#if KMP_OS_UNIX1
642 if (final_spin)
643 KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true)(&this_thr->th.th_blocking)->store(true, std::memory_order_release
)
;
644#endif
645#if KMP_HAVE_MWAIT((0 || 1) && (1 || 0) && !0) || KMP_HAVE_UMWAIT((0 || 1) && (1 || 0) && !0)
646 }
647#endif
648
649 if (TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done)) {
650 if (__kmp_global.g.g_abort)
651 __kmp_abort_thread();
652 break;
653 } else if (__kmp_tasking_mode != tskm_immediate_exec &&
654 this_thr->th.th_reap_state == KMP_SAFE_TO_REAP1) {
655 this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP0;
656 }
657 // TODO: If thread is done with work and times out, disband/free
658 }
659
660#if OMPT_SUPPORT1
661 ompt_state_t ompt_exit_state = this_thr->th.ompt_thread_info.state;
662 if (ompt_enabled.enabled && ompt_exit_state != ompt_state_undefined) {
663#if OMPT_OPTIONAL1
664 if (final_spin) {
665 __ompt_implicit_task_end(this_thr, ompt_exit_state, tId);
666 ompt_exit_state = this_thr->th.ompt_thread_info.state;
667 }
668#endif
669 if (ompt_exit_state == ompt_state_idle) {
670 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
671 }
672 }
673#endif
674#if KMP_STATS_ENABLED0
675 // If we were put into idle state, pop that off the state stack
676 if (KMP_GET_THREAD_STATE()((void)0) == IDLE) {
677 KMP_POP_PARTITIONED_TIMER()((void)0);
678 KMP_SET_THREAD_STATE(thread_state)((void)0);
679 this_thr->th.th_stats->resetIdleFlag();
680 }
681#endif
682
683#if KMP_OS_UNIX1
684 if (final_spin)
685 KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false)(&this_thr->th.th_blocking)->store(false, std::memory_order_release
)
;
686#endif
687 KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin))do { __asm__ __volatile__("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 "
::"i"(0x4377) : "%ebx"); if (sync_iters >= __kmp_itt_prepare_delay
) { (!__kmp_itt_fsync_acquired_ptr__3_0) ? (void)0 : __kmp_itt_fsync_acquired_ptr__3_0
((void *)((void *)const_cast<void *>(spin))); } } while
(0)
;
688 if (Cancellable) {
689 kmp_team_t *team = this_thr->th.th_team;
690 if (team && team->t.t_cancel_request == cancel_parallel) {
691 if (tasks_completed) {
692 // undo the previous decrement of unfinished_threads so that the
693 // thread can decrement at the join barrier with no problem
694 kmp_task_team_t *task_team = this_thr->th.th_task_team;
695 std::atomic<kmp_int32> *unfinished_threads =
696 &(task_team->tt.tt_unfinished_threads);
697 KMP_ATOMIC_INC(unfinished_threads)(unfinished_threads)->fetch_add(1, std::memory_order_acq_rel
)
;
698 }
699 return true;
700 }
701 }
702 return false;
703}
704
705#if KMP_HAVE_MWAIT((0 || 1) && (1 || 0) && !0) || KMP_HAVE_UMWAIT((0 || 1) && (1 || 0) && !0)
706// Set up a monitor on the flag variable causing the calling thread to wait in
707// a less active state until the flag variable is modified.
708template <class C>
709static inline void __kmp_mwait_template(int th_gtid, C *flag) {
710 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_mwait)((void)0);
711 kmp_info_t *th = __kmp_threads[th_gtid];
712
713 KF_TRACE(30, ("__kmp_mwait_template: T#%d enter for flag = %p\n", th_gtid,if (kmp_f_debug >= 30) { __kmp_debug_printf ("__kmp_mwait_template: T#%d enter for flag = %p\n"
, th_gtid, flag->get()); }
714 flag->get()))if (kmp_f_debug >= 30) { __kmp_debug_printf ("__kmp_mwait_template: T#%d enter for flag = %p\n"
, th_gtid, flag->get()); }
;
715
716 // User-level mwait is available
717 KMP_DEBUG_ASSERT(__kmp_mwait_enabled || __kmp_umwait_enabled)if (!(__kmp_mwait_enabled || __kmp_umwait_enabled)) { __kmp_debug_assert
("__kmp_mwait_enabled || __kmp_umwait_enabled", "openmp/runtime/src/kmp_wait_release.h"
, 717); }
;
718
719 __kmp_suspend_initialize_thread(th);
720 __kmp_lock_suspend_mx(th);
721
722 volatile void *spin = flag->get();
723 void *cacheline = (void *)(kmp_uintptr_t(spin) & ~(CACHE_LINE64 - 1));
724
725 if (!flag->done_check()) {
726 // Mark thread as no longer active
727 th->th.th_active = FALSE0;
728 if (th->th.th_active_in_pool) {
729 th->th.th_active_in_pool = FALSE0;
730 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth)(&__kmp_thread_pool_active_nth)->fetch_sub(1, std::memory_order_acq_rel
)
;
731 KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0)if (!((__kmp_thread_pool_active_nth) >= 0)) { __kmp_debug_assert
("(__kmp_thread_pool_active_nth) >= 0", "openmp/runtime/src/kmp_wait_release.h"
, 731); }
;
732 }
733 flag->set_sleeping();
734 KF_TRACE(50, ("__kmp_mwait_template: T#%d calling monitor\n", th_gtid))if (kmp_f_debug >= 50) { __kmp_debug_printf ("__kmp_mwait_template: T#%d calling monitor\n"
, th_gtid); }
;
735#if KMP_HAVE_UMWAIT((0 || 1) && (1 || 0) && !0)
736 if (__kmp_umwait_enabled) {
737 __kmp_umonitor(cacheline);
738 }
739#elif KMP_HAVE_MWAIT((0 || 1) && (1 || 0) && !0)
740 if (__kmp_mwait_enabled) {
741 __kmp_mm_monitor(cacheline, 0, 0);
742 }
743#endif
744 // To avoid a race, check flag between 'monitor' and 'mwait'. A write to
745 // the address could happen after the last time we checked and before
746 // monitoring started, in which case monitor can't detect the change.
747 if (flag->done_check())
748 flag->unset_sleeping();
749 else {
750 // if flag changes here, wake-up happens immediately
751 TCW_PTR(th->th.th_sleep_loc, (void *)flag)((th->th.th_sleep_loc)) = (((void *)flag));
752 th->th.th_sleep_loc_type = flag->get_type();
753 __kmp_unlock_suspend_mx(th);
754 KF_TRACE(50, ("__kmp_mwait_template: T#%d calling mwait\n", th_gtid))if (kmp_f_debug >= 50) { __kmp_debug_printf ("__kmp_mwait_template: T#%d calling mwait\n"
, th_gtid); }
;
755#if KMP_HAVE_UMWAIT((0 || 1) && (1 || 0) && !0)
756 if (__kmp_umwait_enabled) {
757 __kmp_umwait(1, 100); // to do: enable ctrl via hints, backoff counter
758 }
759#elif KMP_HAVE_MWAIT((0 || 1) && (1 || 0) && !0)
760 if (__kmp_mwait_enabled) {
761 __kmp_mm_mwait(0, __kmp_mwait_hints);
762 }
763#endif
764 KF_TRACE(50, ("__kmp_mwait_template: T#%d mwait done\n", th_gtid))if (kmp_f_debug >= 50) { __kmp_debug_printf ("__kmp_mwait_template: T#%d mwait done\n"
, th_gtid); }
;
765 __kmp_lock_suspend_mx(th);
766 // Clean up sleep info; doesn't matter how/why this thread stopped waiting
767 if (flag->is_sleeping())
768 flag->unset_sleeping();
769 TCW_PTR(th->th.th_sleep_loc, NULL)((th->th.th_sleep_loc)) = ((__null));
770 th->th.th_sleep_loc_type = flag_unset;
771 }
772 // Mark thread as active again
773 th->th.th_active = TRUE(!0);
774 if (TCR_4(th->th.th_in_pool)(th->th.th_in_pool)) {
775 KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth)(&__kmp_thread_pool_active_nth)->fetch_add(1, std::memory_order_acq_rel
)
;
776 th->th.th_active_in_pool = TRUE(!0);
777 }
778 } // Drop out to main wait loop to check flag, handle tasks, etc.
779 __kmp_unlock_suspend_mx(th);
780 KF_TRACE(30, ("__kmp_mwait_template: T#%d exit\n", th_gtid))if (kmp_f_debug >= 30) { __kmp_debug_printf ("__kmp_mwait_template: T#%d exit\n"
, th_gtid); }
;
781}
782#endif // KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
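The comment at source lines 744-746 is the load-bearing detail of this mwait path: the flag has to be re-checked after the monitor is armed and before the low-power wait, otherwise a release that lands in that window is never noticed by the monitor and the wakeup is lost. A runnable sketch of just that ordering, with arm_monitor() and low_power_wait() as hypothetical no-op stand-ins for __kmp_umonitor()/__kmp_umwait():

#include <atomic>
#include <cstdio>

static std::atomic<int> g_flag{0};

static void arm_monitor(const void * /*cacheline*/) {} // stand-in, no-op here
static void low_power_wait() {}                        // stand-in, no-op here

void wait_for_release(int released_value) {
  while (g_flag.load(std::memory_order_acquire) != released_value) {
    arm_monitor(&g_flag);
    // Re-check between "monitor" and "wait": a write may have landed after the
    // loop condition was evaluated and before the monitor was armed.
    if (g_flag.load(std::memory_order_acquire) == released_value)
      break;
    low_power_wait();
  }
}

int main() {
  g_flag.store(4, std::memory_order_release); // already released
  wait_for_release(4);                        // returns without waiting
  std::puts("observed release without missing the wakeup");
  return 0;
}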
783
784/* Release any threads specified as waiting on the flag by releasing the flag
 785 and resuming the waiting thread if indicated by the sleep bit(s). A thread that
786 calls __kmp_wait_template must call this function to wake up the potentially
787 sleeping thread and prevent deadlocks! */
788template <class C> static inline void __kmp_release_template(C *flag) {
789#ifdef KMP_DEBUG1
790 int gtid = TCR_4(__kmp_init_gtid)(__kmp_init_gtid) ? __kmp_get_gtid()__kmp_get_global_thread_id() : -1;
791#endif
792 KF_TRACE(20, ("__kmp_release: T#%d releasing flag(%x)\n", gtid, flag->get()))if (kmp_f_debug >= 20) { __kmp_debug_printf ("__kmp_release: T#%d releasing flag(%x)\n"
, gtid, flag->get()); }
;
793 KMP_DEBUG_ASSERT(flag->get())if (!(flag->get())) { __kmp_debug_assert("flag->get()",
"openmp/runtime/src/kmp_wait_release.h", 793); }
;
794 KMP_FSYNC_RELEASING(flag->get_void_p())(!__kmp_itt_fsync_releasing_ptr__3_0) ? (void)0 : __kmp_itt_fsync_releasing_ptr__3_0
((void *)(flag->get_void_p()))
;
795
796 flag->internal_release();
797
798 KF_TRACE(100, ("__kmp_release: T#%d set new spin=%d\n", gtid, flag->get(),if (kmp_f_debug >= 100) { __kmp_debug_printf ("__kmp_release: T#%d set new spin=%d\n"
, gtid, flag->get(), flag->load()); }
799 flag->load()))if (kmp_f_debug >= 100) { __kmp_debug_printf ("__kmp_release: T#%d set new spin=%d\n"
, gtid, flag->get(), flag->load()); }
;
800
801 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME(2147483647)) {
802 // Only need to check sleep stuff if infinite block time not set.
803 // Are *any* threads waiting on flag sleeping?
804 if (flag->is_any_sleeping()) {
805 for (unsigned int i = 0; i < flag->get_num_waiters(); ++i) {
806 // if sleeping waiter exists at i, sets current_waiter to i inside flag
807 kmp_info_t *waiter = flag->get_waiter(i);
808 if (waiter) {
809 int wait_gtid = waiter->th.th_info.ds.ds_gtid;
810 // Wake up thread if needed
811 KF_TRACE(50, ("__kmp_release: T#%d waking up thread T#%d since sleep "if (kmp_f_debug >= 50) { __kmp_debug_printf ("__kmp_release: T#%d waking up thread T#%d since sleep "
"flag(%p) set\n", gtid, wait_gtid, flag->get()); }
812 "flag(%p) set\n",if (kmp_f_debug >= 50) { __kmp_debug_printf ("__kmp_release: T#%d waking up thread T#%d since sleep "
"flag(%p) set\n", gtid, wait_gtid, flag->get()); }
813 gtid, wait_gtid, flag->get()))if (kmp_f_debug >= 50) { __kmp_debug_printf ("__kmp_release: T#%d waking up thread T#%d since sleep "
"flag(%p) set\n", gtid, wait_gtid, flag->get()); }
;
814 flag->resume(wait_gtid); // unsets flag's current_waiter when done
815 }
816 }
817 }
818 }
819}
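As the comment preceding __kmp_release_template() stresses, every call into __kmp_wait_template must be paired with a release from another thread, or the waiter can sleep forever. A minimal two-thread sketch of that pairing using a plain std::atomic word instead of the kmp_flag classes, with the releaser bumping the flag by 4 in the spirit of internal_release():

#include <atomic>
#include <cstdio>
#include <thread>

static std::atomic<unsigned> flag_word{0};
constexpr unsigned kChecker = 4; // the release bumps the flag by 4

void waiter() {
  // Spin/yield until released; the real templates additionally execute tasks,
  // back off, and may suspend after setting the sleep bit.
  while (flag_word.load(std::memory_order_acquire) != kChecker)
    std::this_thread::yield();
  std::puts("waiter released");
}

void releaser() {
  // The release side: publish the new flag value; a real releaser would also
  // resume any thread recorded as sleeping on the flag.
  flag_word.fetch_add(4, std::memory_order_acq_rel);
}

int main() {
  std::thread w(waiter);
  releaser();
  w.join();
  return 0;
}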
820
821template <bool Cancellable, bool Sleepable>
822class kmp_flag_32 : public kmp_flag_atomic<kmp_uint32, flag32, Sleepable> {
823public:
824 kmp_flag_32(std::atomic<kmp_uint32> *p)
825 : kmp_flag_atomic<kmp_uint32, flag32, Sleepable>(p) {}
826 kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_info_t *thr)
827 : kmp_flag_atomic<kmp_uint32, flag32, Sleepable>(p, thr) {}
828 kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_uint32 c)
829 : kmp_flag_atomic<kmp_uint32, flag32, Sleepable>(p, c) {}
830 void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); }
831#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
832 void mwait(int th_gtid) { __kmp_mwait_32(th_gtid, this); }
833#endif
834 void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); }
835 int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
836 int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
837 kmp_int32 is_constrained) {
838 return __kmp_execute_tasks_32(
839 this_thr, gtid, this, final_spin,
840 thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
841 }
842 bool wait(kmp_info_t *this_thr,
843 int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
844 if (final_spin)
845 return __kmp_wait_template<kmp_flag_32, TRUE, Cancellable, Sleepable>(
846 this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
847 else
848 return __kmp_wait_template<kmp_flag_32, FALSE, Cancellable, Sleepable>(
849 this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
850 }
851 void release() { __kmp_release_template(this); }
852 flag_type get_ptr_type() { return flag32; }
853};
854
855template <bool Cancellable, bool Sleepable>
856class kmp_flag_64 : public kmp_flag_native<kmp_uint64, flag64, Sleepable> {
857public:
858 kmp_flag_64(volatile kmp_uint64 *p)
859 : kmp_flag_native<kmp_uint64, flag64, Sleepable>(p) {}
860 kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr)
861 : kmp_flag_native<kmp_uint64, flag64, Sleepable>(p, thr) {}
862 kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c)
863 : kmp_flag_native<kmp_uint64, flag64, Sleepable>(p, c) {}
864 kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c, std::atomic<bool> *loc)
865 : kmp_flag_native<kmp_uint64, flag64, Sleepable>(p, c, loc) {}
866 void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); }
867#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
868 void mwait(int th_gtid) { __kmp_mwait_64(th_gtid, this); }
869#endif
870 void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); }
871 int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
872 int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
873 kmp_int32 is_constrained) {
874 return __kmp_execute_tasks_64(
875 this_thr, gtid, this, final_spin,
876 thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
877 }
878 bool wait(kmp_info_t *this_thr,
879 int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
880 if (final_spin)
12.1 'final_spin' is 1
13 Taking true branch
881 return __kmp_wait_template<kmp_flag_64, TRUE, Cancellable, Sleepable>(
14 Value assigned to 'ompt_enabled.enabled', which participates in a condition later
15 Value assigned to 'ompt_enabled.ompt_callback_sync_region', which participates in a condition later
16 Value assigned to 'ompt_callbacks.ompt_callback_sync_region_callback'
882 this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
883 else
884 return __kmp_wait_template<kmp_flag_64, FALSE, Cancellable, Sleepable>(
885 this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
886 }
887 void release() { __kmp_release_template(this); }
888 flag_type get_ptr_type() { return flag64; }
889};
890
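Analyzer steps 14-16 above record that ompt_enabled.enabled, ompt_enabled.ompt_callback_sync_region, and ompt_callbacks.ompt_callback_sync_region_callback are assigned inside this wait path and participate in a condition evaluated later. The sketch below is a deliberately simplified, hypothetical reduction of that defect class, not the runtime's OMPT code: ToolCallbacks, notify_sync_region, and sync_region_cb are invented names. It shows the generic shape of the problem, an "enabled" bit and a callback pointer recorded separately, together with the defensive null check on the pointer itself that keeps the later indirect call safe.

#include <cstdio>

using sync_region_cb_t = void (*)(int endpoint);

// Hypothetical callback table: the enable bits and the function pointer are
// stored separately, so they can disagree on some path.
struct ToolCallbacks {
  bool enabled = false;
  bool has_sync_region = false;              // cf. ompt_enabled.ompt_callback_sync_region
  sync_region_cb_t sync_region_cb = nullptr; // cf. ompt_callbacks....sync_region_callback
};

static ToolCallbacks g_tool; // values assigned here "participate in a condition later"

void notify_sync_region(int endpoint) {
  // If has_sync_region can be true while sync_region_cb is still null on some
  // path, calling the pointer unconditionally is a call through null.
  if (g_tool.enabled && g_tool.has_sync_region) {
    if (g_tool.sync_region_cb)               // defensive check closes that path
      g_tool.sync_region_cb(endpoint);
  }
}

int main() {
  g_tool.enabled = true;
  g_tool.has_sync_region = true;             // bit set, but no callback registered
  notify_sync_region(/*endpoint=*/1);        // safely does nothing
  std::printf("no call through a null pointer\n");
  return 0;
}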
891template <bool Cancellable, bool Sleepable>
892class kmp_atomic_flag_64
893 : public kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable> {
894public:
895 kmp_atomic_flag_64(std::atomic<kmp_uint64> *p)
896 : kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p) {}
897 kmp_atomic_flag_64(std::atomic<kmp_uint64> *p, kmp_info_t *thr)
898 : kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p, thr) {}
899 kmp_atomic_flag_64(std::atomic<kmp_uint64> *p, kmp_uint64 c)
900 : kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p, c) {}
901 kmp_atomic_flag_64(std::atomic<kmp_uint64> *p, kmp_uint64 c,
902 std::atomic<bool> *loc)
903 : kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p, c, loc) {}
904 void suspend(int th_gtid) { __kmp_atomic_suspend_64(th_gtid, this); }
905 void mwait(int th_gtid) { __kmp_atomic_mwait_64(th_gtid, this); }
906 void resume(int th_gtid) { __kmp_atomic_resume_64(th_gtid, this); }
907 int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
908 int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
909 kmp_int32 is_constrained) {
910 return __kmp_atomic_execute_tasks_64(
911 this_thr, gtid, this, final_spin,
912 thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
913 }
914 bool wait(kmp_info_t *this_thr,
915 int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
916 if (final_spin)
917 return __kmp_wait_template<kmp_atomic_flag_64, TRUE, Cancellable,
918 Sleepable>(
919 this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
920 else
921 return __kmp_wait_template<kmp_atomic_flag_64, FALSE, Cancellable,
922 Sleepable>(
923 this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
924 }
925 void release() { __kmp_release_template(this); }
926 flag_type get_ptr_type() { return atomic_flag64; }
927};
928
929// Hierarchical 64-bit on-core barrier instantiation
930class kmp_flag_oncore : public kmp_flag_native<kmp_uint64, flag_oncore, false> {
931 kmp_uint32 offset; /**< Portion of flag of interest for an operation. */
932 bool flag_switch; /**< Indicates a switch in flag location. */
933 enum barrier_type bt; /**< Barrier type. */
934 kmp_info_t *this_thr; /**< Thread to redirect to different flag location. */
935#if USE_ITT_BUILD
936 void *itt_sync_obj; /**< ITT object to pass to new flag location. */
937#endif
938 unsigned char &byteref(volatile kmp_uint64 *loc, size_t offset) {
939 return (RCAST(unsigned char *, CCAST(kmp_uint64 *, loc)))[offset];
940 }
941
942public:
943 kmp_flag_oncore(volatile kmp_uint64 *p)
944 : kmp_flag_native<kmp_uint64, flag_oncore, false>(p), flag_switch(false) {
945 }
946 kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx)
947 : kmp_flag_native<kmp_uint64, flag_oncore, false>(p), offset(idx),
948 flag_switch(false),
949 bt(bs_last_barrier) USE_ITT_BUILD_ARG(itt_sync_obj(nullptr)) {}
950 kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx,
951 enum barrier_type bar_t,
952 kmp_info_t *thr USE_ITT_BUILD_ARG(void *itt))
953 : kmp_flag_native<kmp_uint64, flag_oncore, false>(p, c), offset(idx),
954 flag_switch(false), bt(bar_t),
955 this_thr(thr) USE_ITT_BUILD_ARG(itt_sync_obj(itt)) {}
956 virtual ~kmp_flag_oncore() override {}
957 void *operator new(size_t size) { return __kmp_allocate(size); }
958 void operator delete(void *p) { __kmp_free(p); }
959 bool done_check_val(kmp_uint64 old_loc) override {
960 return byteref(&old_loc, offset) == checker;
961 }
962 bool done_check() override { return done_check_val(*get()); }
963 bool notdone_check() override {
964 // Calculate flag_switch
965 if (this_thr->th.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG)
966 flag_switch = true;
967 if (byteref(get(), offset) != 1 && !flag_switch)
968 return true;
969 else if (flag_switch) {
970 this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING;
971 kmp_flag_64<> flag(&this_thr->th.th_bar[bt].bb.b_go,
972 (kmp_uint64)KMP_BARRIER_STATE_BUMP);
973 __kmp_wait_64(this_thr, &flag, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
974 }
975 return false;
976 }
977 void internal_release() {
978 // Other threads can write their own bytes simultaneously.
979 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
980 byteref(get(), offset) = 1;
981 } else {
982 kmp_uint64 mask = 0;
983 byteref(&mask, offset) = 1;
984 KMP_TEST_THEN_OR64(get(), mask);
985 }
986 }
987 void wait(kmp_info_t *this_thr, int final_spin) {
988 if (final_spin)
989 __kmp_wait_template<kmp_flag_oncore, TRUE>(
990 this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
991 else
992 __kmp_wait_template<kmp_flag_oncore, FALSE>(
993 this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
994 }
995 void release() { __kmp_release_template(this); }
996 void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); }
997#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
998 void mwait(int th_gtid) { __kmp_mwait_oncore(th_gtid, this); }
999#endif
1000 void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); }
1001 int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
1002 int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
1003 kmp_int32 is_constrained) {
1004#if OMPD_SUPPORT
1005 int ret = __kmp_execute_tasks_oncore(
1006 this_thr, gtid, this, final_spin,
1007 thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
1008 if (ompd_state & OMPD_ENABLE_BP)
1009 ompd_bp_task_end();
1010 return ret;
1011#else
1012 return __kmp_execute_tasks_oncore(
1013 this_thr, gtid, this, final_spin,
1014 thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
1015#endif
1016 }
1017 enum barrier_type get_bt() { return bt; }
1018 flag_type get_ptr_type() { return flag_oncore; }
1019};
1020
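kmp_flag_oncore::internal_release() above sets only the caller's own byte inside the shared 64-bit flag word, and on the finite-blocktime path it does so with an atomic OR so that concurrent releases at different offsets cannot clobber one another. Below is a minimal stand-alone sketch of that idea; the names byte_mask, release_byte, and byte_is_set are invented, and a shift-based byte access replaces the byteref() helper.

#include <atomic>
#include <cassert>
#include <cstddef>
#include <cstdint>

// Mask with a 1 in the byte at 'offset' (mirrors byteref(&mask, offset) = 1).
static std::uint64_t byte_mask(std::size_t offset) {
  return std::uint64_t{1} << (8 * offset);
}

// Set only our byte; other releasers OR in their own bytes concurrently
// (analogous to KMP_TEST_THEN_OR64(get(), mask)).
static void release_byte(std::atomic<std::uint64_t> &flag, std::size_t offset) {
  flag.fetch_or(byte_mask(offset), std::memory_order_release);
}

// A waiter checks just the byte it owns (analogous to byteref(get(), offset) == 1).
static bool byte_is_set(const std::atomic<std::uint64_t> &flag, std::size_t offset) {
  return ((flag.load(std::memory_order_acquire) >> (8 * offset)) & 0xffu) == 1;
}

int main() {
  std::atomic<std::uint64_t> flag{0};
  release_byte(flag, 3);             // waiter 3's byte becomes 1
  assert(byte_is_set(flag, 3));
  assert(!byte_is_set(flag, 0));     // other waiters' bytes are untouched
  return 0;
}

In the listing, the infinite-blocktime branch instead performs a plain single-byte store through byteref(), while the branch shown here corresponds to the fetch-and-OR path.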
1021static inline void __kmp_null_resume_wrapper(kmp_info_t *thr) {
1022 int gtid = __kmp_gtid_from_thread(thr);
1023 void *flag = CCAST(void *, thr->th.th_sleep_loc);
1024 flag_type type = thr->th.th_sleep_loc_type;
1025 if (!flag)
1026 return;
1027 // Attempt to wake up a thread: examine its type and call appropriate template
1028 switch (type) {
1029 case flag32:
1030 __kmp_resume_32(gtid, RCAST(kmp_flag_32<> *, flag));
1031 break;
1032 case flag64:
1033 __kmp_resume_64(gtid, RCAST(kmp_flag_64<> *, flag));
1034 break;
1035 case atomic_flag64:
1036 __kmp_atomic_resume_64(gtid, RCAST(kmp_atomic_flag_64<> *, flag));
1037 break;
1038 case flag_oncore:
1039 __kmp_resume_oncore(gtid, RCAST(kmp_flag_oncore *, flag));
1040 break;
1041#ifdef KMP_DEBUG
1042 case flag_unset:
1043 KF_TRACE(100, ("__kmp_null_resume_wrapper: flag type %d is unset\n", type));
1044 break;
1045 default:
1046 KF_TRACE(100, ("__kmp_null_resume_wrapper: flag type %d does not match any "
1047 "known flag type\n",
1048 type));
1049#endif
1050 }
1051}
1052
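__kmp_null_resume_wrapper above keeps a type-erased sleep location (a void *) next to an explicit flag_type tag and switches on the tag to recover the concrete flag class before resuming the thread, returning early when no location is recorded. The sketch below illustrates that tag-plus-void* dispatch in isolation; every type and function in it (toy_thread, toy_flag32_t, resume_32, and so on) is hypothetical and merely stands in for the kmp_info_t fields and __kmp_resume_* entry points.

#include <cstdint>
#include <cstdio>

enum toy_flag_type { toy_unset, toy_flag32, toy_flag64 };

struct toy_flag32_t { std::uint32_t v; };
struct toy_flag64_t { std::uint64_t v; };

struct toy_thread {
  void *sleep_loc = nullptr;                 // cf. th->th.th_sleep_loc
  toy_flag_type sleep_loc_type = toy_unset;  // cf. th->th.th_sleep_loc_type
};

static void resume_32(toy_flag32_t *f) { std::printf("resume 32-bit flag %u\n", f->v); }
static void resume_64(toy_flag64_t *f) {
  std::printf("resume 64-bit flag %llu\n", (unsigned long long)f->v);
}

static void null_resume_wrapper(toy_thread *thr) {
  void *flag = thr->sleep_loc;
  if (!flag)                                 // nothing recorded: nothing to wake
    return;
  switch (thr->sleep_loc_type) {             // dispatch on the recorded tag
  case toy_flag32:
    resume_32(static_cast<toy_flag32_t *>(flag));
    break;
  case toy_flag64:
    resume_64(static_cast<toy_flag64_t *>(flag));
    break;
  default:                                   // unset/unknown tag: ignore, as the
    break;                                   // debug-only trace path above does
  }
}

int main() {
  toy_flag64_t f{7};
  toy_thread t;
  t.sleep_loc = &f;
  t.sleep_loc_type = toy_flag64;
  null_resume_wrapper(&t);
  return 0;
}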
1053/*!
1054@}
1055*/
1056
1057#endif // KMP_WAIT_RELEASE_H