Bug Summary

File: build/source/openmp/runtime/src/kmp_csupport.cpp
Warning: line 1768, column 5
Called C++ object pointer is uninitialized
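
This warning comes from the analyzer's core call checking: a member function is invoked through a pointer that may never have been assigned on some path. As a rough illustration of the pattern being reported (this is not the code at kmp_csupport.cpp:1768, just a minimal sketch of the warning class):

    struct Widget { void run(); };

    void demo(bool ready) {
      Widget *w;        // not initialized on every path
      if (ready)
        w = new Widget();
      w->run();         // analyzer: "Called C++ object pointer is uninitialized" when !ready
    }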

Annotated Source Code


clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name kmp_csupport.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -resource-dir /usr/lib/llvm-16/lib/clang/16.0.0 -I projects/openmp/runtime/src -I /build/source/openmp/runtime/src -I include -I /build/source/llvm/include -I /build/source/openmp/runtime/src/i18n -I /build/source/openmp/runtime/src/include -I /build/source/openmp/runtime/src/thirdparty/ittnotify -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -D omp_EXPORTS -D _FORTIFY_SOURCE=2 -D NDEBUG -D _GNU_SOURCE -D _REENTRANT -D _FORTIFY_SOURCE=2 -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-16/lib/clang/16.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fmacro-prefix-map=/build/source/= -fcoverage-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fcoverage-prefix-map=/build/source/= -source-date-epoch 1670066131 -O2 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -Wno-misleading-indentation -Wno-enum-constexpr-conversion -Wno-extra -Wno-pedantic -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-covered-switch-default -Wno-frame-address -Wno-strict-aliasing -Wno-stringop-truncation -Wno-switch -Wno-uninitialized -Wno-return-type-c-linkage -Wno-cast-qual -Wno-int-to-void-pointer-cast -std=c++17 -fdeprecated-macro -fdebug-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/= -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fno-rtti -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o 
/tmp/scan-build-2022-12-03-132955-15984-1 -x c++ /build/source/openmp/runtime/src/kmp_csupport.cpp
1/*
2 * kmp_csupport.cpp -- kfront linkage support for OpenMP.
3 */
4
5//===----------------------------------------------------------------------===//
6//
7// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8// See https://llvm.org/LICENSE.txt for license information.
9// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10//
11//===----------------------------------------------------------------------===//
12
13#define __KMP_IMP
14#include "omp.h" /* extern "C" declarations of user-visible routines */
15#include "kmp.h"
16#include "kmp_error.h"
17#include "kmp_i18n.h"
18#include "kmp_itt.h"
19#include "kmp_lock.h"
20#include "kmp_stats.h"
21#include "ompt-specific.h"
22
23 #define MAX_MESSAGE 512
24
25// flags will be used in future, e.g. to implement openmp_strict library
26// restrictions
27
28/*!
29 * @ingroup STARTUP_SHUTDOWN
30 * @param loc in source location information
31 * @param flags in for future use (currently ignored)
32 *
33 * Initialize the runtime library. This call is optional; if it is not made then
34 * it will be implicitly called by attempts to use other library functions.
35 */
36 void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
37   // By default __kmpc_begin() is no-op.
38   char *env;
39   if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
40       __kmp_str_match_true(env)) {
41     __kmp_middle_initialize();
42     __kmp_assign_root_init_mask();
43     KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
44   } else if (__kmp_ignore_mppbeg() == FALSE) {
45     // By default __kmp_ignore_mppbeg() returns TRUE.
46     __kmp_internal_begin();
47     KC_TRACE(10, ("__kmpc_begin: called\n"));
48   }
49 }
50
51/*!
52 * @ingroup STARTUP_SHUTDOWN
53 * @param loc source location information
54 *
55 * Shutdown the runtime library. This is also optional, and even if called will
56 * not do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to
57 * zero.
58 */
59 void __kmpc_end(ident_t *loc) {
60   // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end()
61   // call no-op. However, this can be overridden with KMP_IGNORE_MPPEND
62   // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
63   // returns FALSE and __kmpc_end() will unregister this root (it can cause
64   // library shut down).
65   if (__kmp_ignore_mppend() == FALSE) {
66     KC_TRACE(10, ("__kmpc_end: called\n"));
67     KA_TRACE(30, ("__kmpc_end\n"));
68
69     __kmp_internal_end_thread(-1);
70   }
71 #if KMP_OS_WINDOWS && OMPT_SUPPORT
72   // Normal exit process on Windows does not allow worker threads of the final
73   // parallel region to finish reporting their events, so shutting down the
74   // library here fixes the issue at least for the cases where __kmpc_end() is
75   // placed properly.
76   if (ompt_enabled.enabled)
77     __kmp_internal_end_library(__kmp_gtid_get_specific());
78 #endif
79 }
80
81/*!
82@ingroup THREAD_STATES
83@param loc Source location information.
84@return The global thread index of the active thread.
85
86This function can be called in any context.
87
88If the runtime has only been entered at the outermost level from a
89single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is
90that which would be returned by omp_get_thread_num() in the outermost
91active parallel construct. (Or zero if there is no active parallel
92construct, since the primary thread is necessarily thread zero).
93
94If multiple non-OpenMP threads all enter an OpenMP construct then this
95will be a unique thread identifier among all the threads created by
96the OpenMP runtime (but the value cannot be defined in terms of
97OpenMP thread ids returned by omp_get_thread_num()).
98*/
99 kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
100   kmp_int32 gtid = __kmp_entry_gtid();
101
102   KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));
103
104   return gtid;
105 }
106
107/*!
108@ingroup THREAD_STATES
109@param loc Source location information.
110@return The number of threads under control of the OpenMP<sup>*</sup> runtime
111
112This function can be called in any context.
113It returns the total number of threads under the control of the OpenMP runtime.
114That is not a number that can be determined by any OpenMP standard calls, since
115the library may be called from more than one non-OpenMP thread, and this
116reflects the total over all such calls. Similarly the runtime maintains
117underlying threads even when they are not active (since the cost of creating
118and destroying OS threads is high), this call counts all such threads even if
119they are not waiting for work.
120*/
121 kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
122   KC_TRACE(10,
123            ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));
124
125   return TCR_4(__kmp_all_nth);
126 }
127
128/*!
129@ingroup THREAD_STATES
130@param loc Source location information.
131@return The thread number of the calling thread in the innermost active parallel
132construct.
133*/
134 kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
135   KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
136   return __kmp_tid_from_gtid(__kmp_entry_gtid());
137 }
138
139/*!
140@ingroup THREAD_STATES
141@param loc Source location information.
142@return The number of threads in the innermost active parallel construct.
143*/
144 kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
145   KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));
146
147   return __kmp_entry_thread()->th.th_team->t.t_nproc;
148 }
149
150/*!
151 * @ingroup DEPRECATED
152 * @param loc location description
153 *
154 * This function need not be called. It always returns TRUE.
155 */
156 kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
157 #ifndef KMP_DEBUG
158
159   return TRUE;
160
161 #else
162
163   const char *semi2;
164   const char *semi3;
165   int line_no;
166
167   if (__kmp_par_range == 0) {
168     return TRUE;
169   }
170   semi2 = loc->psource;
171   if (semi2 == NULL) {
172     return TRUE;
173   }
174   semi2 = strchr(semi2, ';');
175   if (semi2 == NULL) {
176     return TRUE;
177   }
178   semi2 = strchr(semi2 + 1, ';');
179   if (semi2 == NULL) {
180     return TRUE;
181   }
182   if (__kmp_par_range_filename[0]) {
183     const char *name = semi2 - 1;
184     while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
185       name--;
186     }
187     if ((*name == '/') || (*name == ';')) {
188       name++;
189     }
190     if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
191       return __kmp_par_range < 0;
192     }
193   }
194   semi3 = strchr(semi2 + 1, ';');
195   if (__kmp_par_range_routine[0]) {
196     if ((semi3 != NULL) && (semi3 > semi2) &&
197         (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
198       return __kmp_par_range < 0;
199     }
200   }
201   if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
202     if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
203       return __kmp_par_range > 0;
204     }
205     return __kmp_par_range < 0;
206   }
207   return TRUE;
208
209 #endif /* KMP_DEBUG */
210 }
211
212/*!
213@ingroup THREAD_STATES
214@param loc Source location information.
215@return 1 if this thread is executing inside an active parallel region, zero if
216not.
217*/
218kmp_int32 __kmpc_in_parallel(ident_t *loc) {
219 return __kmp_entry_thread()->th.th_root->r.r_active;
220}
221
222/*!
223@ingroup PARALLEL
224@param loc source location information
225@param global_tid global thread number
226@param num_threads number of threads requested for this parallel construct
227
228Set the number of threads to be used by the next fork spawned by this thread.
229This call is only required if the parallel construct has a `num_threads` clause.
230*/
231 void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
232                              kmp_int32 num_threads) {
233   KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
234                 global_tid, num_threads));
235   __kmp_assert_valid_gtid(global_tid);
236   __kmp_push_num_threads(loc, global_tid, num_threads);
237 }
238
239 void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
240   KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
241   /* the num_threads are automatically popped */
242 }
243
244 void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
245                            kmp_int32 proc_bind) {
246   KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
247                 proc_bind));
248   __kmp_assert_valid_gtid(global_tid);
249   __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
250 }
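
/*
 * For reference, a sketch (not generated code from any particular compiler)
 * of how these "push" entry points are typically used. For a directive such as
 *
 *   #pragma omp parallel num_threads(4) proc_bind(close)
 *
 * the calls land immediately before the fork, and the pushed values apply only
 * to the next fork from this thread (outlined_fn is a placeholder name):
 *
 *   kmp_int32 gtid = __kmpc_global_thread_num(&loc);
 *   __kmpc_push_num_threads(&loc, gtid, 4);
 *   __kmpc_push_proc_bind(&loc, gtid, (kmp_int32)proc_bind_close);
 *   __kmpc_fork_call(&loc, 0, (kmpc_micro)outlined_fn);
 */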
251
252/*!
253@ingroup PARALLEL
254@param loc source location information
255@param argc total number of arguments in the ellipsis
256@param microtask pointer to callback routine consisting of outlined parallel
257construct
258@param ... pointers to shared variables that aren't global
259
260Do the actual fork and call the microtask in the relevant number of threads.
261*/
262 void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
263   int gtid = __kmp_entry_gtid();
264
265 #if (KMP_STATS_ENABLED)
266   // If we were in a serial region, then stop the serial timer, record
267   // the event, and start parallel region timer
268   stats_state_e previous_state = KMP_GET_THREAD_STATE();
269   if (previous_state == stats_state_e::SERIAL_REGION) {
270     KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
271   } else {
272     KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
273   }
274   int inParallel = __kmpc_in_parallel(loc);
275   if (inParallel) {
276     KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
277   } else {
278     KMP_COUNT_BLOCK(OMP_PARALLEL);
279   }
280 #endif
281
282   // maybe to save thr_state is enough here
283   {
284     va_list ap;
285     va_start(ap, microtask);
286
287 #if OMPT_SUPPORT
288     ompt_frame_t *ompt_frame;
289     if (ompt_enabled.enabled) {
290       kmp_info_t *master_th = __kmp_threads[gtid];
291       ompt_frame = &master_th->th.th_current_task->ompt_task_info.frame;
292       ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
293     }
294     OMPT_STORE_RETURN_ADDRESS(gtid);
295 #endif
296
297 #if INCLUDE_SSC_MARKS
298     SSC_MARK_FORKING();
299 #endif
300     __kmp_fork_call(loc, gtid, fork_context_intel, argc,
301                     VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
302                     VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
303                     kmp_va_addr_of(ap));
304 #if INCLUDE_SSC_MARKS
305     SSC_MARK_JOINING();
306 #endif
307     __kmp_join_call(loc, gtid
308 #if OMPT_SUPPORT
309                     ,
310                     fork_context_intel
311 #endif
312     );
313
314     va_end(ap);
315
316 #if OMPT_SUPPORT
317     if (ompt_enabled.enabled) {
318       ompt_frame->enter_frame = ompt_data_none;
319     }
320 #endif
321   }
322
323 #if KMP_STATS_ENABLED
324   if (previous_state == stats_state_e::SERIAL_REGION) {
325     KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
326     KMP_SET_THREAD_STATE(previous_state);
327   } else {
328     KMP_POP_PARTITIONED_TIMER();
329   }
330 #endif // KMP_STATS_ENABLED
331 }
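
/*
 * For reference, a sketch of how a parallel region reaches __kmpc_fork_call
 * (placeholder names, not generated code from any particular compiler). For
 *
 *   int x = 0;
 *   #pragma omp parallel shared(x)
 *   { ... }
 *
 * the region body is outlined into a microtask with the kmpc_micro signature,
 * and the shared variables are passed through the trailing varargs:
 *
 *   static void outlined_fn(kmp_int32 *gtid, kmp_int32 *bound_tid, int *x) {
 *     // ...parallel region body, run by every thread in the team...
 *   }
 *   // call site: argc == 1 because one shared variable is forwarded
 *   __kmpc_fork_call(&loc, 1, (kmpc_micro)outlined_fn, &x);
 */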
332
333/*!
334@ingroup PARALLEL
335@param loc source location information
336@param global_tid global thread number
337@param num_teams number of teams requested for the teams construct
338@param num_threads number of threads per team requested for the teams construct
339
340Set the number of teams to be used by the teams construct.
341This call is only required if the teams construct has a `num_teams` clause
342or a `thread_limit` clause (or both).
343*/
344 void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
345                            kmp_int32 num_teams, kmp_int32 num_threads) {
346   KA_TRACE(20,
347            ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
348             global_tid, num_teams, num_threads));
349   __kmp_assert_valid_gtid(global_tid);
350   __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
351 }
352
353/*!
354@ingroup PARALLEL
355@param loc source location information
356@param global_tid global thread number
357@param num_teams_lb lower bound on number of teams requested for the teams
358construct
359@param num_teams_ub upper bound on number of teams requested for the teams
360construct
361@param num_threads number of threads per team requested for the teams construct
362
363Set the number of teams to be used by the teams construct. The number of initial
364teams created will be greater than or equal to the lower bound and less than or
365equal to the upper bound.
366This call is only required if the teams construct has a `num_teams` clause
367or a `thread_limit` clause (or both).
368*/
369 void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid,
370                               kmp_int32 num_teams_lb, kmp_int32 num_teams_ub,
371                               kmp_int32 num_threads) {
372   KA_TRACE(20, ("__kmpc_push_num_teams_51: enter T#%d num_teams_lb=%d"
373                 " num_teams_ub=%d num_threads=%d\n",
374                 global_tid, num_teams_lb, num_teams_ub, num_threads));
375   __kmp_assert_valid_gtid(global_tid);
376   __kmp_push_num_teams_51(loc, global_tid, num_teams_lb, num_teams_ub,
377                           num_threads);
378 }
379
380/*!
381@ingroup PARALLEL
382@param loc source location information
383@param argc total number of arguments in the ellipsis
384@param microtask pointer to callback routine consisting of outlined teams
385construct
386@param ... pointers to shared variables that aren't global
387
388Do the actual fork and call the microtask in the relevant number of threads.
389*/
390 void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
391                        ...) {
392   int gtid = __kmp_entry_gtid();
393   kmp_info_t *this_thr = __kmp_threads[gtid];
394   va_list ap;
395   va_start(ap, microtask);
396
397 #if KMP_STATS_ENABLED
398   KMP_COUNT_BLOCK(OMP_TEAMS);
399   stats_state_e previous_state = KMP_GET_THREAD_STATE();
400   if (previous_state == stats_state_e::SERIAL_REGION) {
401     KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
402   } else {
403     KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
404   }
405 #endif
406
407   // remember teams entry point and nesting level
408   this_thr->th.th_teams_microtask = microtask;
409   this_thr->th.th_teams_level =
410       this_thr->th.th_team->t.t_level; // AC: can be >0 on host
411
412 #if OMPT_SUPPORT
413   kmp_team_t *parent_team = this_thr->th.th_team;
414   int tid = __kmp_tid_from_gtid(gtid);
415   if (ompt_enabled.enabled) {
416     parent_team->t.t_implicit_task_taskdata[tid]
417         .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
418   }
419   OMPT_STORE_RETURN_ADDRESS(gtid);
420 #endif
421
422   // check if __kmpc_push_num_teams called, set default number of teams
423   // otherwise
424   if (this_thr->th.th_teams_size.nteams == 0) {
425     __kmp_push_num_teams(loc, gtid, 0, 0);
426   }
427   KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
428   KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
429   KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);
430
431   __kmp_fork_call(
432       loc, gtid, fork_context_intel, argc,
433       VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task
434       VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap));
435   __kmp_join_call(loc, gtid
436 #if OMPT_SUPPORT
437                   ,
438                   fork_context_intel
439 #endif
440   );
441
442   // Pop current CG root off list
443   KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
444   kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
445   this_thr->th.th_cg_roots = tmp->up;
446   KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
447                  " to node %p. cg_nthreads was %d\n",
448                  this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
449   KMP_DEBUG_ASSERT(tmp->cg_nthreads);
450   int i = tmp->cg_nthreads--;
451   if (i == 1) { // check if we are the last thread in CG (not always the case)
452     __kmp_free(tmp);
453   }
454   // Restore current task's thread_limit from CG root
455   KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
456   this_thr->th.th_current_task->td_icvs.thread_limit =
457       this_thr->th.th_cg_roots->cg_thread_limit;
458
459   this_thr->th.th_teams_microtask = NULL;
460   this_thr->th.th_teams_level = 0;
461   *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
462   va_end(ap);
463 #if KMP_STATS_ENABLED
464   if (previous_state == stats_state_e::SERIAL_REGION) {
465     KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
466     KMP_SET_THREAD_STATE(previous_state);
467   } else {
468     KMP_POP_PARTITIONED_TIMER();
469   }
470 #endif // KMP_STATS_ENABLED
471 }
472
473// I don't think this function should ever have been exported.
474// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
475// openmp code ever called it, but it's been exported from the RTL for so
476// long that I'm afraid to remove the definition.
477int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }
478
479/*!
480@ingroup PARALLEL
481@param loc source location information
482@param global_tid global thread number
483
484Enter a serialized parallel construct. This interface is used to handle a
485conditional parallel region, like this,
486@code
487#pragma omp parallel if (condition)
488@endcode
489when the condition is false.
490*/
491void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
492 // The implementation is now in kmp_runtime.cpp so that it can share static
493 // functions with kmp_fork_call since the tasks to be done are similar in
494 // each case.
495 __kmp_assert_valid_gtid(global_tid);
496 #if OMPT_SUPPORT
497   OMPT_STORE_RETURN_ADDRESS(global_tid);
498 #endif
499 __kmp_serialized_parallel(loc, global_tid);
500}
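
/*
 * For reference, a sketch of the kind of guard a compiler can emit for
 * "#pragma omp parallel if (cond)" (placeholder names, not generated code):
 *
 *   kmp_int32 gtid = __kmpc_global_thread_num(&loc);
 *   if (cond) {
 *     __kmpc_fork_call(&loc, 0, (kmpc_micro)outlined_fn);
 *   } else {
 *     kmp_int32 zero = 0;
 *     __kmpc_serialized_parallel(&loc, gtid);
 *     outlined_fn(&gtid, &zero);              // body runs on the calling thread
 *     __kmpc_end_serialized_parallel(&loc, gtid);
 *   }
 */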
501
502/*!
503@ingroup PARALLEL
504@param loc source location information
505@param global_tid global thread number
506
507Leave a serialized parallel construct.
508*/
509 void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
510   kmp_internal_control_t *top;
511   kmp_info_t *this_thr;
512   kmp_team_t *serial_team;
513
514   KC_TRACE(10,
515            ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));
516
517   /* skip all this code for autopar serialized loops since it results in
518      unacceptable overhead */
519   if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
520     return;
521
522   // Not autopar code
523   __kmp_assert_valid_gtid(global_tid);
524   if (!TCR_4(__kmp_init_parallel))
525     __kmp_parallel_initialize();
526
527   __kmp_resume_if_soft_paused();
528
529   this_thr = __kmp_threads[global_tid];
530   serial_team = this_thr->th.th_serial_team;
531
532   kmp_task_team_t *task_team = this_thr->th.th_task_team;
533   // we need to wait for the proxy tasks before finishing the thread
534   if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||
535                             task_team->tt.tt_hidden_helper_task_encountered))
536     __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
537
538   KMP_MB();
539   KMP_DEBUG_ASSERT(serial_team);
540   KMP_ASSERT(serial_team->t.t_serialized);
541   KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
542   KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
543   KMP_DEBUG_ASSERT(serial_team->t.t_threads);
544   KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
545
546 #if OMPT_SUPPORT
547   if (ompt_enabled.enabled &&
548       this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
549     OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
550     if (ompt_enabled.ompt_callback_implicit_task) {
551       ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
552           ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
553           OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
554     }
555
556     // reset clear the task id only after unlinking the task
557     ompt_data_t *parent_task_data;
558     __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);
559
560     if (ompt_enabled.ompt_callback_parallel_end) {
561       ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
562           &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
563           ompt_parallel_invoker_program | ompt_parallel_team,
564           OMPT_LOAD_RETURN_ADDRESS(global_tid));
565     }
566     __ompt_lw_taskteam_unlink(this_thr);
567     this_thr->th.ompt_thread_info.state = ompt_state_overhead;
568   }
569 #endif
570
571   /* If necessary, pop the internal control stack values and replace the team
572    * values */
573   top = serial_team->t.t_control_stack_top;
574   if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
575     copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
576     serial_team->t.t_control_stack_top = top->next;
577     __kmp_free(top);
578   }
579
580   /* pop dispatch buffers stack */
581   KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
582   {
583     dispatch_private_info_t *disp_buffer =
584         serial_team->t.t_dispatch->th_disp_buffer;
585     serial_team->t.t_dispatch->th_disp_buffer =
586         serial_team->t.t_dispatch->th_disp_buffer->next;
587     __kmp_free(disp_buffer);
588   }
589   this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore
590
591   --serial_team->t.t_serialized;
592   if (serial_team->t.t_serialized == 0) {
593
594     /* return to the parallel section */
595
596 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
597     if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
598       __kmp_clear_x87_fpu_status_word();
599       __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
600       __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
601     }
602 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
603
604     __kmp_pop_current_task_from_thread(this_thr);
605 #if OMPD_SUPPORT
606     if (ompd_state & OMPD_ENABLE_BP)
607       ompd_bp_parallel_end();
608 #endif
609
610     this_thr->th.th_team = serial_team->t.t_parent;
611     this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;
612
613     /* restore values cached in the thread */
614     this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
615     this_thr->th.th_team_master =
616         serial_team->t.t_parent->t.t_threads[0]; /* JPH */
617     this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;
618
619     /* TODO the below shouldn't need to be adjusted for serialized teams */
620     this_thr->th.th_dispatch =
621         &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];
622
623     KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
624     this_thr->th.th_current_task->td_flags.executing = 1;
625
626     if (__kmp_tasking_mode != tskm_immediate_exec) {
627       // Copy the task team from the new child / old parent team to the thread.
628       this_thr->th.th_task_team =
629           this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
630       KA_TRACE(20,
631                ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
632                 "team %p\n",
633                 global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
634     }
635 #if KMP_AFFINITY_SUPPORTED
636     if (this_thr->th.th_team->t.t_level == 0 && __kmp_affinity.flags.reset) {
637       __kmp_reset_root_init_mask(global_tid);
638     }
639 #endif
640   } else {
641     if (__kmp_tasking_mode != tskm_immediate_exec) {
642       KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
643                     "depth of serial team %p to %d\n",
644                     global_tid, serial_team, serial_team->t.t_serialized));
645     }
646   }
647
648   serial_team->t.t_level--;
649   if (__kmp_env_consistency_check)
650     __kmp_pop_parallel(global_tid, NULL);
651 #if OMPT_SUPPORT
652   if (ompt_enabled.enabled)
653     this_thr->th.ompt_thread_info.state =
654         ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
655                                            : ompt_state_work_parallel);
656 #endif
657 }
658
659/*!
660@ingroup SYNCHRONIZATION
661@param loc source location information.
662
663Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though
664depending on the memory ordering convention obeyed by the compiler
665even that may not be necessary).
666*/
667 void __kmpc_flush(ident_t *loc) {
668   KC_TRACE(10, ("__kmpc_flush: called\n"));
669
670   /* need explicit __mf() here since use volatile instead in library */
671   KMP_MFENCE(); /* Flush all pending memory write invalidates. */
672
673 #if OMPT_SUPPORT && OMPT_OPTIONAL
674   if (ompt_enabled.ompt_callback_flush) {
675     ompt_callbacks.ompt_callback(ompt_callback_flush)(
676         __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
677   }
678 #endif
679 }
680
681/* -------------------------------------------------------------------------- */
682/*!
683@ingroup SYNCHRONIZATION
684@param loc source location information
685@param global_tid thread id.
686
687Execute a barrier.
688*/
689 void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
690   KMP_COUNT_BLOCK(OMP_BARRIER);
691   KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
692   __kmp_assert_valid_gtid(global_tid);
693
694   if (!TCR_4(__kmp_init_parallel))
695     __kmp_parallel_initialize();
696
697   __kmp_resume_if_soft_paused();
698
699   if (__kmp_env_consistency_check) {
700     if (loc == 0) {
701       KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
702     }
703     __kmp_check_barrier(global_tid, ct_barrier, loc);
704   }
705
706 #if OMPT_SUPPORT
707   ompt_frame_t *ompt_frame;
708   if (ompt_enabled.enabled) {
709     __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
710     if (ompt_frame->enter_frame.ptr == NULL)
711       ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
712   }
713   OMPT_STORE_RETURN_ADDRESS(global_tid);
714 #endif
715   __kmp_threads[global_tid]->th.th_ident = loc;
716   // TODO: explicit barrier_wait_id:
717   // this function is called when 'barrier' directive is present or
718   // implicit barrier at the end of a worksharing construct.
719   // 1) better to add a per-thread barrier counter to a thread data structure
720   // 2) set to 0 when a new team is created
721   // 4) no sync is required
722
723   __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
724 #if OMPT_SUPPORT && OMPT_OPTIONAL
725   if (ompt_enabled.enabled) {
726     ompt_frame->enter_frame = ompt_data_none;
727   }
728 #endif
729 }
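
/*
 * For reference: an explicit "#pragma omp barrier" inside a parallel region is
 * typically lowered to a single call of this shape (sketch only, not generated
 * code from any particular compiler):
 *
 *   kmp_int32 gtid = __kmpc_global_thread_num(&loc);
 *   __kmpc_barrier(&loc, gtid);   // every thread in the team must reach this
 */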
730
731/* The BARRIER for a MASTER section is always explicit */
732/*!
733@ingroup WORK_SHARING
734@param loc source location information.
735@param global_tid global thread number .
736@return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise.
737*/
738 kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
739   int status = 0;
740
741   KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
742   __kmp_assert_valid_gtid(global_tid);
743
744   if (!TCR_4(__kmp_init_parallel))
745     __kmp_parallel_initialize();
746
747   __kmp_resume_if_soft_paused();
748
749   if (KMP_MASTER_GTID(global_tid)) {
750     KMP_COUNT_BLOCK(OMP_MASTER);
751     KMP_PUSH_PARTITIONED_TIMER(OMP_master);
752     status = 1;
753   }
754
755 #if OMPT_SUPPORT && OMPT_OPTIONAL
756   if (status) {
757     if (ompt_enabled.ompt_callback_masked) {
758       kmp_info_t *this_thr = __kmp_threads[global_tid];
759       kmp_team_t *team = this_thr->th.th_team;
760
761       int tid = __kmp_tid_from_gtid(global_tid);
762       ompt_callbacks.ompt_callback(ompt_callback_masked)(
763           ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
764           &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
765           OMPT_GET_RETURN_ADDRESS(0));
766     }
767   }
768 #endif
769
770   if (__kmp_env_consistency_check) {
771 #if KMP_USE_DYNAMIC_LOCK
772     if (status)
773       __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
774     else
775       __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
776 #else
777     if (status)
778       __kmp_push_sync(global_tid, ct_master, loc, NULL);
779     else
780       __kmp_check_sync(global_tid, ct_master, loc, NULL);
781 #endif
782   }
783
784   return status;
785 }
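
/*
 * For reference, a sketch of how a master construct uses this pair of entry
 * points; only the thread for which __kmpc_master returns 1 runs the body:
 *
 *   if (__kmpc_master(&loc, gtid)) {
 *     // ...master region body...
 *     __kmpc_end_master(&loc, gtid);
 *   }
 *   // note: a master construct has no implied barrier at its end
 */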
786
787/*!
788@ingroup WORK_SHARING
789@param loc source location information.
790@param global_tid global thread number .
791
792Mark the end of a <tt>master</tt> region. This should only be called by the
793thread that executes the <tt>master</tt> region.
794*/
795 void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
796   KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
797   __kmp_assert_valid_gtid(global_tid);
798   KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
799   KMP_POP_PARTITIONED_TIMER();
800
801 #if OMPT_SUPPORT && OMPT_OPTIONAL
802   kmp_info_t *this_thr = __kmp_threads[global_tid];
803   kmp_team_t *team = this_thr->th.th_team;
804   if (ompt_enabled.ompt_callback_masked) {
805     int tid = __kmp_tid_from_gtid(global_tid);
806     ompt_callbacks.ompt_callback(ompt_callback_masked)(
807         ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
808         &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
809         OMPT_GET_RETURN_ADDRESS(0));
810   }
811 #endif
812
813   if (__kmp_env_consistency_check) {
814     if (KMP_MASTER_GTID(global_tid))
815       __kmp_pop_sync(global_tid, ct_master, loc);
816   }
817 }
818
819/*!
820@ingroup WORK_SHARING
821@param loc source location information.
822@param global_tid global thread number.
823@param filter result of evaluating filter clause on thread global_tid, or zero
824if no filter clause present
825@return 1 if this thread should execute the <tt>masked</tt> block, 0 otherwise.
826*/
827 kmp_int32 __kmpc_masked(ident_t *loc, kmp_int32 global_tid, kmp_int32 filter) {
828   int status = 0;
829   int tid;
830   KC_TRACE(10, ("__kmpc_masked: called T#%d\n", global_tid));
831   __kmp_assert_valid_gtid(global_tid);
832
833   if (!TCR_4(__kmp_init_parallel))
834     __kmp_parallel_initialize();
835
836   __kmp_resume_if_soft_paused();
837
838   tid = __kmp_tid_from_gtid(global_tid);
839   if (tid == filter) {
840     KMP_COUNT_BLOCK(OMP_MASKED);
841     KMP_PUSH_PARTITIONED_TIMER(OMP_masked);
842     status = 1;
843   }
844
845 #if OMPT_SUPPORT && OMPT_OPTIONAL
846   if (status) {
847     if (ompt_enabled.ompt_callback_masked) {
848       kmp_info_t *this_thr = __kmp_threads[global_tid];
849       kmp_team_t *team = this_thr->th.th_team;
850       ompt_callbacks.ompt_callback(ompt_callback_masked)(
851           ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
852           &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
853           OMPT_GET_RETURN_ADDRESS(0));
854     }
855   }
856 #endif
857
858   if (__kmp_env_consistency_check) {
859 #if KMP_USE_DYNAMIC_LOCK
860     if (status)
861       __kmp_push_sync(global_tid, ct_masked, loc, NULL, 0);
862     else
863       __kmp_check_sync(global_tid, ct_masked, loc, NULL, 0);
864 #else
865     if (status)
866       __kmp_push_sync(global_tid, ct_masked, loc, NULL);
867     else
868       __kmp_check_sync(global_tid, ct_masked, loc, NULL);
869 #endif
870   }
871
872   return status;
873 }
874
875/*!
876@ingroup WORK_SHARING
877@param loc source location information.
878@param global_tid global thread number .
879
880Mark the end of a <tt>masked</tt> region. This should only be called by the
881thread that executes the <tt>masked</tt> region.
882*/
883 void __kmpc_end_masked(ident_t *loc, kmp_int32 global_tid) {
884   KC_TRACE(10, ("__kmpc_end_masked: called T#%d\n", global_tid));
885   __kmp_assert_valid_gtid(global_tid);
886   KMP_POP_PARTITIONED_TIMER();
887
888 #if OMPT_SUPPORT && OMPT_OPTIONAL
889   kmp_info_t *this_thr = __kmp_threads[global_tid];
890   kmp_team_t *team = this_thr->th.th_team;
891   if (ompt_enabled.ompt_callback_masked) {
892     int tid = __kmp_tid_from_gtid(global_tid);
893     ompt_callbacks.ompt_callback(ompt_callback_masked)(
894         ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
895         &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
896         OMPT_GET_RETURN_ADDRESS(0));
897   }
898 #endif
899
900   if (__kmp_env_consistency_check) {
901     __kmp_pop_sync(global_tid, ct_masked, loc);
902   }
903 }
904
905/*!
906@ingroup WORK_SHARING
907@param loc source location information.
908@param gtid global thread number.
909
910Start execution of an <tt>ordered</tt> construct.
911*/
912 void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
913   int cid = 0;
914   kmp_info_t *th;
915   KMP_DEBUG_ASSERT(__kmp_init_serial);
916
917   KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
918   __kmp_assert_valid_gtid(gtid);
919
920   if (!TCR_4(__kmp_init_parallel))
921     __kmp_parallel_initialize();
922
923   __kmp_resume_if_soft_paused();
924
925 #if USE_ITT_BUILD
926   __kmp_itt_ordered_prep(gtid);
927 // TODO: ordered_wait_id
928 #endif /* USE_ITT_BUILD */
929
930   th = __kmp_threads[gtid];
931
932 #if OMPT_SUPPORT && OMPT_OPTIONAL
933   kmp_team_t *team;
934   ompt_wait_id_t lck;
935   void *codeptr_ra;
936   OMPT_STORE_RETURN_ADDRESS(gtid);
937   if (ompt_enabled.enabled) {
938     team = __kmp_team_from_gtid(gtid);
939     lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
940     /* OMPT state update */
941     th->th.ompt_thread_info.wait_id = lck;
942     th->th.ompt_thread_info.state = ompt_state_wait_ordered;
943
944     /* OMPT event callback */
945     codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
946     if (ompt_enabled.ompt_callback_mutex_acquire) {
947       ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
948           ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
949           codeptr_ra);
950     }
951   }
952 #endif
953
954   if (th->th.th_dispatch->th_deo_fcn != 0)
955     (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
956   else
957     __kmp_parallel_deo(&gtid, &cid, loc);
958
959 #if OMPT_SUPPORT && OMPT_OPTIONAL
960   if (ompt_enabled.enabled) {
961     /* OMPT state update */
962     th->th.ompt_thread_info.state = ompt_state_work_parallel;
963     th->th.ompt_thread_info.wait_id = 0;
964
965     /* OMPT event callback */
966     if (ompt_enabled.ompt_callback_mutex_acquired) {
967       ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
968           ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
969     }
970   }
971 #endif
972
973 #if USE_ITT_BUILD
974   __kmp_itt_ordered_start(gtid);
975 #endif /* USE_ITT_BUILD */
976 }
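
/*
 * For reference, a sketch of an ordered region inside a loop with the ordered
 * clause; the runtime releases the ordered part in sequential iteration order
 * (placeholder lowering, not generated code from any particular compiler):
 *
 *   #pragma omp for ordered
 *   for (int i = 0; i < n; ++i) {
 *     // ...unordered part of the iteration...
 *     __kmpc_ordered(&loc, gtid);       // what "#pragma omp ordered" becomes
 *     // ...ordered part, executed in iteration order...
 *     __kmpc_end_ordered(&loc, gtid);
 *   }
 */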
977
978/*!
979@ingroup WORK_SHARING
980@param loc source location information.
981@param gtid global thread number.
982
983End execution of an <tt>ordered</tt> construct.
984*/
985 void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
986   int cid = 0;
987   kmp_info_t *th;
988
989   KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
990   __kmp_assert_valid_gtid(gtid);
991
992 #if USE_ITT_BUILD
993   __kmp_itt_ordered_end(gtid);
994 // TODO: ordered_wait_id
995 #endif /* USE_ITT_BUILD */
996
997   th = __kmp_threads[gtid];
998
999   if (th->th.th_dispatch->th_dxo_fcn != 0)
1000     (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
1001   else
1002     __kmp_parallel_dxo(&gtid, &cid, loc);
1003
1004 #if OMPT_SUPPORT && OMPT_OPTIONAL
1005   OMPT_STORE_RETURN_ADDRESS(gtid);
1006   if (ompt_enabled.ompt_callback_mutex_released) {
1007     ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1008         ompt_mutex_ordered,
1009         (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
1010             ->t.t_ordered.dt.t_value,
1011         OMPT_LOAD_RETURN_ADDRESS(gtid));
1012   }
1013 #endif
1014 }
1015
1016 #if KMP_USE_DYNAMIC_LOCK
1017
1018 static __forceinline void
1019 __kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
1020                           kmp_int32 gtid, kmp_indirect_locktag_t tag) {
1021   // Pointer to the allocated indirect lock is written to crit, while indexing
1022   // is ignored.
1023   void *idx;
1024   kmp_indirect_lock_t **lck;
1025   lck = (kmp_indirect_lock_t **)crit;
1026   kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
1027   KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
1028   KMP_SET_I_LOCK_LOCATION(ilk, loc);
1029   KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
1030   KA_TRACE(20,
1031            ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
1032 #if USE_ITT_BUILD
1033   __kmp_itt_critical_creating(ilk->lock, loc);
1034 #endif
1035   int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
1036   if (status == 0) {
1037 #if USE_ITT_BUILD
1038     __kmp_itt_critical_destroyed(ilk->lock);
1039 #endif
1040     // We don't really need to destroy the unclaimed lock here since it will be
1041     // cleaned up at program exit.
1042     // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
1043   }
1044   KMP_DEBUG_ASSERT(*lck != NULL);
1045 }
1046
1047// Fast-path acquire tas lock
1048#define KMP_ACQUIRE_TAS_LOCK(lock, gtid){ kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; kmp_int32 tas_free
= (locktag_tas); kmp_int32 tas_busy = ((gtid + 1) << 8
| locktag_tas); if ((&l->lk.poll)->load(std::memory_order_relaxed
) != tas_free || !__kmp_atomic_compare_store_acq(&l->lk
.poll, tas_free, tas_busy)) { kmp_uint32 spins; (!__kmp_itt_fsync_prepare_ptr__3_0
) ? (void)0 : __kmp_itt_fsync_prepare_ptr__3_0((void *)(l)); {
(spins) = __kmp_yield_init; }; kmp_backoff_t backoff = __kmp_spin_backoff_params
; do { if ((__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc
: __kmp_xproc)) { { __kmp_x86_pause(); if (((!0)) &&
(((__kmp_use_yield == 1) || (__kmp_use_yield == 2 &&
(((__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc
))))))) __kmp_yield(); }; } else { { __kmp_x86_pause(); if ((
(__kmp_use_yield == 1) || (__kmp_use_yield == 2 && ((
(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc
)))))) { (spins) -= 2; if (!(spins)) { __kmp_yield(); (spins)
= __kmp_yield_next; } } }; } __kmp_spin_backoff(&backoff
); } while ( (&l->lk.poll)->load(std::memory_order_relaxed
) != tas_free || !__kmp_atomic_compare_store_acq(&l->lk
.poll, tas_free, tas_busy)); } (!__kmp_itt_fsync_acquired_ptr__3_0
) ? (void)0 : __kmp_itt_fsync_acquired_ptr__3_0((void *)(l));
}
\
1049 { \
1050 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
1051 kmp_int32 tas_free = KMP_LOCK_FREE(tas)(locktag_tas); \
1052 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas)((gtid + 1) << 8 | locktag_tas); \
1053 if (KMP_ATOMIC_LD_RLX(&l->lk.poll)(&l->lk.poll)->load(std::memory_order_relaxed) != tas_free || \
1054 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
1055 kmp_uint32 spins; \
1056 KMP_FSYNC_PREPARE(l)(!__kmp_itt_fsync_prepare_ptr__3_0) ? (void)0 : __kmp_itt_fsync_prepare_ptr__3_0
((void *)(l))
; \
1057 KMP_INIT_YIELD(spins){ (spins) = __kmp_yield_init; }; \
1058 kmp_backoff_t backoff = __kmp_spin_backoff_params; \
1059 do { \
1060 if (TCR_4(__kmp_nth)(__kmp_nth) > \
1061 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
1062 KMP_YIELD(TRUE){ __kmp_x86_pause(); if (((!0)) && (((__kmp_use_yield
== 1) || (__kmp_use_yield == 2 && (((__kmp_nth) >
(__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc))))))) __kmp_yield
(); }
; \
1063 } else { \
1064 KMP_YIELD_SPIN(spins){ __kmp_x86_pause(); if (((__kmp_use_yield == 1) || (__kmp_use_yield
== 2 && (((__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc
: __kmp_xproc)))))) { (spins) -= 2; if (!(spins)) { __kmp_yield
(); (spins) = __kmp_yield_next; } } }
; \
1065 } \
1066 __kmp_spin_backoff(&backoff); \
1067 } while ( \
1068 KMP_ATOMIC_LD_RLX(&l->lk.poll)(&l->lk.poll)->load(std::memory_order_relaxed) != tas_free || \
1069 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \
1070 } \
1071 KMP_FSYNC_ACQUIRED(l)(!__kmp_itt_fsync_acquired_ptr__3_0) ? (void)0 : __kmp_itt_fsync_acquired_ptr__3_0
((void *)(l))
; \
1072 }
1073
1074// Fast-path test tas lock
1075#define KMP_TEST_TAS_LOCK(lock, gtid, rc){ kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; kmp_int32 tas_free
= (locktag_tas); kmp_int32 tas_busy = ((gtid + 1) << 8
| locktag_tas); rc = (&l->lk.poll)->load(std::memory_order_relaxed
) == tas_free && __kmp_atomic_compare_store_acq(&
l->lk.poll, tas_free, tas_busy); }
\
1076 { \
1077 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
1078 kmp_int32 tas_free = KMP_LOCK_FREE(tas)(locktag_tas); \
1079 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas)((gtid + 1) << 8 | locktag_tas); \
1080 rc = KMP_ATOMIC_LD_RLX(&l->lk.poll)(&l->lk.poll)->load(std::memory_order_relaxed) == tas_free && \
1081 __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \
1082 }
1083
1084// Fast-path release tas lock
1085#define KMP_RELEASE_TAS_LOCK(lock, gtid){ (&((kmp_tas_lock_t *)lock)->lk.poll)->store((locktag_tas
), std::memory_order_release); }
\
1086 { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas))(&((kmp_tas_lock_t *)lock)->lk.poll)->store((locktag_tas
), std::memory_order_release)
; }
1087
1088#if KMP_USE_FUTEX(1 && (0 || 1 || KMP_ARCH_ARM || 0))
1089
1090#include <sys/syscall.h>
1091#include <unistd.h>
1092#ifndef FUTEX_WAIT0
1093#define FUTEX_WAIT0 0
1094#endif
1095#ifndef FUTEX_WAKE1
1096#define FUTEX_WAKE1 1
1097#endif
1098
1099// Fast-path acquire futex lock
1100#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid){ kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; kmp_int32
gtid_code = (gtid + 1) << 1; ; (!__kmp_itt_fsync_prepare_ptr__3_0
) ? (void)0 : __kmp_itt_fsync_prepare_ptr__3_0((void *)(ftx))
; kmp_int32 poll_val; while ((poll_val = __sync_val_compare_and_swap
((volatile kmp_uint32 *)(&(ftx->lk.poll)), (kmp_uint32
)((locktag_futex)), (kmp_uint32)(((gtid_code) << 8 | locktag_futex
)))) != (locktag_futex)) { kmp_int32 cond = ((poll_val) >>
8) & 1; if (!cond) { if (!__sync_val_compare_and_swap((volatile
kmp_uint32 *)(&(ftx->lk.poll)), (kmp_uint32)(poll_val
), (kmp_uint32)(poll_val | ((1) << 8 | locktag_futex)))
) { continue; } poll_val |= ((1) << 8 | locktag_futex);
} kmp_int32 rc; if ((rc = syscall(202, &(ftx->lk.poll
), 0, poll_val, __null, __null, 0)) != 0) { continue; } gtid_code
|= 1; } (!__kmp_itt_fsync_acquired_ptr__3_0) ? (void)0 : __kmp_itt_fsync_acquired_ptr__3_0
((void *)(ftx)); }
\
1101 { \
1102 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1103 kmp_int32 gtid_code = (gtid + 1) << 1; \
1104 KMP_MB(); \
1105 KMP_FSYNC_PREPARE(ftx)(!__kmp_itt_fsync_prepare_ptr__3_0) ? (void)0 : __kmp_itt_fsync_prepare_ptr__3_0
((void *)(ftx))
; \
1106 kmp_int32 poll_val; \
1107 while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \__sync_val_compare_and_swap((volatile kmp_uint32 *)(&(ftx
->lk.poll)), (kmp_uint32)((locktag_futex)), (kmp_uint32)((
(gtid_code) << 8 | locktag_futex)))
1108 &(ftx->lk.poll), KMP_LOCK_FREE(futex), \__sync_val_compare_and_swap((volatile kmp_uint32 *)(&(ftx
->lk.poll)), (kmp_uint32)((locktag_futex)), (kmp_uint32)((
(gtid_code) << 8 | locktag_futex)))
1109 KMP_LOCK_BUSY(gtid_code, futex))__sync_val_compare_and_swap((volatile kmp_uint32 *)(&(ftx
->lk.poll)), (kmp_uint32)((locktag_futex)), (kmp_uint32)((
(gtid_code) << 8 | locktag_futex)))
) != KMP_LOCK_FREE(futex)(locktag_futex)) { \
1110 kmp_int32 cond = KMP_LOCK_STRIP(poll_val)((poll_val) >> 8) & 1; \
1111 if (!cond) { \
1112 if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \__sync_val_compare_and_swap((volatile kmp_uint32 *)(&(ftx
->lk.poll)), (kmp_uint32)(poll_val), (kmp_uint32)(poll_val
| ((1) << 8 | locktag_futex)))
1113 poll_val | \__sync_val_compare_and_swap((volatile kmp_uint32 *)(&(ftx
->lk.poll)), (kmp_uint32)(poll_val), (kmp_uint32)(poll_val
| ((1) << 8 | locktag_futex)))
1114 KMP_LOCK_BUSY(1, futex))__sync_val_compare_and_swap((volatile kmp_uint32 *)(&(ftx
->lk.poll)), (kmp_uint32)(poll_val), (kmp_uint32)(poll_val
| ((1) << 8 | locktag_futex)))
) { \
1115 continue; \
1116 } \
1117 poll_val |= KMP_LOCK_BUSY(1, futex)((1) << 8 | locktag_futex); \
1118 } \
1119 kmp_int32 rc; \
1120 if ((rc = syscall(__NR_futex202, &(ftx->lk.poll), FUTEX_WAIT0, poll_val, \
1121 NULL__null, NULL__null, 0)) != 0) { \
1122 continue; \
1123 } \
1124 gtid_code |= 1; \
1125 } \
1126 KMP_FSYNC_ACQUIRED(ftx)(!__kmp_itt_fsync_acquired_ptr__3_0) ? (void)0 : __kmp_itt_fsync_acquired_ptr__3_0
((void *)(ftx))
; \
1127 }
1128
1129// Fast-path test futex lock
1130#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc){ kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; if (__sync_bool_compare_and_swap
((volatile kmp_uint32 *)(&(ftx->lk.poll)), (kmp_uint32
)((locktag_futex)), (kmp_uint32)(((gtid + 1 << 1) <<
8 | locktag_futex)))) { (!__kmp_itt_fsync_acquired_ptr__3_0)
? (void)0 : __kmp_itt_fsync_acquired_ptr__3_0((void *)(ftx))
; rc = (!0); } else { rc = 0; } }
\
1131 { \
1132 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1133 if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \__sync_bool_compare_and_swap((volatile kmp_uint32 *)(&(ftx
->lk.poll)), (kmp_uint32)((locktag_futex)), (kmp_uint32)((
(gtid + 1 << 1) << 8 | locktag_futex)))
1134 KMP_LOCK_BUSY(gtid + 1 << 1, futex))__sync_bool_compare_and_swap((volatile kmp_uint32 *)(&(ftx
->lk.poll)), (kmp_uint32)((locktag_futex)), (kmp_uint32)((
(gtid + 1 << 1) << 8 | locktag_futex)))
) { \
1135 KMP_FSYNC_ACQUIRED(ftx)(!__kmp_itt_fsync_acquired_ptr__3_0) ? (void)0 : __kmp_itt_fsync_acquired_ptr__3_0
((void *)(ftx))
; \
1136 rc = TRUE(!0); \
1137 } else { \
1138 rc = FALSE0; \
1139 } \
1140 }
1141
1142// Fast-path release futex lock
1143#define KMP_RELEASE_FUTEX_LOCK(lock, gtid){ kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; ; (!__kmp_itt_fsync_releasing_ptr__3_0
) ? (void)0 : __kmp_itt_fsync_releasing_ptr__3_0((void *)(ftx
)); kmp_int32 poll_val = __sync_lock_test_and_set((volatile kmp_uint32
*)(&(ftx->lk.poll)), (kmp_uint32)((locktag_futex))); if
(((poll_val) >> 8) & 1) { syscall(202, &(ftx->
lk.poll), 1, ((1) << 8 | locktag_futex), __null, __null
, 0); } ; { __kmp_x86_pause(); if ((((__kmp_use_yield == 1 ||
__kmp_use_yield == 2) && (((__kmp_nth) > (__kmp_avail_proc
? __kmp_avail_proc : __kmp_xproc)))))) __kmp_yield(); }; }
\
1144 { \
1145 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1146 KMP_MB(); \
1147 KMP_FSYNC_RELEASING(ftx)(!__kmp_itt_fsync_releasing_ptr__3_0) ? (void)0 : __kmp_itt_fsync_releasing_ptr__3_0
((void *)(ftx))
; \
1148 kmp_int32 poll_val = \
1149 KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex))__sync_lock_test_and_set((volatile kmp_uint32 *)(&(ftx->
lk.poll)), (kmp_uint32)((locktag_futex)))
; \
1150 if (KMP_LOCK_STRIP(poll_val)((poll_val) >> 8) & 1) { \
1151 syscall(__NR_futex202, &(ftx->lk.poll), FUTEX_WAKE1, \
1152 KMP_LOCK_BUSY(1, futex)((1) << 8 | locktag_futex), NULL__null, NULL__null, 0); \
1153 } \
1154 KMP_MB(); \
1155 KMP_YIELD_OVERSUB(){ __kmp_x86_pause(); if ((((__kmp_use_yield == 1 || __kmp_use_yield
== 2) && (((__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc
: __kmp_xproc)))))) __kmp_yield(); }
; \
1156 }
1157
1158#endif // KMP_USE_FUTEX
1159
1160#else // KMP_USE_DYNAMIC_LOCK
1161
1162static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
1163 ident_t const *loc,
1164 kmp_int32 gtid) {
1165 kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
1166
1167 // Because of the double-check, the following load doesn't need to be volatile
1168 kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp)((void *)(*lck_pp));
1169
1170 if (lck == NULL__null) {
1171 void *idx;
1172
1173 // Allocate & initialize the lock.
1174 // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
1175 lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section1);
1176 __kmp_init_user_lock_with_checks(lck);
1177 __kmp_set_user_lock_location(lck, loc);
1178#if USE_ITT_BUILD1
1179 __kmp_itt_critical_creating(lck);
1180// __kmp_itt_critical_creating() should be called *before* the first usage
1181// of the underlying lock. It is the only place where we can guarantee it. There
1182// is a chance the lock will be destroyed without ever being used, but that is
1183// not a problem, because this is not a real event seen by the user but rather
1184// the setting of a name for an object (the lock). See more details in kmp_itt.h.
1185#endif /* USE_ITT_BUILD */
1186
1187 // Use a cmpxchg instruction to slam the start of the critical section with
1188 // the lock pointer. If another thread beat us to it, deallocate the lock,
1189 // and use the lock that the other thread allocated.
1190 int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck)__sync_bool_compare_and_swap((void *volatile *)(lck_pp), (void
*)(0), (void *)(lck))
;
1191
1192 if (status == 0) {
1193// Deallocate the lock and reload the value.
1194#if USE_ITT_BUILD1
1195 __kmp_itt_critical_destroyed(lck);
1196// Let ITT know the lock is destroyed and the same memory location may be reused
1197// for another purpose.
1198#endif /* USE_ITT_BUILD */
1199 __kmp_destroy_user_lock_with_checks(lck);
1200 __kmp_user_lock_free(&idx, gtid, lck);
1201 lck = (kmp_user_lock_p)TCR_PTR(*lck_pp)((void *)(*lck_pp));
1202 KMP_DEBUG_ASSERT(lck != NULL)if (!(lck != __null)) { __kmp_debug_assert("lck != __null", "openmp/runtime/src/kmp_csupport.cpp"
, 1202); }
;
1203 }
1204 }
1205 return lck;
1206}
1207
1208#endif // KMP_USE_DYNAMIC_LOCK
1209
1210/*!
1211@ingroup WORK_SHARING
1212@param loc source location information.
1213@param global_tid global thread number.
1214@param crit identity of the critical section. This could be a pointer to a lock
1215associated with the critical section, or some other suitably unique value.
1216
1217Enter code protected by a `critical` construct.
1218This function blocks until the executing thread can enter the critical section.
1219*/
1220void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1221 kmp_critical_name *crit) {
1222#if KMP_USE_DYNAMIC_LOCK1
1223#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
1224 OMPT_STORE_RETURN_ADDRESS(global_tid)OmptReturnAddressGuard ReturnAddressGuard{global_tid, __builtin_return_address
(0)};
;
1225#endif // OMPT_SUPPORT
1226 __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
1227#else
1228 KMP_COUNT_BLOCK(OMP_CRITICAL)((void)0);
1229#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
1230 ompt_state_t prev_state = ompt_state_undefined;
1231 ompt_thread_info_t ti;
1232#endif
1233 kmp_user_lock_p lck;
1234
1235 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmpc_critical: called T#%d\n"
, global_tid); }
;
1236 __kmp_assert_valid_gtid(global_tid);
1237
1238 // TODO: add THR_OVHD_STATE
1239
1240 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait)((void)0);
1241 KMP_CHECK_USER_LOCK_INIT();
1242
1243 if ((__kmp_user_lock_kind == lk_tas) &&
1244 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZEsizeof(void *))) {
1245 lck = (kmp_user_lock_p)crit;
1246 }
1247#if KMP_USE_FUTEX(1 && (0 || 1 || KMP_ARCH_ARM || 0))
1248 else if ((__kmp_user_lock_kind == lk_futex) &&
1249 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZEsizeof(void *))) {
1250 lck = (kmp_user_lock_p)crit;
1251 }
1252#endif
1253 else { // ticket, queuing or drdpa
1254 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
1255 }
1256
1257 if (__kmp_env_consistency_check)
1258 __kmp_push_sync(global_tid, ct_critical, loc, lck);
1259
 1260 // Since the critical directive binds to all threads, not just the current
 1261 // team, we have to check this even if we are in a serialized team.
 1262 // Also, even if we are the uber thread, we still have to acquire the lock,
 1263 // as we have to contend with sibling threads.
1264
1265#if USE_ITT_BUILD1
1266 __kmp_itt_critical_acquiring(lck);
1267#endif /* USE_ITT_BUILD */
1268#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
1269 OMPT_STORE_RETURN_ADDRESS(gtid)OmptReturnAddressGuard ReturnAddressGuard{gtid, __builtin_return_address
(0)};
;
1270 void *codeptr_ra = NULL__null;
1271 if (ompt_enabled.enabled) {
1272 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1273 /* OMPT state update */
1274 prev_state = ti.state;
1275 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1276 ti.state = ompt_state_wait_critical;
1277
1278 /* OMPT event callback */
1279 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
1280 if (ompt_enabled.ompt_callback_mutex_acquire) {
1281 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)ompt_callback_mutex_acquire_callback(
1282 ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
1283 (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1284 }
1285 }
1286#endif
 1287 // The value of 'crit' should be usable as a critical_id for the critical
 1288 // section directive.
1289 __kmp_acquire_user_lock_with_checks(lck, global_tid);
1290
1291#if USE_ITT_BUILD1
1292 __kmp_itt_critical_acquired(lck);
1293#endif /* USE_ITT_BUILD */
1294#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
1295 if (ompt_enabled.enabled) {
1296 /* OMPT state update */
1297 ti.state = prev_state;
1298 ti.wait_id = 0;
1299
1300 /* OMPT event callback */
1301 if (ompt_enabled.ompt_callback_mutex_acquired) {
1302 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)ompt_callback_mutex_acquired_callback(
1303 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1304 }
1305 }
1306#endif
1307 KMP_POP_PARTITIONED_TIMER()((void)0);
1308
1309 KMP_PUSH_PARTITIONED_TIMER(OMP_critical)((void)0);
1310 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid))if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmpc_critical: done T#%d\n"
, global_tid); }
;
1311#endif // KMP_USE_DYNAMIC_LOCK
1312}
1313
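Editorial note: for orientation, a minimal sketch (not part of the analyzed source) of how a compiler might lower '#pragma omp critical' onto the entry points documented above; 'crit_storage', 'loc', and 'gtid' are placeholder names.

// Hypothetical lowering sketch -- placeholder names, not runtime symbols.
static kmp_critical_name crit_storage;          // zero-initialized lock identity
void lowered_critical_region(ident_t *loc, kmp_int32 gtid) {
  __kmpc_critical(loc, gtid, &crit_storage);    // blocks until the section is free
  /* ... user code protected by the critical construct ... */
  __kmpc_end_critical(loc, gtid, &crit_storage);
}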
1314#if KMP_USE_DYNAMIC_LOCK1
1315
1316// Converts the given hint to an internal lock implementation
1317static __forceinline__inline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
1318#if KMP_USE_TSX(0 || 1) && !0
1319#define KMP_TSX_LOCK(seq)lockseq_seq lockseq_##seq
1320#else
1321#define KMP_TSX_LOCK(seq)lockseq_seq __kmp_user_lock_seq
1322#endif
1323
1324#if KMP_ARCH_X860 || KMP_ARCH_X86_641
1325#define KMP_CPUINFO_RTM(__kmp_cpuinfo.flags.rtm) (__kmp_cpuinfo.flags.rtm)
1326#else
1327#define KMP_CPUINFO_RTM(__kmp_cpuinfo.flags.rtm) 0
1328#endif
1329
1330 // Hints that do not require further logic
1331 if (hint & kmp_lock_hint_hle)
1332 return KMP_TSX_LOCK(hle)lockseq_hle;
1333 if (hint & kmp_lock_hint_rtm)
1334 return KMP_CPUINFO_RTM(__kmp_cpuinfo.flags.rtm) ? KMP_TSX_LOCK(rtm_queuing)lockseq_rtm_queuing : __kmp_user_lock_seq;
1335 if (hint & kmp_lock_hint_adaptive)
1336 return KMP_CPUINFO_RTM(__kmp_cpuinfo.flags.rtm) ? KMP_TSX_LOCK(adaptive)lockseq_adaptive : __kmp_user_lock_seq;
1337
1338 // Rule out conflicting hints first by returning the default lock
1339 if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
1340 return __kmp_user_lock_seq;
1341 if ((hint & omp_lock_hint_speculative) &&
1342 (hint & omp_lock_hint_nonspeculative))
1343 return __kmp_user_lock_seq;
1344
1345 // Do not even consider speculation when it appears to be contended
1346 if (hint & omp_lock_hint_contended)
1347 return lockseq_queuing;
1348
1349 // Uncontended lock without speculation
1350 if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
1351 return lockseq_tas;
1352
1353 // Use RTM lock for speculation
1354 if (hint & omp_lock_hint_speculative)
1355 return KMP_CPUINFO_RTM(__kmp_cpuinfo.flags.rtm) ? KMP_TSX_LOCK(rtm_spin)lockseq_rtm_spin : __kmp_user_lock_seq;
1356
1357 return __kmp_user_lock_seq;
1358}
1359
1360#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
1361#if KMP_USE_DYNAMIC_LOCK1
1362static kmp_mutex_impl_t
1363__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
1364 if (user_lock) {
1365 switch (KMP_EXTRACT_D_TAG(user_lock)(*((kmp_dyna_lock_t *)(user_lock)) & ((1 << 8) - 1)
& -(*((kmp_dyna_lock_t *)(user_lock)) & 1))
) {
1366 case 0:
1367 break;
1368#if KMP_USE_FUTEX(1 && (0 || 1 || KMP_ARCH_ARM || 0))
1369 case locktag_futex:
1370 return kmp_mutex_impl_queuing;
1371#endif
1372 case locktag_tas:
1373 return kmp_mutex_impl_spin;
1374#if KMP_USE_TSX(0 || 1) && !0
1375 case locktag_hle:
1376 case locktag_rtm_spin:
1377 return kmp_mutex_impl_speculative;
1378#endif
1379 default:
1380 return kmp_mutex_impl_none;
1381 }
1382 ilock = KMP_LOOKUP_I_LOCK(user_lock)((sizeof(int) < sizeof(void *)) ? __kmp_get_i_lock((*(kmp_lock_index_t
*)(user_lock) >> 1)) : *((kmp_indirect_lock_t **)(user_lock
)))
;
1383 }
1384 KMP_ASSERT(ilock)if (!(ilock)) { __kmp_debug_assert("ilock", "openmp/runtime/src/kmp_csupport.cpp"
, 1384); }
;
1385 switch (ilock->type) {
1386#if KMP_USE_TSX(0 || 1) && !0
1387 case locktag_adaptive:
1388 case locktag_rtm_queuing:
1389 return kmp_mutex_impl_speculative;
1390#endif
1391 case locktag_nested_tas:
1392 return kmp_mutex_impl_spin;
1393#if KMP_USE_FUTEX(1 && (0 || 1 || KMP_ARCH_ARM || 0))
1394 case locktag_nested_futex:
1395#endif
1396 case locktag_ticket:
1397 case locktag_queuing:
1398 case locktag_drdpa:
1399 case locktag_nested_ticket:
1400 case locktag_nested_queuing:
1401 case locktag_nested_drdpa:
1402 return kmp_mutex_impl_queuing;
1403 default:
1404 return kmp_mutex_impl_none;
1405 }
1406}
1407#else
1408// For locks without dynamic binding
1409static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
1410 switch (__kmp_user_lock_kind) {
1411 case lk_tas:
1412 return kmp_mutex_impl_spin;
1413#if KMP_USE_FUTEX(1 && (0 || 1 || KMP_ARCH_ARM || 0))
1414 case lk_futex:
1415#endif
1416 case lk_ticket:
1417 case lk_queuing:
1418 case lk_drdpa:
1419 return kmp_mutex_impl_queuing;
1420#if KMP_USE_TSX(0 || 1) && !0
1421 case lk_hle:
1422 case lk_rtm_queuing:
1423 case lk_rtm_spin:
1424 case lk_adaptive:
1425 return kmp_mutex_impl_speculative;
1426#endif
1427 default:
1428 return kmp_mutex_impl_none;
1429 }
1430}
1431#endif // KMP_USE_DYNAMIC_LOCK
1432#endif // OMPT_SUPPORT && OMPT_OPTIONAL
1433
1434/*!
1435@ingroup WORK_SHARING
1436@param loc source location information.
1437@param global_tid global thread number.
1438@param crit identity of the critical section. This could be a pointer to a lock
1439associated with the critical section, or some other suitably unique value.
1440@param hint the lock hint.
1441
1442Enter code protected by a `critical` construct with a hint. The hint value is
1443used to suggest a lock implementation. This function blocks until the executing
1444thread can enter the critical section unless the hint suggests use of
1445speculative execution and the hardware supports it.
1446*/
1447void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1448 kmp_critical_name *crit, uint32_t hint) {
1449 KMP_COUNT_BLOCK(OMP_CRITICAL)((void)0);
1450 kmp_user_lock_p lck;
1451#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
1452 ompt_state_t prev_state = ompt_state_undefined;
1453 ompt_thread_info_t ti;
 1454 // This is the case if called from __kmpc_critical:
1455 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid)__ompt_load_return_address(global_tid);
1456 if (!codeptr)
1457 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
1458#endif
1459
1460 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmpc_critical: called T#%d\n"
, global_tid); }
;
1461 __kmp_assert_valid_gtid(global_tid);
1462
1463 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
1464 // Check if it is initialized.
1465 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait)((void)0);
1466 kmp_dyna_lockseq_t lockseq = __kmp_map_hint_to_lock(hint);
1467 if (*lk == 0) {
1468 if (KMP_IS_D_LOCK(lockseq)((lockseq) >= lockseq_tas && (lockseq) <= lockseq_rtm_spin
)
) {
1469 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,__sync_bool_compare_and_swap((volatile kmp_uint32 *)((volatile
kmp_int32 *)crit), (kmp_uint32)(0), (kmp_uint32)(((lockseq) <<
1 | 1)))
1470 KMP_GET_D_TAG(lockseq))__sync_bool_compare_and_swap((volatile kmp_uint32 *)((volatile
kmp_int32 *)crit), (kmp_uint32)(0), (kmp_uint32)(((lockseq) <<
1 | 1)))
;
1471 } else {
1472 __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lockseq)(kmp_indirect_locktag_t)((lockseq)-lockseq_ticket));
1473 }
1474 }
1475 // Branch for accessing the actual lock object and set operation. This
1476 // branching is inevitable since this lock initialization does not follow the
1477 // normal dispatch path (lock table is not used).
1478 if (KMP_EXTRACT_D_TAG(lk)(*((kmp_dyna_lock_t *)(lk)) & ((1 << 8) - 1) & -
(*((kmp_dyna_lock_t *)(lk)) & 1))
!= 0) {
1479 lck = (kmp_user_lock_p)lk;
1480 if (__kmp_env_consistency_check) {
1481 __kmp_push_sync(global_tid, ct_critical, loc, lck,
1482 __kmp_map_hint_to_lock(hint));
1483 }
1484#if USE_ITT_BUILD1
1485 __kmp_itt_critical_acquiring(lck);
1486#endif
1487#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
1488 if (ompt_enabled.enabled) {
1489 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1490 /* OMPT state update */
1491 prev_state = ti.state;
1492 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1493 ti.state = ompt_state_wait_critical;
1494
1495 /* OMPT event callback */
1496 if (ompt_enabled.ompt_callback_mutex_acquire) {
1497 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)ompt_callback_mutex_acquire_callback(
1498 ompt_mutex_critical, (unsigned int)hint,
1499 __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
1500 codeptr);
1501 }
1502 }
1503#endif
1504#if KMP_USE_INLINED_TAS(1 && (0 || 1 || KMP_ARCH_ARM)) && 1
1505 if (lockseq == lockseq_tas && !__kmp_env_consistency_check) {
1506 KMP_ACQUIRE_TAS_LOCK(lck, global_tid){ kmp_tas_lock_t *l = (kmp_tas_lock_t *)lck; kmp_int32 tas_free
= (locktag_tas); kmp_int32 tas_busy = ((global_tid + 1) <<
8 | locktag_tas); if ((&l->lk.poll)->load(std::memory_order_relaxed
) != tas_free || !__kmp_atomic_compare_store_acq(&l->lk
.poll, tas_free, tas_busy)) { kmp_uint32 spins; (!__kmp_itt_fsync_prepare_ptr__3_0
) ? (void)0 : __kmp_itt_fsync_prepare_ptr__3_0((void *)(l)); {
(spins) = __kmp_yield_init; }; kmp_backoff_t backoff = __kmp_spin_backoff_params
; do { if ((__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc
: __kmp_xproc)) { { __kmp_x86_pause(); if (((!0)) &&
(((__kmp_use_yield == 1) || (__kmp_use_yield == 2 &&
(((__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc
))))))) __kmp_yield(); }; } else { { __kmp_x86_pause(); if ((
(__kmp_use_yield == 1) || (__kmp_use_yield == 2 && ((
(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc
)))))) { (spins) -= 2; if (!(spins)) { __kmp_yield(); (spins)
= __kmp_yield_next; } } }; } __kmp_spin_backoff(&backoff
); } while ( (&l->lk.poll)->load(std::memory_order_relaxed
) != tas_free || !__kmp_atomic_compare_store_acq(&l->lk
.poll, tas_free, tas_busy)); } (!__kmp_itt_fsync_acquired_ptr__3_0
) ? (void)0 : __kmp_itt_fsync_acquired_ptr__3_0((void *)(l));
}
;
1507 } else
1508#elif KMP_USE_INLINED_FUTEX(1 && (0 || 1 || KMP_ARCH_ARM || 0)) && 0
1509 if (lockseq == lockseq_futex && !__kmp_env_consistency_check) {
1510 KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid){ kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lck; kmp_int32 gtid_code
= (global_tid + 1) << 1; ; (!__kmp_itt_fsync_prepare_ptr__3_0
) ? (void)0 : __kmp_itt_fsync_prepare_ptr__3_0((void *)(ftx))
; kmp_int32 poll_val; while ((poll_val = __sync_val_compare_and_swap
((volatile kmp_uint32 *)(&(ftx->lk.poll)), (kmp_uint32
)((locktag_futex)), (kmp_uint32)(((gtid_code) << 8 | locktag_futex
)))) != (locktag_futex)) { kmp_int32 cond = ((poll_val) >>
8) & 1; if (!cond) { if (!__sync_val_compare_and_swap((volatile
kmp_uint32 *)(&(ftx->lk.poll)), (kmp_uint32)(poll_val
), (kmp_uint32)(poll_val | ((1) << 8 | locktag_futex)))
) { continue; } poll_val |= ((1) << 8 | locktag_futex);
} kmp_int32 rc; if ((rc = syscall(202, &(ftx->lk.poll
), 0, poll_val, __null, __null, 0)) != 0) { continue; } gtid_code
|= 1; } (!__kmp_itt_fsync_acquired_ptr__3_0) ? (void)0 : __kmp_itt_fsync_acquired_ptr__3_0
((void *)(ftx)); }
;
1511 } else
1512#endif
1513 {
1514 KMP_D_LOCK_FUNC(lk, set)__kmp_direct_set[(*((kmp_dyna_lock_t *)(lk)) & ((1 <<
8) - 1) & -(*((kmp_dyna_lock_t *)(lk)) & 1))]
(lk, global_tid);
1515 }
1516 } else {
1517 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
1518 lck = ilk->lock;
1519 if (__kmp_env_consistency_check) {
1520 __kmp_push_sync(global_tid, ct_critical, loc, lck,
1521 __kmp_map_hint_to_lock(hint));
1522 }
1523#if USE_ITT_BUILD1
1524 __kmp_itt_critical_acquiring(lck);
1525#endif
1526#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
1527 if (ompt_enabled.enabled) {
1528 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1529 /* OMPT state update */
1530 prev_state = ti.state;
1531 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1532 ti.state = ompt_state_wait_critical;
1533
1534 /* OMPT event callback */
1535 if (ompt_enabled.ompt_callback_mutex_acquire) {
1536 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)ompt_callback_mutex_acquire_callback(
1537 ompt_mutex_critical, (unsigned int)hint,
1538 __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
1539 codeptr);
1540 }
1541 }
1542#endif
1543 KMP_I_LOCK_FUNC(ilk, set)__kmp_indirect_set[((kmp_indirect_lock_t *)(ilk))->type](lck, global_tid);
1544 }
1545 KMP_POP_PARTITIONED_TIMER()((void)0);
1546
1547#if USE_ITT_BUILD1
1548 __kmp_itt_critical_acquired(lck);
1549#endif /* USE_ITT_BUILD */
1550#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
1551 if (ompt_enabled.enabled) {
1552 /* OMPT state update */
1553 ti.state = prev_state;
1554 ti.wait_id = 0;
1555
1556 /* OMPT event callback */
1557 if (ompt_enabled.ompt_callback_mutex_acquired) {
1558 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)ompt_callback_mutex_acquired_callback(
1559 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
1560 }
1561 }
1562#endif
1563
1564 KMP_PUSH_PARTITIONED_TIMER(OMP_critical)((void)0);
1565 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid))if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmpc_critical: done T#%d\n"
, global_tid); }
;
1566} // __kmpc_critical_with_hint
1567
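Editorial note: a hedged sketch of the hinted form, assuming a lowering of '#pragma omp critical hint(omp_lock_hint_contended)'; the hint is only a suggestion, mapped by __kmp_map_hint_to_lock() above, and the names below are placeholders.

// Hypothetical lowering sketch for a hinted critical region.
static kmp_critical_name hinted_crit;                       // zero-initialized
void lowered_hinted_region(ident_t *loc, kmp_int32 gtid) {
  __kmpc_critical_with_hint(loc, gtid, &hinted_crit, omp_lock_hint_contended);
  /* ... protected region; a queuing lock is likely selected for this hint ... */
  __kmpc_end_critical(loc, gtid, &hinted_crit);
}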
1568#endif // KMP_USE_DYNAMIC_LOCK
1569
1570/*!
1571@ingroup WORK_SHARING
1572@param loc source location information.
1573@param global_tid global thread number .
1574@param crit identity of the critical section. This could be a pointer to a lock
1575associated with the critical section, or some other suitably unique value.
1576
1577Leave a critical section, releasing any lock that was held during its execution.
1578*/
1579void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1580 kmp_critical_name *crit) {
1581 kmp_user_lock_p lck;
1582
1583 KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmpc_end_critical: called T#%d\n"
, global_tid); }
;
1584
1585#if KMP_USE_DYNAMIC_LOCK1
1586 int locktag = KMP_EXTRACT_D_TAG(crit)(*((kmp_dyna_lock_t *)(crit)) & ((1 << 8) - 1) &
-(*((kmp_dyna_lock_t *)(crit)) & 1))
;
1587 if (locktag) {
1588 lck = (kmp_user_lock_p)crit;
1589 KMP_ASSERT(lck != NULL)if (!(lck != __null)) { __kmp_debug_assert("lck != NULL", "openmp/runtime/src/kmp_csupport.cpp"
, 1589); }
;
1590 if (__kmp_env_consistency_check) {
1591 __kmp_pop_sync(global_tid, ct_critical, loc);
1592 }
1593#if USE_ITT_BUILD1
1594 __kmp_itt_critical_releasing(lck);
1595#endif
1596#if KMP_USE_INLINED_TAS(1 && (0 || 1 || KMP_ARCH_ARM)) && 1
1597 if (locktag == locktag_tas && !__kmp_env_consistency_check) {
1598 KMP_RELEASE_TAS_LOCK(lck, global_tid){ (&((kmp_tas_lock_t *)lck)->lk.poll)->store((locktag_tas
), std::memory_order_release); }
;
1599 } else
1600#elif KMP_USE_INLINED_FUTEX(1 && (0 || 1 || KMP_ARCH_ARM || 0)) && 0
1601 if (locktag == locktag_futex && !__kmp_env_consistency_check) {
1602 KMP_RELEASE_FUTEX_LOCK(lck, global_tid){ kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lck; ; (!__kmp_itt_fsync_releasing_ptr__3_0
) ? (void)0 : __kmp_itt_fsync_releasing_ptr__3_0((void *)(ftx
)); kmp_int32 poll_val = __sync_lock_test_and_set((volatile kmp_uint32
*)(&(ftx->lk.poll)), (kmp_uint32)((locktag_futex))); if
(((poll_val) >> 8) & 1) { syscall(202, &(ftx->
lk.poll), 1, ((1) << 8 | locktag_futex), __null, __null
, 0); } ; { __kmp_x86_pause(); if ((((__kmp_use_yield == 1 ||
__kmp_use_yield == 2) && (((__kmp_nth) > (__kmp_avail_proc
? __kmp_avail_proc : __kmp_xproc)))))) __kmp_yield(); }; }
;
1603 } else
1604#endif
1605 {
1606 KMP_D_LOCK_FUNC(lck, unset)__kmp_direct_unset[(*((kmp_dyna_lock_t *)(lck)) & ((1 <<
8) - 1) & -(*((kmp_dyna_lock_t *)(lck)) & 1))]
((kmp_dyna_lock_t *)lck, global_tid);
1607 }
1608 } else {
1609 kmp_indirect_lock_t *ilk =
1610 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit))((void *)(*((kmp_indirect_lock_t **)crit)));
1611 KMP_ASSERT(ilk != NULL)if (!(ilk != __null)) { __kmp_debug_assert("ilk != NULL", "openmp/runtime/src/kmp_csupport.cpp"
, 1611); }
;
1612 lck = ilk->lock;
1613 if (__kmp_env_consistency_check) {
1614 __kmp_pop_sync(global_tid, ct_critical, loc);
1615 }
1616#if USE_ITT_BUILD1
1617 __kmp_itt_critical_releasing(lck);
1618#endif
1619 KMP_I_LOCK_FUNC(ilk, unset)__kmp_indirect_unset[((kmp_indirect_lock_t *)(ilk))->type](lck, global_tid);
1620 }
1621
1622#else // KMP_USE_DYNAMIC_LOCK
1623
1624 if ((__kmp_user_lock_kind == lk_tas) &&
1625 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZEsizeof(void *))) {
1626 lck = (kmp_user_lock_p)crit;
1627 }
1628#if KMP_USE_FUTEX(1 && (0 || 1 || KMP_ARCH_ARM || 0))
1629 else if ((__kmp_user_lock_kind == lk_futex) &&
1630 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZEsizeof(void *))) {
1631 lck = (kmp_user_lock_p)crit;
1632 }
1633#endif
1634 else { // ticket, queuing or drdpa
1635 lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit))((void *)(*((kmp_user_lock_p *)crit)));
1636 }
1637
1638 KMP_ASSERT(lck != NULL)if (!(lck != __null)) { __kmp_debug_assert("lck != NULL", "openmp/runtime/src/kmp_csupport.cpp"
, 1638); }
;
1639
1640 if (__kmp_env_consistency_check)
1641 __kmp_pop_sync(global_tid, ct_critical, loc);
1642
1643#if USE_ITT_BUILD1
1644 __kmp_itt_critical_releasing(lck);
1645#endif /* USE_ITT_BUILD */
 1646 // The value of 'crit' should be usable as a critical_id for the critical
 1647 // section directive.
1648 __kmp_release_user_lock_with_checks(lck, global_tid);
1649
1650#endif // KMP_USE_DYNAMIC_LOCK
1651
1652#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
1653 /* OMPT release event triggers after lock is released; place here to trigger
1654 * for all #if branches */
1655 OMPT_STORE_RETURN_ADDRESS(global_tid)OmptReturnAddressGuard ReturnAddressGuard{global_tid, __builtin_return_address
(0)};
;
1656 if (ompt_enabled.ompt_callback_mutex_released) {
1657 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)ompt_callback_mutex_released_callback(
1658 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
1659 OMPT_LOAD_RETURN_ADDRESS(0)__ompt_load_return_address(0));
1660 }
1661#endif
1662
1663 KMP_POP_PARTITIONED_TIMER()((void)0);
1664 KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid))if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmpc_end_critical: done T#%d\n"
, global_tid); }
;
1665}
1666
1667/*!
1668@ingroup SYNCHRONIZATION
1669@param loc source location information
1670@param global_tid thread id.
1671@return one if the thread should execute the master block, zero otherwise
1672
1673Start execution of a combined barrier and master. The barrier is executed inside
1674this function.
1675*/
1676kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1677 int status;
1678 KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmpc_barrier_master: called T#%d\n"
, global_tid); }
;
1679 __kmp_assert_valid_gtid(global_tid);
1680
1681 if (!TCR_4(__kmp_init_parallel)(__kmp_init_parallel))
1682 __kmp_parallel_initialize();
1683
1684 __kmp_resume_if_soft_paused();
1685
1686 if (__kmp_env_consistency_check)
1687 __kmp_check_barrier(global_tid, ct_barrier, loc);
1688
1689#if OMPT_SUPPORT1
1690 ompt_frame_t *ompt_frame;
1691 if (ompt_enabled.enabled) {
1692 __ompt_get_task_info_internal(0, NULL__null, NULL__null, &ompt_frame, NULL__null, NULL__null);
1693 if (ompt_frame->enter_frame.ptr == NULL__null)
1694 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0)__builtin_frame_address(0);
1695 }
1696 OMPT_STORE_RETURN_ADDRESS(global_tid)OmptReturnAddressGuard ReturnAddressGuard{global_tid, __builtin_return_address
(0)};
;
1697#endif
1698#if USE_ITT_NOTIFY1
1699 __kmp_threads[global_tid]->th.th_ident = loc;
1700#endif
1701 status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE(!0), 0, NULL__null, NULL__null);
1702#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
1703 if (ompt_enabled.enabled) {
1704 ompt_frame->enter_frame = ompt_data_none{0};
1705 }
1706#endif
1707
1708 return (status != 0) ? 0 : 1;
1709}
1710
1711/*!
1712@ingroup SYNCHRONIZATION
1713@param loc source location information
1714@param global_tid thread id.
1715
1716Complete the execution of a combined barrier and master. This function should
1717only be called at the completion of the <tt>master</tt> code. Other threads will
1718still be waiting at the barrier and this call releases them.
1719*/
1720void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1721 KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmpc_end_barrier_master: called T#%d\n"
, global_tid); }
;
1722 __kmp_assert_valid_gtid(global_tid);
1723 __kmp_end_split_barrier(bs_plain_barrier, global_tid);
1724}
1725
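Editorial note: a minimal sketch (assumed names) of how the two entry points above pair up for a combined barrier-and-master region.

// Hypothetical usage sketch for the combined barrier/master entry points.
void lowered_barrier_master(ident_t *loc, kmp_int32 gtid) {
  if (__kmpc_barrier_master(loc, gtid)) {   // barrier runs inside; returns 1 on the master
    /* ... master-only code ... */
    __kmpc_end_barrier_master(loc, gtid);   // releases the threads still held waiting
  }
}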
1726/*!
1727@ingroup SYNCHRONIZATION
1728@param loc source location information
1729@param global_tid thread id.
1730@return one if the thread should execute the master block, zero otherwise
1731
1732Start execution of a combined barrier and master(nowait) construct.
1733The barrier is executed inside this function.
 1734There is no equivalent "end" function, since the barrier is not split here:
no other threads are left waiting for the master block to finish, so there is
nothing to release.
 1735*/
1736kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
1737 kmp_int32 ret;
1738 KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmpc_barrier_master_nowait: called T#%d\n"
, global_tid); }
;
1. Assuming 'kmp_c_debug' is < 10
2. Taking false branch
1739 __kmp_assert_valid_gtid(global_tid);
1740
1741 if (!TCR_4(__kmp_init_parallel)(__kmp_init_parallel))
3. Assuming '__kmp_init_parallel' is not equal to 0
4. Taking false branch
1742 __kmp_parallel_initialize();
1743
1744 __kmp_resume_if_soft_paused();
1745
1746 if (__kmp_env_consistency_check) {
5. Assuming '__kmp_env_consistency_check' is 0
6. Taking false branch
1747 if (loc == 0) {
1748 KMP_WARNING(ConstructIdentInvalid)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_ConstructIdentInvalid
), __kmp_msg_null)
; // ??? What does it mean for the user?
1749 }
1750 __kmp_check_barrier(global_tid, ct_barrier, loc);
1751 }
1752
1753#if OMPT_SUPPORT1
1754 ompt_frame_t *ompt_frame;
7. 'ompt_frame' declared without an initial value
1755 if (ompt_enabled.enabled) {
8. Assuming field 'enabled' is 0
9. Taking false branch
1756 __ompt_get_task_info_internal(0, NULL__null, NULL__null, &ompt_frame, NULL__null, NULL__null);
1757 if (ompt_frame->enter_frame.ptr == NULL__null)
1758 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0)__builtin_frame_address(0);
1759 }
1760 OMPT_STORE_RETURN_ADDRESS(global_tid)OmptReturnAddressGuard ReturnAddressGuard{global_tid, __builtin_return_address
(0)};
;
1761#endif
1762#if USE_ITT_NOTIFY1
1763 __kmp_threads[global_tid]->th.th_ident = loc;
1764#endif
1765 __kmp_barrier(bs_plain_barrier, global_tid, FALSE0, 0, NULL__null, NULL__null);
1766#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
1767 if (ompt_enabled.enabled) {
10. Assuming field 'enabled' is not equal to 0
11. Taking true branch
1768 ompt_frame->enter_frame = ompt_data_none{0};
12. Called C++ object pointer is uninitialized
1769 }
1770#endif
1771
1772 ret = __kmpc_master(loc, global_tid);
1773
1774 if (__kmp_env_consistency_check) {
1775 /* there's no __kmpc_end_master called; so the (stats) */
1776 /* actions of __kmpc_end_master are done here */
1777 if (ret) {
1778 /* only one thread should do the pop since only */
1779 /* one did the push (see __kmpc_master()) */
1780 __kmp_pop_sync(global_tid, ct_master, loc);
1781 }
1782 }
1783
1784 return (ret);
1785}
1786
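Editorial note: the analyzer path above (steps 7-12) reduces to the following pattern. This is a hypothetical stand-alone reduction, not runtime code: the pointer is assigned only under the first read of the guard, and the analyzer models the second read independently, so it can reach the use with 'ompt_frame' still uninitialized.

// Hypothetical reduction of the flagged path in __kmpc_barrier_master_nowait.
void reduced_flagged_path(kmp_int32 gtid) {
  ompt_frame_t *ompt_frame;                            // step 7: no initial value
  if (ompt_enabled.enabled) {                          // steps 8-9: assumed false
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
  }
  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
  if (ompt_enabled.enabled) {                          // steps 10-11: assumed true
    ompt_frame->enter_frame = ompt_data_none;          // step 12: uninitialized use
  }
}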
1787/* The BARRIER for a SINGLE process section is always explicit */
1788/*!
1789@ingroup WORK_SHARING
1790@param loc source location information
1791@param global_tid global thread number
1792@return One if this thread should execute the single construct, zero otherwise.
1793
1794Test whether to execute a <tt>single</tt> construct.
 1795There are no implicit barriers in the two "single" calls; rather, the compiler
 1796should introduce an explicit barrier if one is required.
1797*/
1798
1799kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
1800 __kmp_assert_valid_gtid(global_tid);
1801 kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE(!0));
1802
1803 if (rc) {
1804 // We are going to execute the single statement, so we should count it.
1805 KMP_COUNT_BLOCK(OMP_SINGLE)((void)0);
1806 KMP_PUSH_PARTITIONED_TIMER(OMP_single)((void)0);
1807 }
1808
1809#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
1810 kmp_info_t *this_thr = __kmp_threads[global_tid];
1811 kmp_team_t *team = this_thr->th.th_team;
1812 int tid = __kmp_tid_from_gtid(global_tid);
1813
1814 if (ompt_enabled.enabled) {
1815 if (rc) {
1816 if (ompt_enabled.ompt_callback_work) {
1817 ompt_callbacks.ompt_callback(ompt_callback_work)ompt_callback_work_callback(
1818 ompt_work_single_executor, ompt_scope_begin,
1819 &(team->t.ompt_team_info.parallel_data),
1820 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1821 1, OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0));
1822 }
1823 } else {
1824 if (ompt_enabled.ompt_callback_work) {
1825 ompt_callbacks.ompt_callback(ompt_callback_work)ompt_callback_work_callback(
1826 ompt_work_single_other, ompt_scope_begin,
1827 &(team->t.ompt_team_info.parallel_data),
1828 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1829 1, OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0));
1830 ompt_callbacks.ompt_callback(ompt_callback_work)ompt_callback_work_callback(
1831 ompt_work_single_other, ompt_scope_end,
1832 &(team->t.ompt_team_info.parallel_data),
1833 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1834 1, OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0));
1835 }
1836 }
1837 }
1838#endif
1839
1840 return rc;
1841}
1842
1843/*!
1844@ingroup WORK_SHARING
1845@param loc source location information
1846@param global_tid global thread number
1847
1848Mark the end of a <tt>single</tt> construct. This function should
1849only be called by the thread that executed the block of code protected
1850by the `single` construct.
1851*/
1852void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
1853 __kmp_assert_valid_gtid(global_tid);
1854 __kmp_exit_single(global_tid);
1855 KMP_POP_PARTITIONED_TIMER()((void)0);
1856
1857#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
1858 kmp_info_t *this_thr = __kmp_threads[global_tid];
1859 kmp_team_t *team = this_thr->th.th_team;
1860 int tid = __kmp_tid_from_gtid(global_tid);
1861
1862 if (ompt_enabled.ompt_callback_work) {
1863 ompt_callbacks.ompt_callback(ompt_callback_work)ompt_callback_work_callback(
1864 ompt_work_single_executor, ompt_scope_end,
1865 &(team->t.ompt_team_info.parallel_data),
1866 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
1867 OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0));
1868 }
1869#endif
1870}
1871
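Editorial note: a minimal sketch (placeholder names) showing how the 'single' entry points above are typically paired, with the explicit barrier the documentation says the compiler must add when 'nowait' is absent.

// Hypothetical lowering sketch for '#pragma omp single' without 'nowait'.
void lowered_single_region(ident_t *loc, kmp_int32 gtid) {
  if (__kmpc_single(loc, gtid)) {     // returns 1 only for the executing thread
    /* ... single-thread code ... */
    __kmpc_end_single(loc, gtid);
  }
  __kmpc_barrier(loc, gtid);          // explicit barrier; there is none inside the calls above
}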
1872/*!
1873@ingroup WORK_SHARING
1874@param loc Source location
1875@param global_tid Global thread id
1876
1877Mark the end of a statically scheduled loop.
1878*/
1879void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
1880 KMP_POP_PARTITIONED_TIMER()((void)0);
1881 KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid))if (kmp_e_debug >= 10) { __kmp_debug_printf ("__kmpc_for_static_fini called T#%d\n"
, global_tid); }
;
1882
1883#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
1884 if (ompt_enabled.ompt_callback_work) {
1885 ompt_work_t ompt_work_type = ompt_work_loop;
1886 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL__null);
1887 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
1888 // Determine workshare type
1889 if (loc != NULL__null) {
1890 if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
1891 ompt_work_type = ompt_work_loop;
1892 } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
1893 ompt_work_type = ompt_work_sections;
1894 } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
1895 ompt_work_type = ompt_work_distribute;
1896 } else {
1897 // use default set above.
1898 // a warning about this case is provided in __kmpc_for_static_init
1899 }
1900 KMP_DEBUG_ASSERT(ompt_work_type)if (!(ompt_work_type)) { __kmp_debug_assert("ompt_work_type",
"openmp/runtime/src/kmp_csupport.cpp", 1900); }
;
1901 }
1902 ompt_callbacks.ompt_callback(ompt_callback_work)ompt_callback_work_callback(
1903 ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
1904 &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0));
1905 }
1906#endif
1907 if (__kmp_env_consistency_check)
1908 __kmp_pop_workshare(global_tid, ct_pdo, loc);
1909}
1910
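Editorial note: for context, a hedged sketch of the pairing this function closes. __kmpc_for_static_init_4 is declared elsewhere in the runtime; its parameter list and the kmp_sch_static schedule constant are assumptions made for illustration only.

// Hypothetical sketch of a statically scheduled loop lowering (signature assumed).
void lowered_static_loop(ident_t *loc, kmp_int32 gtid) {
  kmp_int32 lower = 0, upper = 99, stride = 1, last = 0;
  __kmpc_for_static_init_4(loc, gtid, (kmp_int32)kmp_sch_static, &last,
                           &lower, &upper, &stride, /*incr=*/1, /*chunk=*/1);
  for (kmp_int32 i = lower; i <= upper; ++i) {
    /* ... loop body for this thread's chunk ... */
  }
  __kmpc_for_static_fini(loc, gtid);   // marks the end of the statically scheduled loop
}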
1911// User routines which take C-style arguments (call by value)
1912// different from the Fortran equivalent routines
1913
1914void ompc_set_num_threads(int arg) {
1915 // !!!!! TODO: check the per-task binding
1916 __kmp_set_num_threads(arg, __kmp_entry_gtid()__kmp_get_global_thread_id_reg());
1917}
1918
1919void ompc_set_dynamic(int flag) {
1920 kmp_info_t *thread;
1921
1922 /* For the thread-private implementation of the internal controls */
1923 thread = __kmp_entry_thread();
1924
1925 __kmp_save_internal_controls(thread);
1926
1927 set__dynamic(thread, flag ? true : false)(((thread)->th.th_current_task->td_icvs.dynamic) = (flag
? true : false))
;
1928}
1929
1930void ompc_set_nested(int flag) {
1931 kmp_info_t *thread;
1932
1933 /* For the thread-private internal controls implementation */
1934 thread = __kmp_entry_thread();
1935
1936 __kmp_save_internal_controls(thread);
1937
1938 set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1)(((thread)->th.th_current_task->td_icvs.max_active_levels
) = (flag ? __kmp_dflt_max_active_levels : 1))
;
1939}
1940
1941void ompc_set_max_active_levels(int max_active_levels) {
1942 /* TO DO */
1943 /* we want per-task implementation of this internal control */
1944
1945 /* For the per-thread internal controls implementation */
1946 __kmp_set_max_active_levels(__kmp_entry_gtid()__kmp_get_global_thread_id_reg(), max_active_levels);
1947}
1948
1949void ompc_set_schedule(omp_sched_t kind, int modifier) {
1950 // !!!!! TODO: check the per-task binding
1951 __kmp_set_schedule(__kmp_entry_gtid()__kmp_get_global_thread_id_reg(), (kmp_sched_t)kind, modifier);
1952}
1953
1954int ompc_get_ancestor_thread_num(int level) {
1955 return __kmp_get_ancestor_thread_num(__kmp_entry_gtid()__kmp_get_global_thread_id_reg(), level);
1956}
1957
1958int ompc_get_team_size(int level) {
1959 return __kmp_get_team_size(__kmp_entry_gtid()__kmp_get_global_thread_id_reg(), level);
1960}
1961
1962/* OpenMP 5.0 Affinity Format API */
1963void KMP_EXPAND_NAME(ompc_set_affinity_format)__kmp_api_ompc_set_affinity_format(char const *format) {
1964 if (!__kmp_init_serial) {
1965 __kmp_serial_initialize();
1966 }
1967 __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
1968 format, KMP_STRLENstrlen(format) + 1);
1969}
1970
1971size_t KMP_EXPAND_NAME(ompc_get_affinity_format)__kmp_api_ompc_get_affinity_format(char *buffer, size_t size) {
1972 size_t format_size;
1973 if (!__kmp_init_serial) {
1974 __kmp_serial_initialize();
1975 }
1976 format_size = KMP_STRLENstrlen(__kmp_affinity_format);
1977 if (buffer && size) {
1978 __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
1979 format_size + 1);
1980 }
1981 return format_size;
1982}
1983
1984void KMP_EXPAND_NAME(ompc_display_affinity)__kmp_api_ompc_display_affinity(char const *format) {
1985 int gtid;
1986 if (!TCR_4(__kmp_init_middle)(__kmp_init_middle)) {
1987 __kmp_middle_initialize();
1988 }
1989 __kmp_assign_root_init_mask();
1990 gtid = __kmp_get_gtid()__kmp_get_global_thread_id();
1991#if KMP_AFFINITY_SUPPORTED1
1992 if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 &&
1993 __kmp_affinity.flags.reset) {
1994 __kmp_reset_root_init_mask(gtid);
1995 }
1996#endif
1997 __kmp_aux_display_affinity(gtid, format);
1998}
1999
2000size_t KMP_EXPAND_NAME(ompc_capture_affinity)__kmp_api_ompc_capture_affinity(char *buffer, size_t buf_size,
2001 char const *format) {
2002 int gtid;
2003 size_t num_required;
2004 kmp_str_buf_t capture_buf;
2005 if (!TCR_4(__kmp_init_middle)(__kmp_init_middle)) {
2006 __kmp_middle_initialize();
2007 }
2008 __kmp_assign_root_init_mask();
2009 gtid = __kmp_get_gtid()__kmp_get_global_thread_id();
2010#if KMP_AFFINITY_SUPPORTED1
2011 if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 &&
2012 __kmp_affinity.flags.reset) {
2013 __kmp_reset_root_init_mask(gtid);
2014 }
2015#endif
2016 __kmp_str_buf_init(&capture_buf){ (&capture_buf)->str = (&capture_buf)->bulk; (
&capture_buf)->size = sizeof((&capture_buf)->bulk
); (&capture_buf)->used = 0; (&capture_buf)->bulk
[0] = 0; }
;
2017 num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
2018 if (buffer && buf_size) {
2019 __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
2020 capture_buf.used + 1);
2021 }
2022 __kmp_str_buf_free(&capture_buf);
2023 return num_required;
2024}
2025
2026void kmpc_set_stacksize(int arg) {
2027 // __kmp_aux_set_stacksize initializes the library if needed
2028 __kmp_aux_set_stacksize(arg);
2029}
2030
2031void kmpc_set_stacksize_s(size_t arg) {
2032 // __kmp_aux_set_stacksize initializes the library if needed
2033 __kmp_aux_set_stacksize(arg);
2034}
2035
2036void kmpc_set_blocktime(int arg) {
2037 int gtid, tid;
2038 kmp_info_t *thread;
2039
2040 gtid = __kmp_entry_gtid()__kmp_get_global_thread_id_reg();
2041 tid = __kmp_tid_from_gtid(gtid);
2042 thread = __kmp_thread_from_gtid(gtid);
2043
2044 __kmp_aux_set_blocktime(arg, thread, tid);
2045}
2046
2047void kmpc_set_library(int arg) {
2048 // __kmp_user_set_library initializes the library if needed
2049 __kmp_user_set_library((enum library_type)arg);
2050}
2051
2052void kmpc_set_defaults(char const *str) {
2053 // __kmp_aux_set_defaults initializes the library if needed
2054 __kmp_aux_set_defaults(str, KMP_STRLENstrlen(str));
2055}
2056
2057void kmpc_set_disp_num_buffers(int arg) {
2058 // ignore after initialization because some teams have already
2059 // allocated dispatch buffers
2060 if (__kmp_init_serial == FALSE0 && arg >= KMP_MIN_DISP_NUM_BUFF1 &&
2061 arg <= KMP_MAX_DISP_NUM_BUFF4096) {
2062 __kmp_dispatch_num_buffers = arg;
2063 }
2064}
2065
2066int kmpc_set_affinity_mask_proc(int proc, void **mask) {
2067#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED1
2068 return -1;
2069#else
2070 if (!TCR_4(__kmp_init_middle)(__kmp_init_middle)) {
2071 __kmp_middle_initialize();
2072 }
2073 __kmp_assign_root_init_mask();
2074 return __kmp_aux_set_affinity_mask_proc(proc, mask);
2075#endif
2076}
2077
2078int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
2079#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED1
2080 return -1;
2081#else
2082 if (!TCR_4(__kmp_init_middle)(__kmp_init_middle)) {
2083 __kmp_middle_initialize();
2084 }
2085 __kmp_assign_root_init_mask();
2086 return __kmp_aux_unset_affinity_mask_proc(proc, mask);
2087#endif
2088}
2089
2090int kmpc_get_affinity_mask_proc(int proc, void **mask) {
2091#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED1
2092 return -1;
2093#else
2094 if (!TCR_4(__kmp_init_middle)(__kmp_init_middle)) {
2095 __kmp_middle_initialize();
2096 }
2097 __kmp_assign_root_init_mask();
2098 return __kmp_aux_get_affinity_mask_proc(proc, mask);
2099#endif
2100}
2101
2102/* -------------------------------------------------------------------------- */
2103/*!
2104@ingroup THREADPRIVATE
2105@param loc source location information
2106@param gtid global thread number
2107@param cpy_size size of the cpy_data buffer
2108@param cpy_data pointer to data to be copied
2109@param cpy_func helper function to call for copying data
2110@param didit flag variable: 1=single thread; 0=not single thread
2111
2112__kmpc_copyprivate implements the interface for the private data broadcast
2113needed for the copyprivate clause associated with a single region in an
2114OpenMP<sup>*</sup> program (both C and Fortran).
2115All threads participating in the parallel region call this routine.
2116One of the threads (called the single thread) should have the <tt>didit</tt>
2117variable set to 1 and all other threads should have that variable set to 0.
2118All threads pass a pointer to a data buffer (cpy_data) that they have built.
2119
2120The OpenMP specification forbids the use of nowait on the single region when a
2121copyprivate clause is present. However, @ref __kmpc_copyprivate implements a
2122barrier internally to avoid race conditions, so the code generation for the
2123single region should avoid generating a barrier after the call to @ref
2124__kmpc_copyprivate.
2125
2126The <tt>gtid</tt> parameter is the global thread id for the current thread.
2127The <tt>loc</tt> parameter is a pointer to source location information.
2128
2129Internal implementation: The single thread will first copy its descriptor
2130address (cpy_data) to a team-private location, then the other threads will each
2131call the function pointed to by the parameter cpy_func, which carries out the
2132copy by copying the data using the cpy_data buffer.
2133
2134The cpy_func routine used for the copy and the contents of the data area defined
2135by cpy_data and cpy_size may be built in any fashion that will allow the copy
2136to be done. For instance, the cpy_data buffer can hold the actual data to be
2137copied or it may hold a list of pointers to the data. The cpy_func routine must
2138interpret the cpy_data buffer appropriately.
2139
2140The interface to cpy_func is as follows:
2141@code
2142void cpy_func( void *destination, void *source )
2143@endcode
2144where void *destination is the cpy_data pointer for the thread being copied to
2145and void *source is the cpy_data pointer for the thread being copied from.
2146*/
2147void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
2148 void *cpy_data, void (*cpy_func)(void *, void *),
2149 kmp_int32 didit) {
2150 void **data_ptr;
2151 KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmpc_copyprivate: called T#%d\n"
, gtid); }
;
2152 __kmp_assert_valid_gtid(gtid);
2153
2154 KMP_MB();
2155
2156 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
2157
2158 if (__kmp_env_consistency_check) {
2159 if (loc == 0) {
2160 KMP_WARNING(ConstructIdentInvalid)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_ConstructIdentInvalid
), __kmp_msg_null)
;
2161 }
2162 }
2163
2164 // ToDo: Optimize the following two barriers into some kind of split barrier
2165
2166 if (didit)
2167 *data_ptr = cpy_data;
2168
2169#if OMPT_SUPPORT1
2170 ompt_frame_t *ompt_frame;
2171 if (ompt_enabled.enabled) {
2172 __ompt_get_task_info_internal(0, NULL__null, NULL__null, &ompt_frame, NULL__null, NULL__null);
2173 if (ompt_frame->enter_frame.ptr == NULL__null)
2174 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0)__builtin_frame_address(0);
2175 }
2176 OMPT_STORE_RETURN_ADDRESS(gtid)OmptReturnAddressGuard ReturnAddressGuard{gtid, __builtin_return_address
(0)};
;
2177#endif
2178/* This barrier is not a barrier region boundary */
2179#if USE_ITT_NOTIFY1
2180 __kmp_threads[gtid]->th.th_ident = loc;
2181#endif
2182 __kmp_barrier(bs_plain_barrier, gtid, FALSE0, 0, NULL__null, NULL__null);
2183
2184 if (!didit)
2185 (*cpy_func)(cpy_data, *data_ptr);
2186
2187 // Consider next barrier a user-visible barrier for barrier region boundaries
2188 // Nesting checks are already handled by the single construct checks
2189 {
2190#if OMPT_SUPPORT1
2191 OMPT_STORE_RETURN_ADDRESS(gtid)OmptReturnAddressGuard ReturnAddressGuard{gtid, __builtin_return_address
(0)};
;
2192#endif
2193#if USE_ITT_NOTIFY1
2194 __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g.
2195// tasks can overwrite the location)
2196#endif
2197 __kmp_barrier(bs_plain_barrier, gtid, FALSE0, 0, NULL__null, NULL__null);
2198#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2199 if (ompt_enabled.enabled) {
2200 ompt_frame->enter_frame = ompt_data_none{0};
2201 }
2202#endif
2203 }
2204}
2205
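Editorial note: a minimal usage sketch (hypothetical names) matching the cpy_func interface described above; each thread passes the address of its own copy, and the non-single threads copy from the single thread's buffer.

// Hypothetical copyprivate of a single int.
static void copy_one_int(void *destination, void *source) {
  *(int *)destination = *(int *)source;     // cpy_func contract from the comment above
}

void lowered_single_copyprivate(ident_t *loc, kmp_int32 gtid) {
  int x = 0;
  kmp_int32 didit = __kmpc_single(loc, gtid);
  if (didit) {
    x = 42;                                 // placeholder for the single-thread work
    __kmpc_end_single(loc, gtid);
  }
  __kmpc_copyprivate(loc, gtid, sizeof(x), &x, copy_one_int, didit);
  // No extra barrier here: __kmpc_copyprivate already synchronizes internally.
}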
2206/* --------------------------------------------------------------------------*/
2207/*!
2208@ingroup THREADPRIVATE
2209@param loc source location information
2210@param gtid global thread number
2211@param cpy_data pointer to the data to be saved/copied or 0
2212@return the saved pointer to the data
2213
 2214__kmpc_copyprivate_light is a lighter version of __kmpc_copyprivate:
 2215it only saves the pointer it is given (when that pointer is not 0, i.e. when it
 2216comes from the single thread) and returns that saved pointer to every caller
 2217(the single thread itself does not need it). This version does not do any
 2218actual data copying; the copying has to be done elsewhere, e.g. inline in the
 2219generated code. Because of this, the function does not end with a barrier the
 2220way __kmpc_copyprivate does, so the generated code needs a barrier after all
 2221data copying is done.
2222*/
2223void *__kmpc_copyprivate_light(ident_t *loc, kmp_int32 gtid, void *cpy_data) {
2224 void **data_ptr;
2225
2226 KC_TRACE(10, ("__kmpc_copyprivate_light: called T#%d\n", gtid))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmpc_copyprivate_light: called T#%d\n"
, gtid); }
;
2227
2228 KMP_MB();
2229
2230 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
2231
2232 if (__kmp_env_consistency_check) {
2233 if (loc == 0) {
2234 KMP_WARNING(ConstructIdentInvalid)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_ConstructIdentInvalid
), __kmp_msg_null)
;
2235 }
2236 }
2237
2238 // ToDo: Optimize the following barrier
2239
2240 if (cpy_data)
2241 *data_ptr = cpy_data;
2242
2243#if OMPT_SUPPORT1
2244 ompt_frame_t *ompt_frame;
2245 if (ompt_enabled.enabled) {
2246 __ompt_get_task_info_internal(0, NULL__null, NULL__null, &ompt_frame, NULL__null, NULL__null);
2247 if (ompt_frame->enter_frame.ptr == NULL__null)
2248 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0)__builtin_frame_address(0);
2249 OMPT_STORE_RETURN_ADDRESS(gtid)OmptReturnAddressGuard ReturnAddressGuard{gtid, __builtin_return_address
(0)};
;
2250 }
2251#endif
2252/* This barrier is not a barrier region boundary */
2253#if USE_ITT_NOTIFY1
2254 __kmp_threads[gtid]->th.th_ident = loc;
2255#endif
2256 __kmp_barrier(bs_plain_barrier, gtid, FALSE0, 0, NULL__null, NULL__null);
2257
2258 return *data_ptr;
2259}
2260
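Editorial note: a hedged sketch of the lighter variant described above, assuming the generated code performs the copy inline and then adds its own trailing barrier; names are placeholders.

// Hypothetical use of __kmpc_copyprivate_light: copy done inline, barrier added after.
void lowered_single_copyprivate_light(ident_t *loc, kmp_int32 gtid) {
  int x = 0;
  kmp_int32 didit = __kmpc_single(loc, gtid);
  if (didit) {
    x = 7;                                            // single thread produces the value
    __kmpc_end_single(loc, gtid);
  }
  void *src = __kmpc_copyprivate_light(loc, gtid, didit ? (void *)&x : NULL);
  if (!didit)
    x = *(int *)src;                                  // inline copy in the generated code
  __kmpc_barrier(loc, gtid);                          // barrier required after all copies
}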
2261/* -------------------------------------------------------------------------- */
2262
2263#define INIT_LOCK__kmp_init_user_lock_with_checks __kmp_init_user_lock_with_checks
2264#define INIT_NESTED_LOCK__kmp_init_nested_user_lock_with_checks __kmp_init_nested_user_lock_with_checks
2265#define ACQUIRE_LOCK__kmp_acquire_user_lock_with_checks __kmp_acquire_user_lock_with_checks
2266#define ACQUIRE_LOCK_TIMED__kmp_acquire_user_lock_with_checks_timed __kmp_acquire_user_lock_with_checks_timed
2267#define ACQUIRE_NESTED_LOCK__kmp_acquire_nested_user_lock_with_checks __kmp_acquire_nested_user_lock_with_checks
2268#define ACQUIRE_NESTED_LOCK_TIMED__kmp_acquire_nested_user_lock_with_checks_timed \
2269 __kmp_acquire_nested_user_lock_with_checks_timed
2270#define RELEASE_LOCK__kmp_release_user_lock_with_checks __kmp_release_user_lock_with_checks
2271#define RELEASE_NESTED_LOCK__kmp_release_nested_user_lock_with_checks __kmp_release_nested_user_lock_with_checks
2272#define TEST_LOCK__kmp_test_user_lock_with_checks __kmp_test_user_lock_with_checks
2273#define TEST_NESTED_LOCK__kmp_test_nested_user_lock_with_checks __kmp_test_nested_user_lock_with_checks
2274#define DESTROY_LOCK__kmp_destroy_user_lock_with_checks __kmp_destroy_user_lock_with_checks
2275#define DESTROY_NESTED_LOCK__kmp_destroy_nested_user_lock_with_checks __kmp_destroy_nested_user_lock_with_checks
2276
2277// TODO: Make check abort messages use location info & pass it into
2278// with_checks routines
2279
2280#if KMP_USE_DYNAMIC_LOCK1
2281
2282// internal lock initializer
2283static __forceinline__inline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
2284 kmp_dyna_lockseq_t seq) {
2285 if (KMP_IS_D_LOCK(seq)((seq) >= lockseq_tas && (seq) <= lockseq_rtm_spin
)
) {
2286 KMP_INIT_D_LOCK(lock, seq)__kmp_direct_init[((seq) << 1 | 1)]((kmp_dyna_lock_t *)
lock, seq)
;
2287#if USE_ITT_BUILD1
2288 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL__null);
2289#endif
2290 } else {
2291 KMP_INIT_I_LOCK(lock, seq)__kmp_direct_init[0]((kmp_dyna_lock_t *)(lock), seq);
2292#if USE_ITT_BUILD1
2293 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock)((sizeof(int) < sizeof(void *)) ? __kmp_get_i_lock((*(kmp_lock_index_t
*)(lock) >> 1)) : *((kmp_indirect_lock_t **)(lock)))
;
2294 __kmp_itt_lock_creating(ilk->lock, loc);
2295#endif
2296 }
2297}
2298
2299// internal nest lock initializer
2300static __forceinline__inline void
2301__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
2302 kmp_dyna_lockseq_t seq) {
2303#if KMP_USE_TSX(0 || 1) && !0
2304 // Don't have nested lock implementation for speculative locks
2305 if (seq == lockseq_hle || seq == lockseq_rtm_queuing ||
2306 seq == lockseq_rtm_spin || seq == lockseq_adaptive)
2307 seq = __kmp_user_lock_seq;
2308#endif
2309 switch (seq) {
2310 case lockseq_tas:
2311 seq = lockseq_nested_tas;
2312 break;
2313#if KMP_USE_FUTEX(1 && (0 || 1 || KMP_ARCH_ARM || 0))
2314 case lockseq_futex:
2315 seq = lockseq_nested_futex;
2316 break;
2317#endif
2318 case lockseq_ticket:
2319 seq = lockseq_nested_ticket;
2320 break;
2321 case lockseq_queuing:
2322 seq = lockseq_nested_queuing;
2323 break;
2324 case lockseq_drdpa:
2325 seq = lockseq_nested_drdpa;
2326 break;
2327 default:
2328 seq = lockseq_nested_queuing;
2329 }
2330 KMP_INIT_I_LOCK(lock, seq)__kmp_direct_init[0]((kmp_dyna_lock_t *)(lock), seq);
2331#if USE_ITT_BUILD1
2332 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock)((sizeof(int) < sizeof(void *)) ? __kmp_get_i_lock((*(kmp_lock_index_t
*)(lock) >> 1)) : *((kmp_indirect_lock_t **)(lock)))
;
2333 __kmp_itt_lock_creating(ilk->lock, loc);
2334#endif
2335}
2336
2337/* initialize the lock with a hint */
2338void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
2339 uintptr_t hint) {
2340 KMP_DEBUG_ASSERT(__kmp_init_serial)if (!(__kmp_init_serial)) { __kmp_debug_assert("__kmp_init_serial"
, "openmp/runtime/src/kmp_csupport.cpp", 2340); }
;
2341 if (__kmp_env_consistency_check && user_lock == NULL__null) {
2342 KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint")__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_LockIsUninitialized
, "omp_init_lock_with_hint"), __kmp_msg_null)
;
2343 }
2344
2345 __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2346
2347#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
 2348 // This is the case if called from omp_init_lock_with_hint:
2349 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
2350 if (!codeptr)
2351 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
2352 if (ompt_enabled.ompt_callback_lock_init) {
2353 ompt_callbacks.ompt_callback(ompt_callback_lock_init)ompt_callback_lock_init_callback(
2354 ompt_mutex_lock, (omp_lock_hint_t)hint,
2355 __ompt_get_mutex_impl_type(user_lock),
2356 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2357 }
2358#endif
2359}
2360
2361/* initialize the lock with a hint */
2362void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
2363 void **user_lock, uintptr_t hint) {
2364 KMP_DEBUG_ASSERT(__kmp_init_serial)if (!(__kmp_init_serial)) { __kmp_debug_assert("__kmp_init_serial"
, "openmp/runtime/src/kmp_csupport.cpp", 2364); }
;
2365 if (__kmp_env_consistency_check && user_lock == NULL__null) {
2366 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint")__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_LockIsUninitialized
, "omp_init_nest_lock_with_hint"), __kmp_msg_null)
;
2367 }
2368
2369 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2370
2371#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
 2372 // This is the case if called from omp_init_lock_with_hint:
2373 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
2374 if (!codeptr)
2375 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
2376 if (ompt_enabled.ompt_callback_lock_init) {
2377 ompt_callbacks.ompt_callback(ompt_callback_lock_init)ompt_callback_lock_init_callback(
2378 ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
2379 __ompt_get_mutex_impl_type(user_lock),
2380 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2381 }
2382#endif
2383}
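For context, a minimal user-level program (assuming an OpenMP 5.0 compiler and runtime) that reaches the two entry points above looks like this; with a speculative hint the nested initializer falls back to __kmp_user_lock_seq, per the comment at line 2304.

    #include <omp.h>
    #include <cstdio>

    int main() {
      omp_nest_lock_t l;
      // Lowered by the compiler to __kmpc_init_nest_lock_with_hint.
      omp_init_nest_lock_with_hint(&l, omp_sync_hint_speculative);

      int counter = 0;
    #pragma omp parallel num_threads(4)
      {
        omp_set_nest_lock(&l);
        omp_set_nest_lock(&l); // re-acquisition by the owner is legal for nest locks
        ++counter;
        omp_unset_nest_lock(&l);
        omp_unset_nest_lock(&l);
      }
      omp_destroy_nest_lock(&l);
      std::printf("counter = %d\n", counter);
      return 0;
    }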
2384
2385#endif // KMP_USE_DYNAMIC_LOCK
2386
2387/* initialize the lock */
2388void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2389#if KMP_USE_DYNAMIC_LOCK1
2390
2391  KMP_DEBUG_ASSERT(__kmp_init_serial);
2392  if (__kmp_env_consistency_check && user_lock == NULL) {
2393    KMP_FATAL(LockIsUninitialized, "omp_init_lock");
2394  }
2395 __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2396
2397#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2398 // This is the case, if called from omp_init_lock_with_hint:
2399 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
2400 if (!codeptr)
2401 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
2402 if (ompt_enabled.ompt_callback_lock_init) {
2403 ompt_callbacks.ompt_callback(ompt_callback_lock_init)ompt_callback_lock_init_callback(
2404 ompt_mutex_lock, omp_lock_hint_none,
2405 __ompt_get_mutex_impl_type(user_lock),
2406 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2407 }
2408#endif
2409
2410#else // KMP_USE_DYNAMIC_LOCK
2411
2412 static char const *const func = "omp_init_lock";
2413 kmp_user_lock_p lck;
2414  KMP_DEBUG_ASSERT(__kmp_init_serial);
2415
2416  if (__kmp_env_consistency_check) {
2417    if (user_lock == NULL) {
2418      KMP_FATAL(LockIsUninitialized, func);
2419    }
2420 }
2421
2422 KMP_CHECK_USER_LOCK_INIT();
2423
2424 if ((__kmp_user_lock_kind == lk_tas) &&
2425 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZEsizeof(int))) {
2426 lck = (kmp_user_lock_p)user_lock;
2427 }
2428#if KMP_USE_FUTEX(1 && (0 || 1 || KMP_ARCH_ARM || 0))
2429 else if ((__kmp_user_lock_kind == lk_futex) &&
2430 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZEsizeof(int))) {
2431 lck = (kmp_user_lock_p)user_lock;
2432 }
2433#endif
2434 else {
2435 lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2436 }
2437 INIT_LOCK__kmp_init_user_lock_with_checks(lck);
2438 __kmp_set_user_lock_location(lck, loc);
2439
2440#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2441 // This is the case, if called from omp_init_lock_with_hint:
2442 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
2443 if (!codeptr)
2444 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
2445 if (ompt_enabled.ompt_callback_lock_init) {
2446 ompt_callbacks.ompt_callback(ompt_callback_lock_init)ompt_callback_lock_init_callback(
2447 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2448 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2449 }
2450#endif
2451
2452#if USE_ITT_BUILD1
2453 __kmp_itt_lock_creating(lck);
2454#endif /* USE_ITT_BUILD */
2455
2456#endif // KMP_USE_DYNAMIC_LOCK
2457} // __kmpc_init_lock
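The non-dynamic branch above decides whether the lock object can live directly in the user's omp_lock_t storage or must come from __kmp_user_lock_allocate. A condensed sketch of that size check, using a hypothetical fake_tas_lock in place of the real kmp_tas_lock_t:

    #include <cstdio>

    struct fake_tas_lock { int poll; };   // stand-in; the real field is lck->tas.lk.poll

    // OMP_LOCK_T_SIZE expands to sizeof(int) in this build, so a plain
    // test-and-set lock fits into the user's lock word and needs no separate
    // allocation; anything bigger goes through the lock table instead.
    static bool fits_in_user_word() {
      return sizeof(fake_tas_lock::poll) <= sizeof(int);
    }

    int main() {
      std::printf("TAS lock stored inline: %s\n", fits_in_user_word() ? "yes" : "no");
      return 0;
    }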
2458
2459/* initialize the lock */
2460void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2461#if KMP_USE_DYNAMIC_LOCK1
2462
2463  KMP_DEBUG_ASSERT(__kmp_init_serial);
2464  if (__kmp_env_consistency_check && user_lock == NULL) {
2465    KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
2466  }
2467 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2468
2469#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2470 // This is the case, if called from omp_init_lock_with_hint:
2471 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
2472 if (!codeptr)
2473 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
2474 if (ompt_enabled.ompt_callback_lock_init) {
2475 ompt_callbacks.ompt_callback(ompt_callback_lock_init)ompt_callback_lock_init_callback(
2476 ompt_mutex_nest_lock, omp_lock_hint_none,
2477 __ompt_get_mutex_impl_type(user_lock),
2478 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2479 }
2480#endif
2481
2482#else // KMP_USE_DYNAMIC_LOCK
2483
2484 static char const *const func = "omp_init_nest_lock";
2485 kmp_user_lock_p lck;
2486  KMP_DEBUG_ASSERT(__kmp_init_serial);
2487
2488  if (__kmp_env_consistency_check) {
2489    if (user_lock == NULL) {
2490      KMP_FATAL(LockIsUninitialized, func);
2491    }
2492 }
2493
2494 KMP_CHECK_USER_LOCK_INIT();
2495
2496 if ((__kmp_user_lock_kind == lk_tas) &&
2497 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2498 OMP_NEST_LOCK_T_SIZEsizeof(void *))) {
2499 lck = (kmp_user_lock_p)user_lock;
2500 }
2501#if KMP_USE_FUTEX(1 && (0 || 1 || KMP_ARCH_ARM || 0))
2502 else if ((__kmp_user_lock_kind == lk_futex) &&
2503 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2504 OMP_NEST_LOCK_T_SIZEsizeof(void *))) {
2505 lck = (kmp_user_lock_p)user_lock;
2506 }
2507#endif
2508 else {
2509 lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2510 }
2511
2512 INIT_NESTED_LOCK__kmp_init_nested_user_lock_with_checks(lck);
2513 __kmp_set_user_lock_location(lck, loc);
2514
2515#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2516 // This is the case, if called from omp_init_lock_with_hint:
2517 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
2518 if (!codeptr)
2519 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
2520 if (ompt_enabled.ompt_callback_lock_init) {
2521 ompt_callbacks.ompt_callback(ompt_callback_lock_init)ompt_callback_lock_init_callback(
2522 ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2523 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2524 }
2525#endif
2526
2527#if USE_ITT_BUILD1
2528 __kmp_itt_lock_creating(lck);
2529#endif /* USE_ITT_BUILD */
2530
2531#endif // KMP_USE_DYNAMIC_LOCK
2532} // __kmpc_init_nest_lock
2533
2534void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2535#if KMP_USE_DYNAMIC_LOCK1
2536
2537#if USE_ITT_BUILD1
2538 kmp_user_lock_p lck;
2539  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2540    lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2541 } else {
2542 lck = (kmp_user_lock_p)user_lock;
2543 }
2544 __kmp_itt_lock_destroyed(lck);
2545#endif
2546#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2547 // This is the case, if called from omp_init_lock_with_hint:
2548 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
2549 if (!codeptr)
2550 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
2551 if (ompt_enabled.ompt_callback_lock_destroy) {
2552 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)ompt_callback_lock_destroy_callback(
2553 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2554 }
2555#endif
2556  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2557#else
2558 kmp_user_lock_p lck;
2559
2560 if ((__kmp_user_lock_kind == lk_tas) &&
2561 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZEsizeof(int))) {
2562 lck = (kmp_user_lock_p)user_lock;
2563 }
2564#if KMP_USE_FUTEX(1 && (0 || 1 || KMP_ARCH_ARM || 0))
2565 else if ((__kmp_user_lock_kind == lk_futex) &&
2566 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZEsizeof(int))) {
2567 lck = (kmp_user_lock_p)user_lock;
2568 }
2569#endif
2570 else {
2571 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
2572 }
2573
2574#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2575 // This is the case, if called from omp_init_lock_with_hint:
2576 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
2577 if (!codeptr)
2578 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
2579 if (ompt_enabled.ompt_callback_lock_destroy) {
2580 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)ompt_callback_lock_destroy_callback(
2581 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2582 }
2583#endif
2584
2585#if USE_ITT_BUILD1
2586 __kmp_itt_lock_destroyed(lck);
2587#endif /* USE_ITT_BUILD */
2588 DESTROY_LOCK__kmp_destroy_user_lock_with_checks(lck);
2589
2590 if ((__kmp_user_lock_kind == lk_tas) &&
2591 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZEsizeof(int))) {
2592 ;
2593 }
2594#if KMP_USE_FUTEX(1 && (0 || 1 || KMP_ARCH_ARM || 0))
2595 else if ((__kmp_user_lock_kind == lk_futex) &&
2596 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZEsizeof(int))) {
2597 ;
2598 }
2599#endif
2600 else {
2601 __kmp_user_lock_free(user_lock, gtid, lck);
2602 }
2603#endif // KMP_USE_DYNAMIC_LOCK
2604} // __kmpc_destroy_lock
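The repeated KMP_EXTRACT_D_TAG expansion is easier to read in isolation: the low byte of the lock word is the direct-lock tag, and it is masked to zero whenever the least-significant bit is clear, which marks an indirect lock and sends __kmpc_destroy_lock through KMP_LOOKUP_I_LOCK instead. A self-contained sketch (dyna_lock_word is a stand-in for kmp_dyna_lock_t):

    #include <cstdint>
    #include <cstdio>

    using dyna_lock_word = std::uint32_t;   // stand-in for kmp_dyna_lock_t

    static std::uint32_t extract_direct_tag(dyna_lock_word w) {
      // w & 0xFF keeps the tag byte; (0 - (w & 1)) is all-ones only when the
      // LSB marks a direct lock, so indirect locks always extract tag 0.
      return w & ((1u << 8) - 1) & (0u - (w & 1u));
    }

    int main() {
      std::printf("direct word 0x0b   -> tag %u\n", extract_direct_tag(0x0b)); // 11
      std::printf("indirect word 0x10 -> tag %u\n", extract_direct_tag(0x10)); // 0
      return 0;
    }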
2605
2606/* destroy the lock */
2607void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2608#if KMP_USE_DYNAMIC_LOCK1
2609
2610#if USE_ITT_BUILD1
2611  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
2612 __kmp_itt_lock_destroyed(ilk->lock);
2613#endif
2614#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2615 // This is the case, if called from omp_init_lock_with_hint:
2616 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
2617 if (!codeptr)
2618 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
2619 if (ompt_enabled.ompt_callback_lock_destroy) {
2620 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)ompt_callback_lock_destroy_callback(
2621 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2622 }
2623#endif
2624  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2625
2626#else // KMP_USE_DYNAMIC_LOCK
2627
2628 kmp_user_lock_p lck;
2629
2630 if ((__kmp_user_lock_kind == lk_tas) &&
2631 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2632 OMP_NEST_LOCK_T_SIZEsizeof(void *))) {
2633 lck = (kmp_user_lock_p)user_lock;
2634 }
2635#if KMP_USE_FUTEX(1 && (0 || 1 || KMP_ARCH_ARM || 0))
2636 else if ((__kmp_user_lock_kind == lk_futex) &&
2637 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2638 OMP_NEST_LOCK_T_SIZEsizeof(void *))) {
2639 lck = (kmp_user_lock_p)user_lock;
2640 }
2641#endif
2642 else {
2643 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
2644 }
2645
2646#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2647 // This is the case, if called from omp_init_lock_with_hint:
2648 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
2649 if (!codeptr)
2650 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
2651 if (ompt_enabled.ompt_callback_lock_destroy) {
2652 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)ompt_callback_lock_destroy_callback(
2653 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2654 }
2655#endif
2656
2657#if USE_ITT_BUILD1
2658 __kmp_itt_lock_destroyed(lck);
2659#endif /* USE_ITT_BUILD */
2660
2661 DESTROY_NESTED_LOCK__kmp_destroy_nested_user_lock_with_checks(lck);
2662
2663 if ((__kmp_user_lock_kind == lk_tas) &&
2664 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2665 OMP_NEST_LOCK_T_SIZEsizeof(void *))) {
2666 ;
2667 }
2668#if KMP_USE_FUTEX(1 && (0 || 1 || KMP_ARCH_ARM || 0))
2669 else if ((__kmp_user_lock_kind == lk_futex) &&
2670 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2671 OMP_NEST_LOCK_T_SIZEsizeof(void *))) {
2672 ;
2673 }
2674#endif
2675 else {
2676 __kmp_user_lock_free(user_lock, gtid, lck);
2677 }
2678#endif // KMP_USE_DYNAMIC_LOCK
2679} // __kmpc_destroy_nest_lock
2680
2681void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2682 KMP_COUNT_BLOCK(OMP_set_lock)((void)0);
2683#if KMP_USE_DYNAMIC_LOCK1
2684  int tag = KMP_EXTRACT_D_TAG(user_lock);
2685#if USE_ITT_BUILD1
2686 __kmp_itt_lock_acquiring(
2687 (kmp_user_lock_p)
2688 user_lock); // itt function will get to the right lock object.
2689#endif
2690#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2691 // This is the case, if called from omp_init_lock_with_hint:
2692 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
2693 if (!codeptr)
2694 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
2695 if (ompt_enabled.ompt_callback_mutex_acquire) {
2696 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)ompt_callback_mutex_acquire_callback(
2697 ompt_mutex_lock, omp_lock_hint_none,
2698 __ompt_get_mutex_impl_type(user_lock),
2699 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2700 }
2701#endif
2702#if KMP_USE_INLINED_TAS
2703  if (tag == locktag_tas && !__kmp_env_consistency_check) {
2704    KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
2705  } else
2706#elif KMP_USE_INLINED_FUTEX
2707  if (tag == locktag_futex && !__kmp_env_consistency_check) {
2708    KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
2709  } else
2710#endif
2711  {
2712    __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2713  }
2714#if USE_ITT_BUILD1
2715 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2716#endif
2717#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2718 if (ompt_enabled.ompt_callback_mutex_acquired) {
2719 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)ompt_callback_mutex_acquired_callback(
2720 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2721 }
2722#endif
2723
2724#else // KMP_USE_DYNAMIC_LOCK
2725
2726 kmp_user_lock_p lck;
2727
2728 if ((__kmp_user_lock_kind == lk_tas) &&
2729 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZEsizeof(int))) {
2730 lck = (kmp_user_lock_p)user_lock;
2731 }
2732#if KMP_USE_FUTEX(1 && (0 || 1 || KMP_ARCH_ARM || 0))
2733 else if ((__kmp_user_lock_kind == lk_futex) &&
2734 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZEsizeof(int))) {
2735 lck = (kmp_user_lock_p)user_lock;
2736 }
2737#endif
2738 else {
2739 lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
2740 }
2741
2742#if USE_ITT_BUILD1
2743 __kmp_itt_lock_acquiring(lck);
2744#endif /* USE_ITT_BUILD */
2745#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2746 // This is the case, if called from omp_init_lock_with_hint:
2747 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
2748 if (!codeptr)
2749 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
2750 if (ompt_enabled.ompt_callback_mutex_acquire) {
2751 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)ompt_callback_mutex_acquire_callback(
2752 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2753 (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2754 }
2755#endif
2756
2757 ACQUIRE_LOCK__kmp_acquire_user_lock_with_checks(lck, gtid);
2758
2759#if USE_ITT_BUILD1
2760 __kmp_itt_lock_acquired(lck);
2761#endif /* USE_ITT_BUILD */
2762
2763#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2764 if (ompt_enabled.ompt_callback_mutex_acquired) {
2765 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)ompt_callback_mutex_acquired_callback(
2766 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2767 }
2768#endif
2769
2770#endif // KMP_USE_DYNAMIC_LOCK
2771}
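The expanded KMP_ACQUIRE_TAS_LOCK fast path above is, stripped of the backoff and ITT plumbing, a relaxed-load-then-CAS spin loop. A condensed std::atomic sketch of the same idea (not the runtime's actual implementation; the real busy value also encodes the owner's gtid and the lock tag):

    #include <atomic>
    #include <thread>

    struct tas_sketch {
      std::atomic<int> poll{0};              // 0 == free; gtid + 1 == busy here

      void acquire(int gtid) {
        int expected = 0;
        while (poll.load(std::memory_order_relaxed) != 0 ||
               !poll.compare_exchange_strong(expected, gtid + 1,
                                             std::memory_order_acquire)) {
          expected = 0;                      // CAS overwrote it on failure
          std::this_thread::yield();         // stands in for __kmp_spin_backoff
        }
      }
      void release() { poll.store(0, std::memory_order_release); }
    };

    int main() {
      tas_sketch l;
      l.acquire(/*gtid=*/0);
      l.release();
      return 0;
    }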
2772
2773void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2774#if KMP_USE_DYNAMIC_LOCK1
2775
2776#if USE_ITT_BUILD1
2777 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2778#endif
2779#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2780 // This is the case, if called from omp_init_lock_with_hint:
2781 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
2782 if (!codeptr)
2783 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
2784 if (ompt_enabled.enabled) {
2785 if (ompt_enabled.ompt_callback_mutex_acquire) {
2786 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)ompt_callback_mutex_acquire_callback(
2787 ompt_mutex_nest_lock, omp_lock_hint_none,
2788 __ompt_get_mutex_impl_type(user_lock),
2789 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2790 }
2791 }
2792#endif
2793 int acquire_status =
2794      KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
2795 (void)acquire_status;
2796#if USE_ITT_BUILD1
2797 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2798#endif
2799
2800#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2801 if (ompt_enabled.enabled) {
2802 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST1) {
2803 if (ompt_enabled.ompt_callback_mutex_acquired) {
2804 // lock_first
2805 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)ompt_callback_mutex_acquired_callback(
2806 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
2807 codeptr);
2808 }
2809 } else {
2810 if (ompt_enabled.ompt_callback_nest_lock) {
2811 // lock_next
2812 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)ompt_callback_nest_lock_callback(
2813 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2814 }
2815 }
2816 }
2817#endif
2818
2819#else // KMP_USE_DYNAMIC_LOCK
2820 int acquire_status;
2821 kmp_user_lock_p lck;
2822
2823 if ((__kmp_user_lock_kind == lk_tas) &&
2824 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2825 OMP_NEST_LOCK_T_SIZEsizeof(void *))) {
2826 lck = (kmp_user_lock_p)user_lock;
2827 }
2828#if KMP_USE_FUTEX(1 && (0 || 1 || KMP_ARCH_ARM || 0))
2829 else if ((__kmp_user_lock_kind == lk_futex) &&
2830 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2831 OMP_NEST_LOCK_T_SIZEsizeof(void *))) {
2832 lck = (kmp_user_lock_p)user_lock;
2833 }
2834#endif
2835 else {
2836 lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
2837 }
2838
2839#if USE_ITT_BUILD1
2840 __kmp_itt_lock_acquiring(lck);
2841#endif /* USE_ITT_BUILD */
2842#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2843 // This is the case, if called from omp_init_lock_with_hint:
2844 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
2845 if (!codeptr)
2846 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
2847 if (ompt_enabled.enabled) {
2848 if (ompt_enabled.ompt_callback_mutex_acquire) {
2849 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)ompt_callback_mutex_acquire_callback(
2850 ompt_mutex_nest_lock, omp_lock_hint_none,
2851 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
2852 codeptr);
2853 }
2854 }
2855#endif
2856
2857 ACQUIRE_NESTED_LOCK__kmp_acquire_nested_user_lock_with_checks(lck, gtid, &acquire_status);
2858
2859#if USE_ITT_BUILD1
2860 __kmp_itt_lock_acquired(lck);
2861#endif /* USE_ITT_BUILD */
2862
2863#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2864 if (ompt_enabled.enabled) {
2865 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST1) {
2866 if (ompt_enabled.ompt_callback_mutex_acquired) {
2867 // lock_first
2868 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)ompt_callback_mutex_acquired_callback(
2869 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2870 }
2871 } else {
2872 if (ompt_enabled.ompt_callback_nest_lock) {
2873 // lock_next
2874 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)ompt_callback_nest_lock_callback(
2875 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2876 }
2877 }
2878 }
2879#endif
2880
2881#endif // KMP_USE_DYNAMIC_LOCK
2882}
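A small sketch of the OMPT dispatch used in both branches above: the status returned by the nested set operation is compared against KMP_LOCK_ACQUIRED_FIRST (1 in this build), and any other value is reported as a re-acquisition through the nest-lock callback. The callback names are printed here instead of invoked, purely for illustration:

    #include <cstdio>

    static const int ACQUIRED_FIRST = 1;     // mirrors KMP_LOCK_ACQUIRED_FIRST

    static void report_nest_acquire(int acquire_status) {
      if (acquire_status == ACQUIRED_FIRST)
        std::puts("ompt_callback_mutex_acquired (lock_first)");
      else
        std::puts("ompt_callback_nest_lock, ompt_scope_begin (lock_next)");
    }

    int main() {
      report_nest_acquire(1);   // first acquisition by this thread
      report_nest_acquire(2);   // owner re-acquired the nest lock
      return 0;
    }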
2883
2884void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2885#if KMP_USE_DYNAMIC_LOCK1
2886
2887  int tag = KMP_EXTRACT_D_TAG(user_lock);
2888#if USE_ITT_BUILD1
2889 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2890#endif
2891#if KMP_USE_INLINED_TAS
2892  if (tag == locktag_tas && !__kmp_env_consistency_check) {
2893    KMP_RELEASE_TAS_LOCK(user_lock, gtid);
2894  } else
2895#elif KMP_USE_INLINED_FUTEX
2896  if (tag == locktag_futex && !__kmp_env_consistency_check) {
2897    KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
2898  } else
2899#endif
2900  {
2901    __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2902  }
2903
2904#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2905 // This is the case, if called from omp_init_lock_with_hint:
2906 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
2907 if (!codeptr)
2908 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
2909 if (ompt_enabled.ompt_callback_mutex_released) {
2910 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)ompt_callback_mutex_released_callback(
2911 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2912 }
2913#endif
2914
2915#else // KMP_USE_DYNAMIC_LOCK
2916
2917 kmp_user_lock_p lck;
2918
2919 /* Can't use serial interval since not block structured */
2920 /* release the lock */
2921
2922 if ((__kmp_user_lock_kind == lk_tas) &&
2923 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZEsizeof(int))) {
2924#if KMP_OS_LINUX &&                                                            \
2925    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2926// "fast" path implemented to fix customer performance issue
2927#if USE_ITT_BUILD
2928    __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2929#endif /* USE_ITT_BUILD */
2930    TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
2931 KMP_MB();
2932
2933#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2934 // This is the case, if called from omp_init_lock_with_hint:
2935 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
2936 if (!codeptr)
2937 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
2938 if (ompt_enabled.ompt_callback_mutex_released) {
2939 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)ompt_callback_mutex_released_callback(
2940 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2941 }
2942#endif
2943
2944 return;
2945#else
2946 lck = (kmp_user_lock_p)user_lock;
2947#endif
2948 }
2949#if KMP_USE_FUTEX(1 && (0 || 1 || KMP_ARCH_ARM || 0))
2950 else if ((__kmp_user_lock_kind == lk_futex) &&
2951 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZEsizeof(int))) {
2952 lck = (kmp_user_lock_p)user_lock;
2953 }
2954#endif
2955 else {
2956 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
2957 }
2958
2959#if USE_ITT_BUILD1
2960 __kmp_itt_lock_releasing(lck);
2961#endif /* USE_ITT_BUILD */
2962
2963 RELEASE_LOCK__kmp_release_user_lock_with_checks(lck, gtid);
2964
2965#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2966 // This is the case, if called from omp_init_lock_with_hint:
2967 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
2968 if (!codeptr)
2969 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
2970 if (ompt_enabled.ompt_callback_mutex_released) {
2971 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)ompt_callback_mutex_released_callback(
2972 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2973 }
2974#endif
2975
2976#endif // KMP_USE_DYNAMIC_LOCK
2977}
2978
2979/* release the lock */
2980void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2981#if KMP_USE_DYNAMIC_LOCK1
2982
2983#if USE_ITT_BUILD1
2984 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2985#endif
2986  int release_status =
2987      KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
2988 (void)release_status;
2989
2990#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2991 // This is the case, if called from omp_init_lock_with_hint:
2992 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
2993 if (!codeptr)
2994 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
2995 if (ompt_enabled.enabled) {
2996 if (release_status == KMP_LOCK_RELEASED1) {
2997 if (ompt_enabled.ompt_callback_mutex_released) {
2998 // release_lock_last
2999 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)ompt_callback_mutex_released_callback(
3000 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3001 codeptr);
3002 }
3003 } else if (ompt_enabled.ompt_callback_nest_lock) {
3004 // release_lock_prev
3005 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)ompt_callback_nest_lock_callback(
3006 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3007 }
3008 }
3009#endif
3010
3011#else // KMP_USE_DYNAMIC_LOCK
3012
3013 kmp_user_lock_p lck;
3014
3015 /* Can't use serial interval since not block structured */
3016
3017 if ((__kmp_user_lock_kind == lk_tas) &&
3018 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3019 OMP_NEST_LOCK_T_SIZEsizeof(void *))) {
3020#if KMP_OS_LINUX &&                                                            \
3021    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
3022 // "fast" path implemented to fix customer performance issue
3023 kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
3024#if USE_ITT_BUILD1
3025 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
3026#endif /* USE_ITT_BUILD */
3027
3028#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
3029 int release_status = KMP_LOCK_STILL_HELD0;
3030#endif
3031
3032 if (--(tl->lk.depth_locked) == 0) {
3033 TCW_4(tl->lk.poll, 0)(tl->lk.poll) = (0);
3034#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
3035 release_status = KMP_LOCK_RELEASED1;
3036#endif
3037 }
3038 KMP_MB();
3039
3040#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
3041 // This is the case, if called from omp_init_lock_with_hint:
3042 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
3043 if (!codeptr)
3044 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
3045 if (ompt_enabled.enabled) {
3046 if (release_status == KMP_LOCK_RELEASED1) {
3047 if (ompt_enabled.ompt_callback_mutex_released) {
3048 // release_lock_last
3049 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)ompt_callback_mutex_released_callback(
3050 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3051 }
3052 } else if (ompt_enabled.ompt_callback_nest_lock) {
3053 // release_lock_previous
3054 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)ompt_callback_nest_lock_callback(
3055 ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3056 }
3057 }
3058#endif
3059
3060 return;
3061#else
3062 lck = (kmp_user_lock_p)user_lock;
3063#endif
3064 }
3065#if KMP_USE_FUTEX(1 && (0 || 1 || KMP_ARCH_ARM || 0))
3066 else if ((__kmp_user_lock_kind == lk_futex) &&
3067 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3068 OMP_NEST_LOCK_T_SIZEsizeof(void *))) {
3069 lck = (kmp_user_lock_p)user_lock;
3070 }
3071#endif
3072 else {
3073 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
3074 }
3075
3076#if USE_ITT_BUILD1
3077 __kmp_itt_lock_releasing(lck);
3078#endif /* USE_ITT_BUILD */
3079
3080 int release_status;
3081 release_status = RELEASE_NESTED_LOCK__kmp_release_nested_user_lock_with_checks(lck, gtid);
3082#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
3083 // This is the case, if called from omp_init_lock_with_hint:
3084 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
3085 if (!codeptr)
3086 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
3087 if (ompt_enabled.enabled) {
3088 if (release_status == KMP_LOCK_RELEASED1) {
3089 if (ompt_enabled.ompt_callback_mutex_released) {
3090 // release_lock_last
3091 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)ompt_callback_mutex_released_callback(
3092 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3093 }
3094 } else if (ompt_enabled.ompt_callback_nest_lock) {
3095 // release_lock_previous
3096 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)ompt_callback_nest_lock_callback(
3097 ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3098 }
3099 }
3100#endif
3101
3102#endif // KMP_USE_DYNAMIC_LOCK
3103}
3104
3105/* try to acquire the lock */
3106int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3107 KMP_COUNT_BLOCK(OMP_test_lock)((void)0);
3108
3109#if KMP_USE_DYNAMIC_LOCK1
3110 int rc;
3111  int tag = KMP_EXTRACT_D_TAG(user_lock);
3112#if USE_ITT_BUILD1
3113 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3114#endif
3115#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
3116 // This is the case, if called from omp_init_lock_with_hint:
3117 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
3118 if (!codeptr)
3119 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
3120 if (ompt_enabled.ompt_callback_mutex_acquire) {
3121 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)ompt_callback_mutex_acquire_callback(
3122 ompt_mutex_lock, omp_lock_hint_none,
3123 __ompt_get_mutex_impl_type(user_lock),
3124 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3125 }
3126#endif
3127#if KMP_USE_INLINED_TAS
3128  if (tag == locktag_tas && !__kmp_env_consistency_check) {
3129    KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
3130  } else
3131#elif KMP_USE_INLINED_FUTEX
3132  if (tag == locktag_futex && !__kmp_env_consistency_check) {
3133    KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
3134  } else
3135#endif
3136  {
3137    rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
3138  }
3139 if (rc) {
3140#if USE_ITT_BUILD1
3141 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3142#endif
3143#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
3144 if (ompt_enabled.ompt_callback_mutex_acquired) {
3145 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)ompt_callback_mutex_acquired_callback(
3146 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3147 }
3148#endif
3149 return FTN_TRUE(!0);
3150 } else {
3151#if USE_ITT_BUILD1
3152 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3153#endif
3154 return FTN_FALSE0;
3155 }
3156
3157#else // KMP_USE_DYNAMIC_LOCK
3158
3159 kmp_user_lock_p lck;
3160 int rc;
3161
3162 if ((__kmp_user_lock_kind == lk_tas) &&
3163 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZEsizeof(int))) {
3164 lck = (kmp_user_lock_p)user_lock;
3165 }
3166#if KMP_USE_FUTEX(1 && (0 || 1 || KMP_ARCH_ARM || 0))
3167 else if ((__kmp_user_lock_kind == lk_futex) &&
3168 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZEsizeof(int))) {
3169 lck = (kmp_user_lock_p)user_lock;
3170 }
3171#endif
3172 else {
3173 lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
3174 }
3175
3176#if USE_ITT_BUILD1
3177 __kmp_itt_lock_acquiring(lck);
3178#endif /* USE_ITT_BUILD */
3179#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
3180 // This is the case, if called from omp_init_lock_with_hint:
3181 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
3182 if (!codeptr)
3183 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
3184 if (ompt_enabled.ompt_callback_mutex_acquire) {
3185 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)ompt_callback_mutex_acquire_callback(
3186 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
3187 (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3188 }
3189#endif
3190
3191 rc = TEST_LOCK__kmp_test_user_lock_with_checks(lck, gtid);
3192#if USE_ITT_BUILD1
3193 if (rc) {
3194 __kmp_itt_lock_acquired(lck);
3195 } else {
3196 __kmp_itt_lock_cancelled(lck);
3197 }
3198#endif /* USE_ITT_BUILD */
3199#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
3200 if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
3201 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)ompt_callback_mutex_acquired_callback(
3202 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3203 }
3204#endif
3205
3206 return (rc ? FTN_TRUE(!0) : FTN_FALSE0);
3207
3208 /* Can't use serial interval since not block structured */
3209
3210#endif // KMP_USE_DYNAMIC_LOCK
3211}
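A minimal user-level counterpart of this entry point (assuming any OpenMP-capable compiler): omp_test_lock returns nonzero exactly when __kmpc_test_lock reports FTN_TRUE, so threads can skip the protected work instead of blocking.

    #include <omp.h>
    #include <cstdio>

    int main() {
      omp_lock_t l;
      omp_init_lock(&l);           // reaches __kmpc_init_lock
      int skipped = 0;
    #pragma omp parallel num_threads(4) reduction(+ : skipped)
      {
        if (omp_test_lock(&l)) {   // reaches __kmpc_test_lock
          // exclusive work would go here
          omp_unset_lock(&l);
        } else {
          ++skipped;               // returned without waiting
        }
      }
      omp_destroy_lock(&l);
      std::printf("threads that skipped: %d\n", skipped);
      return 0;
    }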
3212
3213/* try to acquire the lock */
3214int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3215#if KMP_USE_DYNAMIC_LOCK1
3216 int rc;
3217#if USE_ITT_BUILD1
3218 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3219#endif
3220#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
3221 // This is the case, if called from omp_init_lock_with_hint:
3222 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
3223 if (!codeptr)
3224 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
3225 if (ompt_enabled.ompt_callback_mutex_acquire) {
3226 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)ompt_callback_mutex_acquire_callback(
3227 ompt_mutex_nest_lock, omp_lock_hint_none,
3228 __ompt_get_mutex_impl_type(user_lock),
3229 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3230 }
3231#endif
3232  rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
3233#if USE_ITT_BUILD1
3234 if (rc) {
3235 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3236 } else {
3237 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3238 }
3239#endif
3240#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
3241 if (ompt_enabled.enabled && rc) {
3242 if (rc == 1) {
3243 if (ompt_enabled.ompt_callback_mutex_acquired) {
3244 // lock_first
3245 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)ompt_callback_mutex_acquired_callback(
3246 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3247 codeptr);
3248 }
3249 } else {
3250 if (ompt_enabled.ompt_callback_nest_lock) {
3251 // lock_next
3252 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)ompt_callback_nest_lock_callback(
3253 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3254 }
3255 }
3256 }
3257#endif
3258 return rc;
3259
3260#else // KMP_USE_DYNAMIC_LOCK
3261
3262 kmp_user_lock_p lck;
3263 int rc;
3264
3265 if ((__kmp_user_lock_kind == lk_tas) &&
3266 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3267 OMP_NEST_LOCK_T_SIZEsizeof(void *))) {
3268 lck = (kmp_user_lock_p)user_lock;
3269 }
3270#if KMP_USE_FUTEX(1 && (0 || 1 || KMP_ARCH_ARM || 0))
3271 else if ((__kmp_user_lock_kind == lk_futex) &&
3272 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3273 OMP_NEST_LOCK_T_SIZEsizeof(void *))) {
3274 lck = (kmp_user_lock_p)user_lock;
3275 }
3276#endif
3277 else {
3278 lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
3279 }
3280
3281#if USE_ITT_BUILD1
3282 __kmp_itt_lock_acquiring(lck);
3283#endif /* USE_ITT_BUILD */
3284
3285#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
3286 // This is the case, if called from omp_init_lock_with_hint:
3287 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
3288 if (!codeptr)
3289 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
3290  if (ompt_enabled.enabled &&
3291      ompt_enabled.ompt_callback_mutex_acquire) {
3292 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)ompt_callback_mutex_acquire_callback(
3293 ompt_mutex_nest_lock, omp_lock_hint_none,
3294 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
3295 codeptr);
3296 }
3297#endif
3298
3299 rc = TEST_NESTED_LOCK__kmp_test_nested_user_lock_with_checks(lck, gtid);
3300#if USE_ITT_BUILD1
3301 if (rc) {
3302 __kmp_itt_lock_acquired(lck);
3303 } else {
3304 __kmp_itt_lock_cancelled(lck);
3305 }
3306#endif /* USE_ITT_BUILD */
3307#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
3308 if (ompt_enabled.enabled && rc) {
3309 if (rc == 1) {
3310 if (ompt_enabled.ompt_callback_mutex_acquired) {
3311 // lock_first
3312 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)ompt_callback_mutex_acquired_callback(
3313 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3314 }
3315 } else {
3316 if (ompt_enabled.ompt_callback_nest_lock) {
3317 // lock_next
3318 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)ompt_callback_nest_lock_callback(
3319 ompt_mutex_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3320 }
3321 }
3322 }
3323#endif
3324 return rc;
3325
3326 /* Can't use serial interval since not block structured */
3327
3328#endif // KMP_USE_DYNAMIC_LOCK
3329}
3330
3331// Interface to fast scalable reduce methods routines
3332
3333// keep the selected method in a thread local structure for cross-function
3334// usage: will be used in __kmpc_end_reduce* functions;
3335// another solution: to re-determine the method one more time in
3336// __kmpc_end_reduce* functions (new prototype required then)
3337// AT: which solution is better?
3338#define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \
3339 ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))
3340
3341#define __KMP_GET_REDUCTION_METHOD(gtid) \
3342 (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
3343
3344// description of the packed_reduction_method variable: look at the macros in
3345// kmp.h
3346
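The packing referred to above can be inferred from the TEST_REDUCTION_METHOD and UNPACK_REDUCTION_BARRIER expansions later in this file: the reduction method lives in bits 8-15 of packed_reduction_method and the barrier type in bits 0-7. A sketch with placeholder values (the real enumerators are defined in kmp.h):

    #include <cstdio>

    using packed_reduction_method_t = unsigned;  // stand-in for PACKED_REDUCTION_METHOD_T

    static unsigned unpack_method(packed_reduction_method_t p) {
      return p & 0x0000FF00u;   // compared against e.g. tree_reduce_block
    }
    static unsigned unpack_barrier(packed_reduction_method_t p) {
      return p & 0x000000FFu;   // handed to __kmp_barrier as the barrier type
    }

    int main() {
      packed_reduction_method_t p = (0x03u << 8) | 0x01u;  // hypothetical values
      std::printf("method bits 0x%04x, barrier bits 0x%02x\n",
                  unpack_method(p), unpack_barrier(p));
      return 0;
    }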
3347// used in a critical section reduce block
3348static __forceinline void
3349__kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3350 kmp_critical_name *crit) {
3351
3352 // this lock was visible to a customer and to the threading profile tool as a
3353 // serial overhead span (although it's used for an internal purpose only)
3354 // why was it visible in previous implementation?
3355 // should we keep it visible in new reduce block?
3356 kmp_user_lock_p lck;
3357
3358#if KMP_USE_DYNAMIC_LOCK1
3359
3360 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
3361 // Check if it is initialized.
3362 if (*lk == 0) {
3363    if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3364      KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
3365                                  KMP_GET_D_TAG(__kmp_user_lock_seq));
3366    } else {
3367      __kmp_init_indirect_csptr(crit, loc, global_tid,
3368                                KMP_GET_I_TAG(__kmp_user_lock_seq));
3369 }
3370 }
3371 // Branch for accessing the actual lock object and set operation. This
3372 // branching is inevitable since this lock initialization does not follow the
3373 // normal dispatch path (lock table is not used).
3374  if (KMP_EXTRACT_D_TAG(lk) != 0) {
3375    lck = (kmp_user_lock_p)lk;
3376    KMP_DEBUG_ASSERT(lck != NULL);
3377 if (__kmp_env_consistency_check) {
3378 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3379 }
3380    KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
3381 } else {
3382 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
3383 lck = ilk->lock;
3384    KMP_DEBUG_ASSERT(lck != NULL);
3385 if (__kmp_env_consistency_check) {
3386 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3387 }
3388 KMP_I_LOCK_FUNC(ilk, set)__kmp_indirect_set[((kmp_indirect_lock_t *)(ilk))->type](lck, global_tid);
3389 }
3390
3391#else // KMP_USE_DYNAMIC_LOCK
3392
3393 // We know that the fast reduction code is only emitted by Intel compilers
3394 // with 32 byte critical sections. If there isn't enough space, then we
3395 // have to use a pointer.
3396 if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE32) {
3397 lck = (kmp_user_lock_p)crit;
3398 } else {
3399 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
3400 }
3401  KMP_DEBUG_ASSERT(lck != NULL);
3402
3403 if (__kmp_env_consistency_check)
3404 __kmp_push_sync(global_tid, ct_critical, loc, lck);
3405
3406 __kmp_acquire_user_lock_with_checks(lck, global_tid);
3407
3408#endif // KMP_USE_DYNAMIC_LOCK
3409}
3410
3411// used in a critical section reduce block
3412static __forceinline void
3413__kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3414 kmp_critical_name *crit) {
3415
3416 kmp_user_lock_p lck;
3417
3418#if KMP_USE_DYNAMIC_LOCK1
3419
3420  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3421 lck = (kmp_user_lock_p)crit;
3422 if (__kmp_env_consistency_check)
3423 __kmp_pop_sync(global_tid, ct_critical, loc);
3424    KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
3425 } else {
3426 kmp_indirect_lock_t *ilk =
3427 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit))((void *)(*((kmp_indirect_lock_t **)crit)));
3428 if (__kmp_env_consistency_check)
3429 __kmp_pop_sync(global_tid, ct_critical, loc);
3430 KMP_I_LOCK_FUNC(ilk, unset)__kmp_indirect_unset[((kmp_indirect_lock_t *)(ilk))->type](ilk->lock, global_tid);
3431 }
3432
3433#else // KMP_USE_DYNAMIC_LOCK
3434
3435 // We know that the fast reduction code is only emitted by Intel compilers
3436 // with 32 byte critical sections. If there isn't enough space, then we have
3437 // to use a pointer.
3438 if (__kmp_base_user_lock_size > 32) {
3439 lck = *((kmp_user_lock_p *)crit);
3440    KMP_ASSERT(lck != NULL);
3441 } else {
3442 lck = (kmp_user_lock_p)crit;
3443 }
3444
3445 if (__kmp_env_consistency_check)
3446 __kmp_pop_sync(global_tid, ct_critical, loc);
3447
3448 __kmp_release_user_lock_with_checks(lck, global_tid);
3449
3450#endif // KMP_USE_DYNAMIC_LOCK
3451} // __kmp_end_critical_section_reduce_block
3452
3453static __forceinline int
3454__kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
3455 int *task_state) {
3456 kmp_team_t *team;
3457
3458 // Check if we are inside the teams construct?
3459 if (th->th.th_teams_microtask) {
3460 *team_p = team = th->th.th_team;
3461 if (team->t.t_level == th->th.th_teams_level) {
3462 // This is reduction at teams construct.
3463      KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
3464 // Let's swap teams temporarily for the reduction.
3465 th->th.th_info.ds.ds_tid = team->t.t_master_tid;
3466 th->th.th_team = team->t.t_parent;
3467 th->th.th_team_nproc = th->th.th_team->t.t_nproc;
3468 th->th.th_task_team = th->th.th_team->t.t_task_team[0];
3469 *task_state = th->th.th_task_state;
3470 th->th.th_task_state = 0;
3471
3472 return 1;
3473 }
3474 }
3475 return 0;
3476}
3477
3478static __forceinline void
3479__kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
3480 // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction.
3481 th->th.th_info.ds.ds_tid = 0;
3482 th->th.th_team = team;
3483 th->th.th_team_nproc = team->t.t_nproc;
3484 th->th.th_task_team = team->t.t_task_team[task_state];
3485 __kmp_type_convert(task_state, &(th->th.th_task_state));
3486}
3487
3488/* 2.a.i. Reduce Block without a terminating barrier */
3489/*!
3490@ingroup SYNCHRONIZATION
3491@param loc source location information
3492@param global_tid global thread number
3493@param num_vars number of items (variables) to be reduced
3494@param reduce_size size of data in bytes to be reduced
3495@param reduce_data pointer to data to be reduced
3496@param reduce_func callback function providing reduction operation on two
3497operands and returning result of reduction in lhs_data
3498@param lck pointer to the unique lock data structure
3499@result 1 for the primary thread, 0 for all other team threads, 2 for all team
3500threads if atomic reduction needed
3501
3502The nowait version is used for a reduce clause with the nowait argument.
3503*/
3504kmp_int32
3505__kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3506 size_t reduce_size, void *reduce_data,
3507 void (*reduce_func)(void *lhs_data, void *rhs_data),
3508 kmp_critical_name *lck) {
3509
3510 KMP_COUNT_BLOCK(REDUCE_nowait)((void)0);
3511 int retval = 0;
3512 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3513 kmp_info_t *th;
3514 kmp_team_t *team;
3515 int teams_swapped = 0, task_state;
3516  KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
3517 __kmp_assert_valid_gtid(global_tid);
3518
3519 // why do we need this initialization here at all?
3520 // Reduction clause can not be used as a stand-alone directive.
3521
3522 // do not call __kmp_serial_initialize(), it will be called by
3523 // __kmp_parallel_initialize() if needed
3524 // possible detection of false-positive race by the threadchecker ???
3525 if (!TCR_4(__kmp_init_parallel)(__kmp_init_parallel))
3526 __kmp_parallel_initialize();
3527
3528 __kmp_resume_if_soft_paused();
3529
3530// check correctness of reduce block nesting
3531#if KMP_USE_DYNAMIC_LOCK1
3532 if (__kmp_env_consistency_check)
3533 __kmp_push_sync(global_tid, ct_reduce, loc, NULL__null, 0);
3534#else
3535 if (__kmp_env_consistency_check)
3536 __kmp_push_sync(global_tid, ct_reduce, loc, NULL__null);
3537#endif
3538
3539 th = __kmp_thread_from_gtid(global_tid);
3540 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3541
3542 // packed_reduction_method value will be reused by __kmp_end_reduce* function,
3543 // the value should be kept in a variable
3544 // the variable should be either a construct-specific or thread-specific
3545 // property, not a team specific property
3546 // (a thread can reach the next reduce block on the next construct, reduce
3547 // method may differ on the next construct)
3548 // an ident_t "loc" parameter could be used as a construct-specific property
3549 // (what if loc == 0?)
3550 // (if both construct-specific and team-specific variables were shared,
3551 // then unness extra syncs should be needed)
3552 // a thread-specific variable is better regarding two issues above (next
3553 // construct and extra syncs)
3554 // a thread-specific "th_local.reduction_method" variable is used currently
3555 // each thread executes 'determine' and 'set' lines (no need to execute by one
3556 // thread, to avoid unness extra syncs)
3557
3558 packed_reduction_method = __kmp_determine_reduction_method(
3559 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3560 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3561
3562  OMPT_REDUCTION_DECL(th, global_tid);
3563 if (packed_reduction_method == critical_reduce_block) {
3564
3565    OMPT_REDUCTION_BEGIN;
3566
3567 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3568 retval = 1;
3569
3570 } else if (packed_reduction_method == empty_reduce_block) {
3571
3572    OMPT_REDUCTION_BEGIN;
3573
3574 // usage: if team size == 1, no synchronization is required ( Intel
3575 // platforms only )
3576 retval = 1;
3577
3578 } else if (packed_reduction_method == atomic_reduce_block) {
3579
3580 retval = 2;
3581
3582 // all threads should do this pop here (because __kmpc_end_reduce_nowait()
3583 // won't be called by the code gen)
3584 // (it's not quite good, because the checking block has been closed by
3585 // this 'pop',
3586 // but atomic operation has not been executed yet, will be executed
3587 // slightly later, literally on next instruction)
3588 if (__kmp_env_consistency_check)
3589 __kmp_pop_sync(global_tid, ct_reduce, loc);
3590
3591  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3592                                   tree_reduce_block)) {
3593
3594// AT: performance issue: a real barrier here
3595// AT: (if primary thread is slow, other threads are blocked here waiting for
3596// the primary thread to come and release them)
3597// AT: (it's not what a customer might expect specifying NOWAIT clause)
3598// AT: (specifying NOWAIT won't result in improvement of performance, it'll
3599// be confusing to a customer)
3600// AT: another implementation of *barrier_gather*nowait() (or some other design)
3601// might go faster and be more in line with sense of NOWAIT
3602// AT: TO DO: do epcc test and compare times
3603
3604// this barrier should be invisible to a customer and to the threading profile
3605// tool (it's neither a terminating barrier nor customer's code, it's
3606// used for an internal purpose)
3607#if OMPT_SUPPORT1
3608 // JP: can this barrier potentially leed to task scheduling?
3609 // JP: as long as there is a barrier in the implementation, OMPT should and
3610 // will provide the barrier events
3611 // so we set-up the necessary frame/return addresses.
3612 ompt_frame_t *ompt_frame;
3613 if (ompt_enabled.enabled) {
3614 __ompt_get_task_info_internal(0, NULL__null, NULL__null, &ompt_frame, NULL__null, NULL__null);
3615 if (ompt_frame->enter_frame.ptr == NULL__null)
3616 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0)__builtin_frame_address(0);
3617 }
3618    OMPT_STORE_RETURN_ADDRESS(global_tid);
3619#endif
3620#if USE_ITT_NOTIFY1
3621 __kmp_threads[global_tid]->th.th_ident = loc;
3622#endif
3623    retval =
3624        __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3625                      global_tid, FALSE, reduce_size, reduce_data, reduce_func);
3626 retval = (retval != 0) ? (0) : (1);
3627#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
3628 if (ompt_enabled.enabled) {
3629 ompt_frame->enter_frame = ompt_data_none{0};
3630 }
3631#endif
3632
3633 // all other workers except primary thread should do this pop here
3634 // ( none of other workers will get to __kmpc_end_reduce_nowait() )
3635 if (__kmp_env_consistency_check) {
3636 if (retval == 0) {
3637 __kmp_pop_sync(global_tid, ct_reduce, loc);
3638 }
3639 }
3640
3641 } else {
3642
3643 // should never reach this block
3644    KMP_ASSERT(0); // "unexpected method"
3645 }
3646 if (teams_swapped) {
3647 __kmp_restore_swapped_teams(th, team, task_state);
3648 }
3649  KA_TRACE(
3650      10,
3651      ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
3652       global_tid, packed_reduction_method, retval));
3653
3654 return retval;
3655}
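For orientation, the kind of user code whose lowering reaches __kmpc_reduce_nowait is an OpenMP reduction on a nowait worksharing construct; which of the return paths documented above (critical, atomic, or tree barrier) is taken depends on __kmp_determine_reduction_method.

    #include <omp.h>
    #include <cstdio>

    int main() {
      long sum = 0;
    #pragma omp parallel
      {
        // The reduction clause on a nowait for-loop is what the compiler
        // lowers into __kmpc_reduce_nowait / __kmpc_end_reduce_nowait calls
        // (or their atomic / tree-barrier alternatives).
    #pragma omp for reduction(+ : sum) nowait
        for (int i = 0; i < 1000; ++i)
          sum += i;
      }
      std::printf("sum = %ld\n", sum);
      return 0;
    }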
3656
3657/*!
3658@ingroup SYNCHRONIZATION
3659@param loc source location information
3660@param global_tid global thread id.
3661@param lck pointer to the unique lock data structure
3662
3663Finish the execution of a reduce nowait.
3664*/
3665void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
3666 kmp_critical_name *lck) {
3667
3668 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3669
3670 KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_end_reduce_nowait() enter: called T#%d\n"
, global_tid); }
;
3671 __kmp_assert_valid_gtid(global_tid);
3672
3673 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3674
3675 OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid)ompt_data_t *my_task_data = (&(__kmp_thread_from_gtid(global_tid
)->th.th_current_task->ompt_task_info.task_data)); ompt_data_t
*my_parallel_data = (&(__kmp_thread_from_gtid(global_tid
)->th.th_team->t.ompt_team_info.parallel_data)); void *
return_address = __ompt_load_return_address(global_tid);
;
3676
3677 if (packed_reduction_method == critical_reduce_block) {
3678
3679 __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3680 OMPT_REDUCTION_ENDif (ompt_enabled.enabled && ompt_enabled.ompt_callback_reduction
) { ompt_callbacks.ompt_callback_reduction_callback( ompt_sync_region_reduction
, ompt_scope_end, my_parallel_data, my_task_data, return_address
); }
;
3681
3682 } else if (packed_reduction_method == empty_reduce_block) {
3683
3684 // usage: if team size == 1, no synchronization is required
3685 // (on Intel platforms only)
3686
3687 OMPT_REDUCTION_ENDif (ompt_enabled.enabled && ompt_enabled.ompt_callback_reduction
) { ompt_callbacks.ompt_callback_reduction_callback( ompt_sync_region_reduction
, ompt_scope_end, my_parallel_data, my_task_data, return_address
); }
;
3688
3689 } else if (packed_reduction_method == atomic_reduce_block) {
3690
3691 // neither the primary thread nor other workers should get here
3692 // (code gen does not generate this call in case 2: atomic reduce block)
3693 // actually it would be better to remove this else-if entirely;
3694 // after removal this value will be checked by the 'else' branch and will assert
3695
3696 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,((((enum _reduction_method)((packed_reduction_method) & (
0x0000FF00)))) == (tree_reduce_block))
3697 tree_reduce_block)((((enum _reduction_method)((packed_reduction_method) & (
0x0000FF00)))) == (tree_reduce_block))
) {
3698
3699 // only primary thread gets here
3700 // OMPT: tree reduction is annotated in the barrier code
3701
3702 } else {
3703
3704 // should never reach this block
3705 KMP_ASSERT(0)if (!(0)) { __kmp_debug_assert("0", "openmp/runtime/src/kmp_csupport.cpp"
, 3705); }
; // "unexpected method"
3706 }
3707
3708 if (__kmp_env_consistency_check)
3709 __kmp_pop_sync(global_tid, ct_reduce, loc);
3710
3711 KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n"
, global_tid, packed_reduction_method); }
3712 global_tid, packed_reduction_method))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n"
, global_tid, packed_reduction_method); }
;
3713
3714 return;
3715}
3716
3717/* 2.a.ii. Reduce Block with a terminating barrier */
3718
3719/*!
3720@ingroup SYNCHRONIZATION
3721@param loc source location information
3722@param global_tid global thread number
3723@param num_vars number of items (variables) to be reduced
3724@param reduce_size size of data in bytes to be reduced
3725@param reduce_data pointer to data to be reduced
3726@param reduce_func callback function providing reduction operation on two
3727operands and returning result of reduction in lhs_data
3728@param lck pointer to the unique lock data structure
3729@result 1 for the primary thread, 0 for all other team threads, 2 for all team
3730threads if atomic reduction is needed
3731
3732A blocking reduce that includes an implicit barrier.
3733*/
3734kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3735 size_t reduce_size, void *reduce_data,
3736 void (*reduce_func)(void *lhs_data, void *rhs_data),
3737 kmp_critical_name *lck) {
3738 KMP_COUNT_BLOCK(REDUCE_wait)((void)0);
3739 int retval = 0;
3740 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3741 kmp_info_t *th;
3742 kmp_team_t *team;
3743 int teams_swapped = 0, task_state;
3744
3745 KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_reduce() enter: called T#%d\n"
, global_tid); }
;
3746 __kmp_assert_valid_gtid(global_tid);
3747
3748 // why do we need this initialization here at all?
3749 // A reduction clause cannot be a stand-alone directive.
3750
3751 // do not call __kmp_serial_initialize(); it will be called by
3752 // __kmp_parallel_initialize() if needed
3753 // possible false-positive race detection by the thread checker?
3754 if (!TCR_4(__kmp_init_parallel)(__kmp_init_parallel))
3755 __kmp_parallel_initialize();
3756
3757 __kmp_resume_if_soft_paused();
3758
3759// check correctness of reduce block nesting
3760#if KMP_USE_DYNAMIC_LOCK1
3761 if (__kmp_env_consistency_check)
3762 __kmp_push_sync(global_tid, ct_reduce, loc, NULL__null, 0);
3763#else
3764 if (__kmp_env_consistency_check)
3765 __kmp_push_sync(global_tid, ct_reduce, loc, NULL__null);
3766#endif
3767
3768 th = __kmp_thread_from_gtid(global_tid);
3769 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3770
3771 packed_reduction_method = __kmp_determine_reduction_method(
3772 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3773 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3774
3775 OMPT_REDUCTION_DECL(th, global_tid)ompt_data_t *my_task_data = (&(th->th.th_current_task->
ompt_task_info.task_data)); ompt_data_t *my_parallel_data = (
&(th->th.th_team->t.ompt_team_info.parallel_data));
void *return_address = __ompt_load_return_address(global_tid
);
;
3776
3777 if (packed_reduction_method == critical_reduce_block) {
3778
3779 OMPT_REDUCTION_BEGINif (ompt_enabled.enabled && ompt_enabled.ompt_callback_reduction
) { ompt_callbacks.ompt_callback_reduction_callback( ompt_sync_region_reduction
, ompt_scope_begin, my_parallel_data, my_task_data, return_address
); }
;
3780 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3781 retval = 1;
3782
3783 } else if (packed_reduction_method == empty_reduce_block) {
3784
3785 OMPT_REDUCTION_BEGINif (ompt_enabled.enabled && ompt_enabled.ompt_callback_reduction
) { ompt_callbacks.ompt_callback_reduction_callback( ompt_sync_region_reduction
, ompt_scope_begin, my_parallel_data, my_task_data, return_address
); }
;
3786 // usage: if team size == 1, no synchronization is required
3787 // (Intel platforms only)
3788 retval = 1;
3789
3790 } else if (packed_reduction_method == atomic_reduce_block) {
3791
3792 retval = 2;
3793
3794 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,((((enum _reduction_method)((packed_reduction_method) & (
0x0000FF00)))) == (tree_reduce_block))
3795 tree_reduce_block)((((enum _reduction_method)((packed_reduction_method) & (
0x0000FF00)))) == (tree_reduce_block))
) {
3796
3797// case tree_reduce_block:
3798// this barrier should be visible to a customer and to the threading profile
3799// tool (it's a terminating barrier on constructs if NOWAIT not specified)
3800#if OMPT_SUPPORT1
3801 ompt_frame_t *ompt_frame;
3802 if (ompt_enabled.enabled) {
3803 __ompt_get_task_info_internal(0, NULL__null, NULL__null, &ompt_frame, NULL__null, NULL__null);
3804 if (ompt_frame->enter_frame.ptr == NULL__null)
3805 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0)__builtin_frame_address(0);
3806 }
3807 OMPT_STORE_RETURN_ADDRESS(global_tid)OmptReturnAddressGuard ReturnAddressGuard{global_tid, __builtin_return_address
(0)};
;
3808#endif
3809#if USE_ITT_NOTIFY1
3810 __kmp_threads[global_tid]->th.th_ident =
3811 loc; // needed for correct notification of frames
3812#endif
3813 retval =
3814 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method)((enum barrier_type)((packed_reduction_method) & (0x000000FF
)))
,
3815 global_tid, TRUE(!0), reduce_size, reduce_data, reduce_func);
3816 retval = (retval != 0) ? (0) : (1);
3817#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
3818 if (ompt_enabled.enabled) {
3819 ompt_frame->enter_frame = ompt_data_none{0};
3820 }
3821#endif
3822
3823 // all workers except the primary thread should do this pop here
3824 // (none of the workers except the primary will enter __kmpc_end_reduce())
3825 if (__kmp_env_consistency_check) {
3826 if (retval == 0) { // 0: all other workers; 1: primary thread
3827 __kmp_pop_sync(global_tid, ct_reduce, loc);
3828 }
3829 }
3830
3831 } else {
3832
3833 // should never reach this block
3834 KMP_ASSERT(0)if (!(0)) { __kmp_debug_assert("0", "openmp/runtime/src/kmp_csupport.cpp"
, 3834); }
; // "unexpected method"
3835 }
3836 if (teams_swapped) {
3837 __kmp_restore_swapped_teams(th, team, task_state);
3838 }
3839
3840 KA_TRACE(10,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n"
, global_tid, packed_reduction_method, retval); }
3841 ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n"
, global_tid, packed_reduction_method, retval); }
3842 global_tid, packed_reduction_method, retval))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n"
, global_tid, packed_reduction_method, retval); }
;
3843 return retval;
3844}
3845
3846/*!
3847@ingroup SYNCHRONIZATION
3848@param loc source location information
3849@param global_tid global thread id.
3850@param lck pointer to the unique lock data structure
3851
3852Finish the execution of a blocking reduce.
3853The <tt>lck</tt> pointer must be the same as that used in the corresponding
3854start function.
3855*/
3856void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
3857 kmp_critical_name *lck) {
3858
3859 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3860 kmp_info_t *th;
3861 kmp_team_t *team;
3862 int teams_swapped = 0, task_state;
3863
3864 KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_end_reduce() enter: called T#%d\n"
, global_tid); }
;
3865 __kmp_assert_valid_gtid(global_tid);
3866
3867 th = __kmp_thread_from_gtid(global_tid);
3868 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3869
3870 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3871
3872 // this barrier should be visible to a customer and to the threading profile
3873 // tool (it's a terminating barrier on constructs if NOWAIT not specified)
3874 OMPT_REDUCTION_DECL(th, global_tid)ompt_data_t *my_task_data = (&(th->th.th_current_task->
ompt_task_info.task_data)); ompt_data_t *my_parallel_data = (
&(th->th.th_team->t.ompt_team_info.parallel_data));
void *return_address = __ompt_load_return_address(global_tid
);
;
3875
3876 if (packed_reduction_method == critical_reduce_block) {
3877 __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3878
3879 OMPT_REDUCTION_ENDif (ompt_enabled.enabled && ompt_enabled.ompt_callback_reduction
) { ompt_callbacks.ompt_callback_reduction_callback( ompt_sync_region_reduction
, ompt_scope_end, my_parallel_data, my_task_data, return_address
); }
;
3880
3881// TODO: implicit barrier: should be exposed
3882#if OMPT_SUPPORT1
3883 ompt_frame_t *ompt_frame;
3884 if (ompt_enabled.enabled) {
3885 __ompt_get_task_info_internal(0, NULL__null, NULL__null, &ompt_frame, NULL__null, NULL__null);
3886 if (ompt_frame->enter_frame.ptr == NULL__null)
3887 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0)__builtin_frame_address(0);
3888 }
3889 OMPT_STORE_RETURN_ADDRESS(global_tid)OmptReturnAddressGuard ReturnAddressGuard{global_tid, __builtin_return_address
(0)};
;
3890#endif
3891#if USE_ITT_NOTIFY1
3892 __kmp_threads[global_tid]->th.th_ident = loc;
3893#endif
3894 __kmp_barrier(bs_plain_barrier, global_tid, FALSE0, 0, NULL__null, NULL__null);
3895#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
3896 if (ompt_enabled.enabled) {
3897 ompt_frame->enter_frame = ompt_data_none{0};
3898 }
3899#endif
3900
3901 } else if (packed_reduction_method == empty_reduce_block) {
3902
3903 OMPT_REDUCTION_ENDif (ompt_enabled.enabled && ompt_enabled.ompt_callback_reduction
) { ompt_callbacks.ompt_callback_reduction_callback( ompt_sync_region_reduction
, ompt_scope_end, my_parallel_data, my_task_data, return_address
); }
;
3904
3905// usage: if team size==1, no synchronization is required (Intel platforms only)
3906
3907// TODO: implicit barrier: should be exposed
3908#if OMPT_SUPPORT1
3909 ompt_frame_t *ompt_frame;
3910 if (ompt_enabled.enabled) {
3911 __ompt_get_task_info_internal(0, NULL__null, NULL__null, &ompt_frame, NULL__null, NULL__null);
3912 if (ompt_frame->enter_frame.ptr == NULL__null)
3913 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0)__builtin_frame_address(0);
3914 }
3915 OMPT_STORE_RETURN_ADDRESS(global_tid)OmptReturnAddressGuard ReturnAddressGuard{global_tid, __builtin_return_address
(0)};
;
3916#endif
3917#if USE_ITT_NOTIFY1
3918 __kmp_threads[global_tid]->th.th_ident = loc;
3919#endif
3920 __kmp_barrier(bs_plain_barrier, global_tid, FALSE0, 0, NULL__null, NULL__null);
3921#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
3922 if (ompt_enabled.enabled) {
3923 ompt_frame->enter_frame = ompt_data_none{0};
3924 }
3925#endif
3926
3927 } else if (packed_reduction_method == atomic_reduce_block) {
3928
3929#if OMPT_SUPPORT1
3930 ompt_frame_t *ompt_frame;
3931 if (ompt_enabled.enabled) {
3932 __ompt_get_task_info_internal(0, NULL__null, NULL__null, &ompt_frame, NULL__null, NULL__null);
3933 if (ompt_frame->enter_frame.ptr == NULL__null)
3934 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0)__builtin_frame_address(0);
3935 }
3936 OMPT_STORE_RETURN_ADDRESS(global_tid)OmptReturnAddressGuard ReturnAddressGuard{global_tid, __builtin_return_address
(0)};
;
3937#endif
3938// TODO: implicit barrier: should be exposed
3939#if USE_ITT_NOTIFY1
3940 __kmp_threads[global_tid]->th.th_ident = loc;
3941#endif
3942 __kmp_barrier(bs_plain_barrier, global_tid, FALSE0, 0, NULL__null, NULL__null);
3943#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
3944 if (ompt_enabled.enabled) {
3945 ompt_frame->enter_frame = ompt_data_none{0};
3946 }
3947#endif
3948
3949 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,((((enum _reduction_method)((packed_reduction_method) & (
0x0000FF00)))) == (tree_reduce_block))
3950 tree_reduce_block)((((enum _reduction_method)((packed_reduction_method) & (
0x0000FF00)))) == (tree_reduce_block))
) {
3951
3952 // only primary thread executes here (primary releases all other workers)
3953 __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method)((enum barrier_type)((packed_reduction_method) & (0x000000FF
)))
,
3954 global_tid);
3955
3956 } else {
3957
3958 // should never reach this block
3959 KMP_ASSERT(0)if (!(0)) { __kmp_debug_assert("0", "openmp/runtime/src/kmp_csupport.cpp"
, 3959); }
; // "unexpected method"
3960 }
3961 if (teams_swapped) {
3962 __kmp_restore_swapped_teams(th, team, task_state);
3963 }
3964
3965 if (__kmp_env_consistency_check)
3966 __kmp_pop_sync(global_tid, ct_reduce, loc);
3967
3968 KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_end_reduce() exit: called T#%d: method %08x\n"
, global_tid, packed_reduction_method); }
3969 global_tid, packed_reduction_method))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_end_reduce() exit: called T#%d: method %08x\n"
, global_tid, packed_reduction_method); }
;
3970
3971 return;
3972}
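// Example (added for illustration): a hand-written sketch of the call sequence a
// compiler typically emits around __kmpc_reduce()/__kmpc_end_reduce() for a
// construct such as `#pragma omp for reduction(+ : sum)`. The identifiers `loc`,
// `gtid`, `lck`, `sum` and `priv_sum` are assumed to be set up the way
// compiler-generated code would set them up; this is a sketch, not actual codegen.

static void add_func(void *lhs, void *rhs) {
  *(double *)lhs += *(double *)rhs; // combine the rhs partial result into lhs
}

// ... inside the outlined parallel region, after the reduced loop body:
switch (__kmpc_reduce(loc, gtid, /*num_vars=*/1, sizeof(double), &priv_sum,
                      add_func, &lck)) {
case 1: // this thread folds the combined partial result into the original variable
  sum += priv_sum;
  __kmpc_end_reduce(loc, gtid, &lck);
  break;
case 2: // runtime chose the atomic method: every thread updates `sum` atomically
  /* atomic update: sum += priv_sum */
  __kmpc_end_reduce(loc, gtid, &lck); // supplies the terminating barrier
  break;
default: // 0: remaining workers under the tree method; the runtime has already
  break; //    combined their partial results via add_func
}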
3973
3974#undef __KMP_GET_REDUCTION_METHOD
3975#undef __KMP_SET_REDUCTION_METHOD
3976
3977/* end of interface to fast scalable reduce routines */
3978
3979kmp_uint64 __kmpc_get_taskid() {
3980
3981 kmp_int32 gtid;
3982 kmp_info_t *thread;
3983
3984 gtid = __kmp_get_gtid()__kmp_get_global_thread_id();
3985 if (gtid < 0) {
3986 return 0;
3987 }
3988 thread = __kmp_thread_from_gtid(gtid);
3989 return thread->th.th_current_task->td_task_id;
3990
3991} // __kmpc_get_taskid
3992
3993kmp_uint64 __kmpc_get_parent_taskid() {
3994
3995 kmp_int32 gtid;
3996 kmp_info_t *thread;
3997 kmp_taskdata_t *parent_task;
3998
3999 gtid = __kmp_get_gtid()__kmp_get_global_thread_id();
4000 if (gtid < 0) {
4001 return 0;
4002 }
4003 thread = __kmp_thread_from_gtid(gtid);
4004 parent_task = thread->th.th_current_task->td_parent;
4005 return (parent_task == NULL__null ? 0 : parent_task->td_task_id);
4006
4007} // __kmpc_get_parent_taskid
4008
4009/*!
4010@ingroup WORK_SHARING
4011@param loc source location information.
4012@param gtid global thread number.
4013@param num_dims number of associated doacross loops.
4014@param dims info on loops bounds.
4015
4016Initialize doacross loop information.
4017Expect the compiler to send us inclusive bounds,
4018e.g. for (i = 2; i < 9; i += 2) gives lo=2, up=8, st=2.
4019*/
4020void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
4021 const struct kmp_dim *dims) {
4022 __kmp_assert_valid_gtid(gtid);
4023 int j, idx;
4024 kmp_int64 last, trace_count;
4025 kmp_info_t *th = __kmp_threads[gtid];
4026 kmp_team_t *team = th->th.th_team;
4027 kmp_uint32 *flags;
4028 kmp_disp_t *pr_buf = th->th.th_dispatch;
4029 dispatch_shared_info_t *sh_buf;
4030
4031 KA_TRACE(if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n"
, gtid, num_dims, !team->t.t_serialized); }
4032 20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n"
, gtid, num_dims, !team->t.t_serialized); }
4033 ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n"
, gtid, num_dims, !team->t.t_serialized); }
4034 gtid, num_dims, !team->t.t_serialized))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n"
, gtid, num_dims, !team->t.t_serialized); }
;
4035 KMP_DEBUG_ASSERT(dims != NULL)if (!(dims != __null)) { __kmp_debug_assert("dims != __null",
"openmp/runtime/src/kmp_csupport.cpp", 4035); }
;
4036 KMP_DEBUG_ASSERT(num_dims > 0)if (!(num_dims > 0)) { __kmp_debug_assert("num_dims > 0"
, "openmp/runtime/src/kmp_csupport.cpp", 4036); }
;
4037
4038 if (team->t.t_serialized) {
4039 KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmpc_doacross_init() exit: serialized team\n"
); }
;
4040 return; // no dependencies if team is serialized
4041 }
4042 KMP_DEBUG_ASSERT(team->t.t_nproc > 1)if (!(team->t.t_nproc > 1)) { __kmp_debug_assert("team->t.t_nproc > 1"
, "openmp/runtime/src/kmp_csupport.cpp", 4042); }
;
4043 idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
4044 // the next loop
4045 sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4046
4047 // Save bounds info into allocated private buffer
4048 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL)if (!(pr_buf->th_doacross_info == __null)) { __kmp_debug_assert
("pr_buf->th_doacross_info == __null", "openmp/runtime/src/kmp_csupport.cpp"
, 4048); }
;
4049 pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(___kmp_thread_malloc((th), (sizeof(kmp_int64) * (4 * num_dims
+ 1)), "openmp/runtime/src/kmp_csupport.cpp", 4050)
4050 th, sizeof(kmp_int64) * (4 * num_dims + 1))___kmp_thread_malloc((th), (sizeof(kmp_int64) * (4 * num_dims
+ 1)), "openmp/runtime/src/kmp_csupport.cpp", 4050)
;
4051 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL)if (!(pr_buf->th_doacross_info != __null)) { __kmp_debug_assert
("pr_buf->th_doacross_info != __null", "openmp/runtime/src/kmp_csupport.cpp"
, 4051); }
;
4052 pr_buf->th_doacross_info[0] =
4053 (kmp_int64)num_dims; // first element is number of dimensions
4054 // Also save the address of num_done so it can be accessed later without
4055 // knowing the buffer index
4056 pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
4057 pr_buf->th_doacross_info[2] = dims[0].lo;
4058 pr_buf->th_doacross_info[3] = dims[0].up;
4059 pr_buf->th_doacross_info[4] = dims[0].st;
4060 last = 5;
4061 for (j = 1; j < num_dims; ++j) {
4062 kmp_int64
4063 range_length; // To keep ranges of all dimensions but the first dims[0]
4064 if (dims[j].st == 1) { // most common case
4065 // AC: should we care of ranges bigger than LLONG_MAX? (not for now)
4066 range_length = dims[j].up - dims[j].lo + 1;
4067 } else {
4068 if (dims[j].st > 0) {
4069 KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo)if (!(dims[j].up > dims[j].lo)) { __kmp_debug_assert("dims[j].up > dims[j].lo"
, "openmp/runtime/src/kmp_csupport.cpp", 4069); }
;
4070 range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
4071 } else { // negative increment
4072 KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up)if (!(dims[j].lo > dims[j].up)) { __kmp_debug_assert("dims[j].lo > dims[j].up"
, "openmp/runtime/src/kmp_csupport.cpp", 4072); }
;
4073 range_length =
4074 (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
4075 }
4076 }
4077 pr_buf->th_doacross_info[last++] = range_length;
4078 pr_buf->th_doacross_info[last++] = dims[j].lo;
4079 pr_buf->th_doacross_info[last++] = dims[j].up;
4080 pr_buf->th_doacross_info[last++] = dims[j].st;
4081 }
4082
4083 // Compute total trip count.
4084 // Start with range of dims[0] which we don't need to keep in the buffer.
4085 if (dims[0].st == 1) { // most common case
4086 trace_count = dims[0].up - dims[0].lo + 1;
4087 } else if (dims[0].st > 0) {
4088 KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo)if (!(dims[0].up > dims[0].lo)) { __kmp_debug_assert("dims[0].up > dims[0].lo"
, "openmp/runtime/src/kmp_csupport.cpp", 4088); }
;
4089 trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
4090 } else { // negative increment
4091 KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up)if (!(dims[0].lo > dims[0].up)) { __kmp_debug_assert("dims[0].lo > dims[0].up"
, "openmp/runtime/src/kmp_csupport.cpp", 4091); }
;
4092 trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
4093 }
4094 for (j = 1; j < num_dims; ++j) {
4095 trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
4096 }
4097 KMP_DEBUG_ASSERT(trace_count > 0)if (!(trace_count > 0)) { __kmp_debug_assert("trace_count > 0"
, "openmp/runtime/src/kmp_csupport.cpp", 4097); }
;
4098
4099 // Check that the shared buffer is not occupied by another loop (idx -
4100 // __kmp_dispatch_num_buffers)
4101 if (idx != sh_buf->doacross_buf_idx) {
4102 // Shared buffer is occupied, wait for it to be free
4103 __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
4104 __kmp_eq_4, NULL__null);
4105 }
4106#if KMP_32_BIT_ARCH(0 || KMP_ARCH_ARM || 0)
4107 // Check if we are the first thread. After the CAS the first thread gets 0,
4108 // others get 1 if initialization is in progress, allocated pointer otherwise.
4109 // Treat pointer as volatile integer (value 0 or 1) until memory is allocated.
4110 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(__sync_val_compare_and_swap((volatile kmp_uint32 *)((volatile
kmp_int32 *)&sh_buf->doacross_flags), (kmp_uint32)(__null
), (kmp_uint32)(1))
4111 (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1)__sync_val_compare_and_swap((volatile kmp_uint32 *)((volatile
kmp_int32 *)&sh_buf->doacross_flags), (kmp_uint32)(__null
), (kmp_uint32)(1))
;
4112#else
4113 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(__sync_val_compare_and_swap((volatile kmp_uint64 *)((volatile
kmp_int64 *)&sh_buf->doacross_flags), (kmp_uint64)(__null
), (kmp_uint64)(1LL))
4114 (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL)__sync_val_compare_and_swap((volatile kmp_uint64 *)((volatile
kmp_int64 *)&sh_buf->doacross_flags), (kmp_uint64)(__null
), (kmp_uint64)(1LL))
;
4115#endif
4116 if (flags == NULL__null) {
4117 // we are the first thread, allocate the array of flags
4118 size_t size =
4119 (size_t)trace_count / 8 + 8; // in bytes, use single bit per iteration
4120 flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1)___kmp_thread_calloc((th), (size), (1), "openmp/runtime/src/kmp_csupport.cpp"
, 4120)
;
4121 KMP_MB();
4122 sh_buf->doacross_flags = flags;
4123 } else if (flags == (kmp_uint32 *)1) {
4124#if KMP_32_BIT_ARCH(0 || KMP_ARCH_ARM || 0)
4125 // initialization is still in progress, need to wait
4126 while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
4127#else
4128 while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
4129#endif
4130 KMP_YIELD(TRUE){ __kmp_x86_pause(); if (((!0)) && (((__kmp_use_yield
== 1) || (__kmp_use_yield == 2 && (((__kmp_nth) >
(__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc))))))) __kmp_yield
(); }
;
4131 KMP_MB();
4132 } else {
4133 KMP_MB();
4134 }
4135 KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1)if (!(sh_buf->doacross_flags > (kmp_uint32 *)1)) { __kmp_debug_assert
("sh_buf->doacross_flags > (kmp_uint32 *)1", "openmp/runtime/src/kmp_csupport.cpp"
, 4135); }
; // check ptr value
4136 pr_buf->th_doacross_flags =
4137 sh_buf->doacross_flags; // save private copy in order to not
4138 // touch shared buffer on each iteration
4139 KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmpc_doacross_init() exit: T#%d\n"
, gtid); }
;
4140}
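// Example (added for illustration): how a compiler might describe the loop from the
// doc comment above, `for (i = 2; i < 9; i += 2)`, to __kmpc_doacross_init(). `loc`
// and `gtid` are assumed to be available as they would be in compiler-generated
// code; only the lo/up/st members of struct kmp_dim referenced above are shown.

struct kmp_dim dim;
dim.lo = 2; // inclusive lower bound
dim.up = 8; // inclusive upper bound (the last iteration actually executed)
dim.st = 2; // loop stride
__kmpc_doacross_init(loc, gtid, /*num_dims=*/1, &dim);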
4141
4142void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
4143 __kmp_assert_valid_gtid(gtid);
4144 kmp_int64 shft;
4145 size_t num_dims, i;
4146 kmp_uint32 flag;
4147 kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4148 kmp_info_t *th = __kmp_threads[gtid];
4149 kmp_team_t *team = th->th.th_team;
4150 kmp_disp_t *pr_buf;
4151 kmp_int64 lo, up, st;
4152
4153 KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmpc_doacross_wait() enter: called T#%d\n"
, gtid); }
;
4154 if (team->t.t_serialized) {
4155 KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmpc_doacross_wait() exit: serialized team\n"
); }
;
4156 return; // no dependencies if team is serialized
4157 }
4158
4159 // calculate sequential iteration number and check out-of-bounds condition
4160 pr_buf = th->th.th_dispatch;
4161 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL)if (!(pr_buf->th_doacross_info != __null)) { __kmp_debug_assert
("pr_buf->th_doacross_info != __null", "openmp/runtime/src/kmp_csupport.cpp"
, 4161); }
;
4162 num_dims = (size_t)pr_buf->th_doacross_info[0];
4163 lo = pr_buf->th_doacross_info[2];
4164 up = pr_buf->th_doacross_info[3];
4165 st = pr_buf->th_doacross_info[4];
4166#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
4167 ompt_dependence_t deps[num_dims];
4168#endif
4169 if (st == 1) { // most common case
4170 if (vec[0] < lo || vec[0] > up) {
4171 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
"bounds [%lld,%lld]\n", gtid, vec[0], lo, up); }
4172 "bounds [%lld,%lld]\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
"bounds [%lld,%lld]\n", gtid, vec[0], lo, up); }
4173 gtid, vec[0], lo, up))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
"bounds [%lld,%lld]\n", gtid, vec[0], lo, up); }
;
4174 return;
4175 }
4176 iter_number = vec[0] - lo;
4177 } else if (st > 0) {
4178 if (vec[0] < lo || vec[0] > up) {
4179 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
"bounds [%lld,%lld]\n", gtid, vec[0], lo, up); }
4180 "bounds [%lld,%lld]\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
"bounds [%lld,%lld]\n", gtid, vec[0], lo, up); }
4181 gtid, vec[0], lo, up))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
"bounds [%lld,%lld]\n", gtid, vec[0], lo, up); }
;
4182 return;
4183 }
4184 iter_number = (kmp_uint64)(vec[0] - lo) / st;
4185 } else { // negative increment
4186 if (vec[0] > lo || vec[0] < up) {
4187 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
"bounds [%lld,%lld]\n", gtid, vec[0], lo, up); }
4188 "bounds [%lld,%lld]\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
"bounds [%lld,%lld]\n", gtid, vec[0], lo, up); }
4189 gtid, vec[0], lo, up))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
"bounds [%lld,%lld]\n", gtid, vec[0], lo, up); }
;
4190 return;
4191 }
4192 iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4193 }
4194#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
4195 deps[0].variable.value = iter_number;
4196 deps[0].dependence_type = ompt_dependence_type_sink;
4197#endif
4198 for (i = 1; i < num_dims; ++i) {
4199 kmp_int64 iter, ln;
4200 size_t j = i * 4;
4201 ln = pr_buf->th_doacross_info[j + 1];
4202 lo = pr_buf->th_doacross_info[j + 2];
4203 up = pr_buf->th_doacross_info[j + 3];
4204 st = pr_buf->th_doacross_info[j + 4];
4205 if (st == 1) {
4206 if (vec[i] < lo || vec[i] > up) {
4207 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
"bounds [%lld,%lld]\n", gtid, vec[i], lo, up); }
4208 "bounds [%lld,%lld]\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
"bounds [%lld,%lld]\n", gtid, vec[i], lo, up); }
4209 gtid, vec[i], lo, up))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
"bounds [%lld,%lld]\n", gtid, vec[i], lo, up); }
;
4210 return;
4211 }
4212 iter = vec[i] - lo;
4213 } else if (st > 0) {
4214 if (vec[i] < lo || vec[i] > up) {
4215 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
"bounds [%lld,%lld]\n", gtid, vec[i], lo, up); }
4216 "bounds [%lld,%lld]\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
"bounds [%lld,%lld]\n", gtid, vec[i], lo, up); }
4217 gtid, vec[i], lo, up))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
"bounds [%lld,%lld]\n", gtid, vec[i], lo, up); }
;
4218 return;
4219 }
4220 iter = (kmp_uint64)(vec[i] - lo) / st;
4221 } else { // st < 0
4222 if (vec[i] > lo || vec[i] < up) {
4223 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
"bounds [%lld,%lld]\n", gtid, vec[i], lo, up); }
4224 "bounds [%lld,%lld]\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
"bounds [%lld,%lld]\n", gtid, vec[i], lo, up); }
4225 gtid, vec[i], lo, up))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
"bounds [%lld,%lld]\n", gtid, vec[i], lo, up); }
;
4226 return;
4227 }
4228 iter = (kmp_uint64)(lo - vec[i]) / (-st);
4229 }
4230 iter_number = iter + ln * iter_number;
4231#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
4232 deps[i].variable.value = iter;
4233 deps[i].dependence_type = ompt_dependence_type_sink;
4234#endif
4235 }
4236 shft = iter_number % 32; // use 32-bit granularity
4237 iter_number >>= 5; // divided by 32
4238 flag = 1 << shft;
4239 while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
4240 KMP_YIELD(TRUE){ __kmp_x86_pause(); if (((!0)) && (((__kmp_use_yield
== 1) || (__kmp_use_yield == 2 && (((__kmp_nth) >
(__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc))))))) __kmp_yield
(); }
;
4241 }
4242 KMP_MB();
4243#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
4244 if (ompt_enabled.ompt_callback_dependences) {
4245 ompt_callbacks.ompt_callback(ompt_callback_dependences)ompt_callback_dependences_callback(
4246 &(OMPT_CUR_TASK_INFO(th)(&(th->th.th_current_task->ompt_task_info))->task_data), deps, (kmp_uint32)num_dims);
4247 }
4248#endif
4249 KA_TRACE(20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n"
, gtid, (iter_number << 5) + shft); }
4250 ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n"
, gtid, (iter_number << 5) + shft); }
4251 gtid, (iter_number << 5) + shft))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n"
, gtid, (iter_number << 5) + shft); }
;
4252}
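// Worked example (added): for a hypothetical 2-D nest whose inner range length
// `ln` is 10, outer iteration 7 and inner iteration 3 linearize exactly as above:
//
//   iter_number = 3 + 10 * 7 = 73
//   word index  = 73 >> 5    = 2   (each flag word covers 32 iterations)
//   bit offset  = 73 % 32    = 9
//
// so the waiter spins on bit 9 of pr_buf->th_doacross_flags[2] until the matching
// __kmpc_doacross_post() sets it.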
4253
4254void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
4255 __kmp_assert_valid_gtid(gtid);
4256 kmp_int64 shft;
4257 size_t num_dims, i;
4258 kmp_uint32 flag;
4259 kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4260 kmp_info_t *th = __kmp_threads[gtid];
4261 kmp_team_t *team = th->th.th_team;
4262 kmp_disp_t *pr_buf;
4263 kmp_int64 lo, st;
4264
4265 KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmpc_doacross_post() enter: called T#%d\n"
, gtid); }
;
4266 if (team->t.t_serialized) {
4267 KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmpc_doacross_post() exit: serialized team\n"
); }
;
4268 return; // no dependencies if team is serialized
4269 }
4270
4271 // calculate sequential iteration number (same as in "wait" but no
4272 // out-of-bounds checks)
4273 pr_buf = th->th.th_dispatch;
4274 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL)if (!(pr_buf->th_doacross_info != __null)) { __kmp_debug_assert
("pr_buf->th_doacross_info != __null", "openmp/runtime/src/kmp_csupport.cpp"
, 4274); }
;
4275 num_dims = (size_t)pr_buf->th_doacross_info[0];
4276 lo = pr_buf->th_doacross_info[2];
4277 st = pr_buf->th_doacross_info[4];
4278#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
4279 ompt_dependence_t deps[num_dims];
4280#endif
4281 if (st == 1) { // most common case
4282 iter_number = vec[0] - lo;
4283 } else if (st > 0) {
4284 iter_number = (kmp_uint64)(vec[0] - lo) / st;
4285 } else { // negative increment
4286 iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4287 }
4288#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
4289 deps[0].variable.value = iter_number;
4290 deps[0].dependence_type = ompt_dependence_type_source;
4291#endif
4292 for (i = 1; i < num_dims; ++i) {
4293 kmp_int64 iter, ln;
4294 size_t j = i * 4;
4295 ln = pr_buf->th_doacross_info[j + 1];
4296 lo = pr_buf->th_doacross_info[j + 2];
4297 st = pr_buf->th_doacross_info[j + 4];
4298 if (st == 1) {
4299 iter = vec[i] - lo;
4300 } else if (st > 0) {
4301 iter = (kmp_uint64)(vec[i] - lo) / st;
4302 } else { // st < 0
4303 iter = (kmp_uint64)(lo - vec[i]) / (-st);
4304 }
4305 iter_number = iter + ln * iter_number;
4306#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
4307 deps[i].variable.value = iter;
4308 deps[i].dependence_type = ompt_dependence_type_source;
4309#endif
4310 }
4311#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
4312 if (ompt_enabled.ompt_callback_dependences) {
4313 ompt_callbacks.ompt_callback(ompt_callback_dependences)ompt_callback_dependences_callback(
4314 &(OMPT_CUR_TASK_INFO(th)(&(th->th.th_current_task->ompt_task_info))->task_data), deps, (kmp_uint32)num_dims);
4315 }
4316#endif
4317 shft = iter_number % 32; // use 32-bit granularity
4318 iter_number >>= 5; // divided by 32
4319 flag = 1 << shft;
4320 KMP_MB();
4321 if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
4322 KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag)__sync_fetch_and_or((volatile kmp_uint32 *)(&pr_buf->th_doacross_flags
[iter_number]), (kmp_uint32)(flag))
;
4323 KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n"
, gtid, (iter_number << 5) + shft); }
4324 (iter_number << 5) + shft))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n"
, gtid, (iter_number << 5) + shft); }
;
4325}
4326
4327void __kmpc_doacross_fini(ident_t *loc, int gtid) {
4328 __kmp_assert_valid_gtid(gtid);
4329 kmp_int32 num_done;
4330 kmp_info_t *th = __kmp_threads[gtid];
4331 kmp_team_t *team = th->th.th_team;
4332 kmp_disp_t *pr_buf = th->th.th_dispatch;
4333
4334 KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmpc_doacross_fini() enter: called T#%d\n"
, gtid); }
;
4335 if (team->t.t_serialized) {
4336 KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmpc_doacross_fini() exit: serialized team %p\n"
, team); }
;
4337 return; // nothing to do
4338 }
4339 num_done =
4340 KMP_TEST_THEN_INC32((kmp_uintptr_t)(pr_buf->th_doacross_info[1]))__sync_fetch_and_add((volatile kmp_int32 *)((kmp_uintptr_t)(pr_buf
->th_doacross_info[1])), 1)
+ 1;
4341 if (num_done == th->th.th_team_nproc) {
4342 // we are the last thread, need to free shared resources
4343 int idx = pr_buf->th_doacross_buf_idx - 1;
4344 dispatch_shared_info_t *sh_buf =
4345 &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4346 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==if (!(pr_buf->th_doacross_info[1] == (kmp_int64)&sh_buf
->doacross_num_done)) { __kmp_debug_assert("pr_buf->th_doacross_info[1] == (kmp_int64)&sh_buf->doacross_num_done"
, "openmp/runtime/src/kmp_csupport.cpp", 4347); }
4347 (kmp_int64)&sh_buf->doacross_num_done)if (!(pr_buf->th_doacross_info[1] == (kmp_int64)&sh_buf
->doacross_num_done)) { __kmp_debug_assert("pr_buf->th_doacross_info[1] == (kmp_int64)&sh_buf->doacross_num_done"
, "openmp/runtime/src/kmp_csupport.cpp", 4347); }
;
4348 KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done)if (!(num_done == sh_buf->doacross_num_done)) { __kmp_debug_assert
("num_done == sh_buf->doacross_num_done", "openmp/runtime/src/kmp_csupport.cpp"
, 4348); }
;
4349 KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx)if (!(idx == sh_buf->doacross_buf_idx)) { __kmp_debug_assert
("idx == sh_buf->doacross_buf_idx", "openmp/runtime/src/kmp_csupport.cpp"
, 4349); }
;
4350 __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags))___kmp_thread_free((th), (const_cast<kmp_uint32 *>(sh_buf
->doacross_flags)), "openmp/runtime/src/kmp_csupport.cpp",
4350)
;
4351 sh_buf->doacross_flags = NULL__null;
4352 sh_buf->doacross_num_done = 0;
4353 sh_buf->doacross_buf_idx +=
4354 __kmp_dispatch_num_buffers; // free buffer for future re-use
4355 }
4356 // free private resources (need to keep buffer index forever)
4357 pr_buf->th_doacross_flags = NULL__null;
4358 __kmp_thread_free(th, (void *)pr_buf->th_doacross_info)___kmp_thread_free((th), ((void *)pr_buf->th_doacross_info
), "openmp/runtime/src/kmp_csupport.cpp", 4358)
;
4359 pr_buf->th_doacross_info = NULL__null;
4360 KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmpc_doacross_fini() exit: T#%d\n"
, gtid); }
;
4361}
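// Example (added for illustration): the user-level doacross loop that these four
// entry points implement; the annotations show how the depend clauses are expected
// to map onto the runtime calls above. `work()` is hypothetical and a[0] is assumed
// to be initialized by the caller.

extern double work(int i);

void doacross_demo(double *a, int n) {
#pragma omp parallel
#pragma omp for ordered(1)
  for (int i = 1; i < n; ++i) {            // -> __kmpc_doacross_init() at loop entry
    a[i] = work(i);
#pragma omp ordered depend(sink : i - 1)   // -> __kmpc_doacross_wait() on {i - 1}
    a[i] += a[i - 1];
#pragma omp ordered depend(source)         // -> __kmpc_doacross_post() for {i}
  }                                        // -> __kmpc_doacross_fini() at loop exit
}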
4362
4363/* OpenMP 5.1 Memory Management routines */
4364void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
4365 return __kmp_alloc(__kmp_entry_gtid()__kmp_get_global_thread_id_reg(), 0, size, allocator);
4366}
4367
4368void *omp_aligned_alloc(size_t align, size_t size,
4369 omp_allocator_handle_t allocator) {
4370 return __kmp_alloc(__kmp_entry_gtid()__kmp_get_global_thread_id_reg(), align, size, allocator);
4371}
4372
4373void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) {
4374 return __kmp_calloc(__kmp_entry_gtid()__kmp_get_global_thread_id_reg(), 0, nmemb, size, allocator);
4375}
4376
4377void *omp_aligned_calloc(size_t align, size_t nmemb, size_t size,
4378 omp_allocator_handle_t allocator) {
4379 return __kmp_calloc(__kmp_entry_gtid()__kmp_get_global_thread_id_reg(), align, nmemb, size, allocator);
4380}
4381
4382void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator,
4383 omp_allocator_handle_t free_allocator) {
4384 return __kmp_realloc(__kmp_entry_gtid()__kmp_get_global_thread_id_reg(), ptr, size, allocator,
4385 free_allocator);
4386}
4387
4388void omp_free(void *ptr, omp_allocator_handle_t allocator) {
4389 ___kmpc_free(__kmp_entry_gtid()__kmp_get_global_thread_id_reg(), ptr, allocator);
4390}
4391/* end of OpenMP 5.1 Memory Management routines */
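// Example (added): user-level use of the routines above. omp_default_mem_alloc is
// one of the predefined allocator handles from <omp.h>; error handling is minimal.

#include <omp.h>
#include <string.h>

void alloc_demo(size_t n) {
  double *buf = (double *)omp_aligned_alloc(64, n * sizeof(double),
                                            omp_default_mem_alloc);
  if (buf) {
    memset(buf, 0, n * sizeof(double)); // use the 64-byte-aligned buffer
    omp_free(buf, omp_default_mem_alloc);
  }
}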
4392
4393int __kmpc_get_target_offload(void) {
4394 if (!__kmp_init_serial) {
4395 __kmp_serial_initialize();
4396 }
4397 return __kmp_target_offload;
4398}
4399
4400int __kmpc_pause_resource(kmp_pause_status_t level) {
4401 if (!__kmp_init_serial) {
4402 return 1; // Can't pause if runtime is not initialized
4403 }
4404 return __kmp_pause_resource(level);
4405}
4406
4407void __kmpc_error(ident_t *loc, int severity, const char *message) {
4408 if (!__kmp_init_serial)
4409 __kmp_serial_initialize();
4410
4411 KMP_ASSERT(severity == severity_warning || severity == severity_fatal)if (!(severity == severity_warning || severity == severity_fatal
)) { __kmp_debug_assert("severity == severity_warning || severity == severity_fatal"
, "openmp/runtime/src/kmp_csupport.cpp", 4411); }
;
4412
4413#if OMPT_SUPPORT1
4414 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_error) {
4415 ompt_callbacks.ompt_callback(ompt_callback_error)ompt_callback_error_callback(
4416 (ompt_severity_t)severity, message, KMP_STRLENstrlen(message),
4417 OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0));
4418 }
4419#endif // OMPT_SUPPORT
4420
4421 char *src_loc;
4422 if (loc && loc->psource) {
4423 kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false);
4424 src_loc =
4425 __kmp_str_format("%s:%s:%s", str_loc.file, str_loc.line, str_loc.col);
4426 __kmp_str_loc_free(&str_loc);
4427 } else {
4428 src_loc = __kmp_str_format("unknown");
4429 }
4430
4431 if (severity == severity_warning)
4432 KMP_WARNING(UserDirectedWarning, src_loc, message)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_UserDirectedWarning
, src_loc, message), __kmp_msg_null)
;
4433 else
4434 KMP_FATAL(UserDirectedError, src_loc, message)__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_UserDirectedError, src_loc
, message), __kmp_msg_null)
;
4435
4436 __kmp_str_free(&src_loc);
4437}
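// Example (added): the OpenMP 5.1 `error` directive whose execution-time form the
// compiler is expected to lower into a call to __kmpc_error() above; severity(warning)
// corresponds to severity_warning and severity(fatal) to severity_fatal. Sketch only.

void validate(int n) {
  if (n <= 0) {
#pragma omp error at(execution) severity(warning) message("n must be positive")
  }
}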
4438
4439// Mark begin of scope directive.
4440void __kmpc_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
4441// reserved is for future extension of the scope directive and is not used.
4442#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
4443 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
4444 kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
4445 int tid = __kmp_tid_from_gtid(gtid);
4446 ompt_callbacks.ompt_callback(ompt_callback_work)ompt_callback_work_callback(
4447 ompt_work_scope, ompt_scope_begin,
4448 &(team->t.ompt_team_info.parallel_data),
4449 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
4450 OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0));
4451 }
4452#endif // OMPT_SUPPORT && OMPT_OPTIONAL
4453}
4454
4455// Mark end of scope directive
4456void __kmpc_end_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
4457// reserved is for future extension of the scope directive and is not used.
4458#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
4459 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
4460 kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
4461 int tid = __kmp_tid_from_gtid(gtid);
4462 ompt_callbacks.ompt_callback(ompt_callback_work)ompt_callback_work_callback(
4463 ompt_work_scope, ompt_scope_end,
4464 &(team->t.ompt_team_info.parallel_data),
4465 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
4466 OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0));
4467 }
4468#endif // OMPT_SUPPORT && OMPT_OPTIONAL
4469}
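// Example (added): a user-level `scope` construct of the kind these two entry points
// bracket; as the bodies above show, the runtime's only job here is to report the
// ompt_work_scope begin/end events. `partial()` is hypothetical.

extern double partial(void);

void scope_demo(void) {
  double x = 0.0;
#pragma omp parallel shared(x)
  {
#pragma omp scope reduction(+ : x)
    {
      x += partial();
    }
  }
}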
4470
4471#ifdef KMP_USE_VERSION_SYMBOLS
4472// For GOMP compatibility there are two versions of each omp_* API.
4473// One is the plain C symbol and one is the Fortran symbol with an appended
4474// underscore. When we implement a specific ompc_* version of an omp_*
4475// function, we want the plain GOMP versioned symbol to alias the ompc_* version
4476// instead of the Fortran versions in kmp_ftn_entry.h
4477extern "C" {
4478// Have to undef these from omp.h so they aren't translated into
4479// their ompc counterparts in the KMP_VERSION_OMPC_SYMBOL macros below
4480#ifdef omp_set_affinity_format
4481#undef omp_set_affinity_format
4482#endif
4483#ifdef omp_get_affinity_format
4484#undef omp_get_affinity_format
4485#endif
4486#ifdef omp_display_affinity
4487#undef omp_display_affinity
4488#endif
4489#ifdef omp_capture_affinity
4490#undef omp_capture_affinity
4491#endif
4492KMP_VERSION_OMPC_SYMBOL(ompc_set_affinity_format, omp_set_affinity_format, 50,__typeof__(__kmp_api_ompc_set_affinity_format) __kmp_api_ompc_set_affinity_format_50_alias
__attribute__((alias("__kmp_api_ompc_set_affinity_format")))
; __asm__(".symver " "__kmp_api_ompc_set_affinity_format" ","
"ompc_set_affinity_format" "@@" "VERSION" "\n\t"); __asm__( ".symver "
"__kmp_api_ompc_set_affinity_format_50_alias" "," "omp_set_affinity_format"
"@" "OMP_5.0" "\n\t")
4493 "OMP_5.0")__typeof__(__kmp_api_ompc_set_affinity_format) __kmp_api_ompc_set_affinity_format_50_alias
__attribute__((alias("__kmp_api_ompc_set_affinity_format")))
; __asm__(".symver " "__kmp_api_ompc_set_affinity_format" ","
"ompc_set_affinity_format" "@@" "VERSION" "\n\t"); __asm__( ".symver "
"__kmp_api_ompc_set_affinity_format_50_alias" "," "omp_set_affinity_format"
"@" "OMP_5.0" "\n\t")
;
4494KMP_VERSION_OMPC_SYMBOL(ompc_get_affinity_format, omp_get_affinity_format, 50,__typeof__(__kmp_api_ompc_get_affinity_format) __kmp_api_ompc_get_affinity_format_50_alias
__attribute__((alias("__kmp_api_ompc_get_affinity_format")))
; __asm__(".symver " "__kmp_api_ompc_get_affinity_format" ","
"ompc_get_affinity_format" "@@" "VERSION" "\n\t"); __asm__( ".symver "
"__kmp_api_ompc_get_affinity_format_50_alias" "," "omp_get_affinity_format"
"@" "OMP_5.0" "\n\t")
4495 "OMP_5.0")__typeof__(__kmp_api_ompc_get_affinity_format) __kmp_api_ompc_get_affinity_format_50_alias
__attribute__((alias("__kmp_api_ompc_get_affinity_format")))
; __asm__(".symver " "__kmp_api_ompc_get_affinity_format" ","
"ompc_get_affinity_format" "@@" "VERSION" "\n\t"); __asm__( ".symver "
"__kmp_api_ompc_get_affinity_format_50_alias" "," "omp_get_affinity_format"
"@" "OMP_5.0" "\n\t")
;
4496KMP_VERSION_OMPC_SYMBOL(ompc_display_affinity, omp_display_affinity, 50,__typeof__(__kmp_api_ompc_display_affinity) __kmp_api_ompc_display_affinity_50_alias
__attribute__((alias("__kmp_api_ompc_display_affinity"))); __asm__
(".symver " "__kmp_api_ompc_display_affinity" "," "ompc_display_affinity"
"@@" "VERSION" "\n\t"); __asm__( ".symver " "__kmp_api_ompc_display_affinity_50_alias"
"," "omp_display_affinity" "@" "OMP_5.0" "\n\t")
4497 "OMP_5.0")__typeof__(__kmp_api_ompc_display_affinity) __kmp_api_ompc_display_affinity_50_alias
__attribute__((alias("__kmp_api_ompc_display_affinity"))); __asm__
(".symver " "__kmp_api_ompc_display_affinity" "," "ompc_display_affinity"
"@@" "VERSION" "\n\t"); __asm__( ".symver " "__kmp_api_ompc_display_affinity_50_alias"
"," "omp_display_affinity" "@" "OMP_5.0" "\n\t")
;
4498KMP_VERSION_OMPC_SYMBOL(ompc_capture_affinity, omp_capture_affinity, 50,__typeof__(__kmp_api_ompc_capture_affinity) __kmp_api_ompc_capture_affinity_50_alias
__attribute__((alias("__kmp_api_ompc_capture_affinity"))); __asm__
(".symver " "__kmp_api_ompc_capture_affinity" "," "ompc_capture_affinity"
"@@" "VERSION" "\n\t"); __asm__( ".symver " "__kmp_api_ompc_capture_affinity_50_alias"
"," "omp_capture_affinity" "@" "OMP_5.0" "\n\t")
4499 "OMP_5.0")__typeof__(__kmp_api_ompc_capture_affinity) __kmp_api_ompc_capture_affinity_50_alias
__attribute__((alias("__kmp_api_ompc_capture_affinity"))); __asm__
(".symver " "__kmp_api_ompc_capture_affinity" "," "ompc_capture_affinity"
"@@" "VERSION" "\n\t"); __asm__( ".symver " "__kmp_api_ompc_capture_affinity_50_alias"
"," "omp_capture_affinity" "@" "OMP_5.0" "\n\t")
;
4500} // extern "C"
4501#endif