Bug Summary

File: build/source/openmp/runtime/src/kmp_csupport.cpp
Warning: line 3414, column 11
Access to field 'lock' results in a dereference of a null pointer (loaded from variable 'ilk')
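
Note on the finding: the flagged line (3414) lies beyond the excerpt reproduced below, but in this file 'ilk' is the conventional name for a kmp_indirect_lock_t * (see __kmp_init_indirect_csptr later in the listing), and the warning says the analyzer found a path on which that pointer is null when its 'lock' field is read. The sketch below is illustrative only, not the code at line 3414: it assumes the declarations from kmp_lock.h, uses a hypothetical helper (use_user_lock) in place of whatever the real call site does, and simply shows the null guard that would remove the null-pointer load the checker reports.

    // Illustrative sketch only; NOT the code at kmp_csupport.cpp:3414.
    #include "kmp_lock.h" // assumed: declares kmp_indirect_lock_t / kmp_user_lock_p

    // Hypothetical stand-in for the real use of the underlying lock object.
    static void use_user_lock(kmp_user_lock_p lck) { (void)lck; }

    static void example_touch_indirect_lock(kmp_indirect_lock_t *ilk) {
      if (ilk == NULL) {
        // The lookup produced no indirect lock: bail out instead of loading
        // through the null pointer the analyzer is modeling.
        return;
      }
      use_user_lock(ilk->lock); // safe: ilk was checked above
    }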

Annotated Source Code


clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name kmp_csupport.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -resource-dir /usr/lib/llvm-17/lib/clang/17 -D _DEBUG -D _GLIBCXX_ASSERTIONS -D _GNU_SOURCE -D _LIBCPP_ENABLE_ASSERTIONS -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -D omp_EXPORTS -I projects/openmp/runtime/src -I /build/source/openmp/runtime/src -I include -I /build/source/llvm/include -I /build/source/openmp/runtime/src/i18n -I /build/source/openmp/runtime/src/include -I /build/source/openmp/runtime/src/thirdparty/ittnotify -D _FORTIFY_SOURCE=2 -D NDEBUG -D _GNU_SOURCE -D _REENTRANT -D _FORTIFY_SOURCE=2 -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-17/lib/clang/17/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fmacro-prefix-map=/build/source/= -fcoverage-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fcoverage-prefix-map=/build/source/= -source-date-epoch 1683717183 -O2 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -Wno-misleading-indentation -Wno-extra -Wno-pedantic -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-frame-address -Wno-strict-aliasing -Wno-stringop-truncation -Wno-switch -Wno-uninitialized -Wno-cast-qual -std=c++17 -fdeprecated-macro -fdebug-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/= -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fno-rtti -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2023-05-10-133810-16478-1 -x c++ 
/build/source/openmp/runtime/src/kmp_csupport.cpp
1 /*
2  * kmp_csupport.cpp -- kfront linkage support for OpenMP.
3  */
4
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12
13 #define __KMP_IMP
14 #include "omp.h" /* extern "C" declarations of user-visible routines */
15 #include "kmp.h"
16 #include "kmp_error.h"
17 #include "kmp_i18n.h"
18 #include "kmp_itt.h"
19 #include "kmp_lock.h"
20 #include "kmp_stats.h"
21 #include "ompt-specific.h"
22
23 #define MAX_MESSAGE 512
24
25 // flags will be used in future, e.g. to implement openmp_strict library
26 // restrictions
27
28 /*!
29  * @ingroup STARTUP_SHUTDOWN
30  * @param loc in source location information
31  * @param flags in for future use (currently ignored)
32  *
33  * Initialize the runtime library. This call is optional; if it is not made then
34  * it will be implicitly called by attempts to use other library functions.
35  */
36 void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
37   // By default __kmpc_begin() is no-op.
38   char *env;
39   if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
40       __kmp_str_match_true(env)) {
41     __kmp_middle_initialize();
42     __kmp_assign_root_init_mask();
43     KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
44   } else if (__kmp_ignore_mppbeg() == FALSE) {
45     // By default __kmp_ignore_mppbeg() returns TRUE.
46     __kmp_internal_begin();
47     KC_TRACE(10, ("__kmpc_begin: called\n"));
48   }
49 }
50
51 /*!
52  * @ingroup STARTUP_SHUTDOWN
53  * @param loc source location information
54  *
55  * Shutdown the runtime library. This is also optional, and even if called will
56  * not do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to
57  * zero.
58  */
59 void __kmpc_end(ident_t *loc) {
60   // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end()
61   // call no-op. However, this can be overridden with KMP_IGNORE_MPPEND
62   // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
63   // returns FALSE and __kmpc_end() will unregister this root (it can cause
64   // library shut down).
65   if (__kmp_ignore_mppend() == FALSE) {
66     KC_TRACE(10, ("__kmpc_end: called\n"));
67     KA_TRACE(30, ("__kmpc_end\n"));
68
69     __kmp_internal_end_thread(-1);
70   }
71 #if KMP_OS_WINDOWS && OMPT_SUPPORT
72   // Normal exit process on Windows does not allow worker threads of the final
73   // parallel region to finish reporting their events, so shutting down the
74   // library here fixes the issue at least for the cases where __kmpc_end() is
75   // placed properly.
76   if (ompt_enabled.enabled)
77     __kmp_internal_end_library(__kmp_gtid_get_specific());
78 #endif
79 }
80
81 /*!
82 @ingroup THREAD_STATES
83 @param loc Source location information.
84 @return The global thread index of the active thread.
85
86 This function can be called in any context.
87
88 If the runtime has only been entered at the outermost level from a
89 single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is
90 that which would be returned by omp_get_thread_num() in the outermost
91 active parallel construct. (Or zero if there is no active parallel
92 construct, since the primary thread is necessarily thread zero).
93
94 If multiple non-OpenMP threads all enter an OpenMP construct then this
95 will be a unique thread identifier among all the threads created by
96 the OpenMP runtime (but the value cannot be defined in terms of
97 OpenMP thread ids returned by omp_get_thread_num()).
98 */
99 kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
100   kmp_int32 gtid = __kmp_entry_gtid();
101
102   KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));
103
104   return gtid;
105 }
106
107 /*!
108 @ingroup THREAD_STATES
109 @param loc Source location information.
110 @return The number of threads under control of the OpenMP<sup>*</sup> runtime
111
112 This function can be called in any context.
113 It returns the total number of threads under the control of the OpenMP runtime.
114 That is not a number that can be determined by any OpenMP standard calls, since
115 the library may be called from more than one non-OpenMP thread, and this
116 reflects the total over all such calls. Similarly the runtime maintains
117 underlying threads even when they are not active (since the cost of creating
118 and destroying OS threads is high), this call counts all such threads even if
119 they are not waiting for work.
120 */
121 kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
122   KC_TRACE(10,
123            ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));
124
125   return TCR_4(__kmp_all_nth);
126 }
127
128 /*!
129 @ingroup THREAD_STATES
130 @param loc Source location information.
131 @return The thread number of the calling thread in the innermost active parallel
132 construct.
133 */
134 kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
135   KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
136   return __kmp_tid_from_gtid(__kmp_entry_gtid());
137 }
138
139 /*!
140 @ingroup THREAD_STATES
141 @param loc Source location information.
142 @return The number of threads in the innermost active parallel construct.
143 */
144 kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
145   KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));
146
147   return __kmp_entry_thread()->th.th_team->t.t_nproc;
148 }
149
150 /*!
151  * @ingroup DEPRECATED
152  * @param loc location description
153  *
154  * This function need not be called. It always returns TRUE.
155  */
156 kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
157 #ifndef KMP_DEBUG
158
159   return TRUE;
160
161 #else
162
163   const char *semi2;
164   const char *semi3;
165   int line_no;
166
167   if (__kmp_par_range == 0) {
168     return TRUE;
169   }
170   semi2 = loc->psource;
171   if (semi2 == NULL) {
172     return TRUE;
173   }
174   semi2 = strchr(semi2, ';');
175   if (semi2 == NULL) {
176     return TRUE;
177   }
178   semi2 = strchr(semi2 + 1, ';');
179   if (semi2 == NULL) {
180     return TRUE;
181   }
182   if (__kmp_par_range_filename[0]) {
183     const char *name = semi2 - 1;
184     while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
185       name--;
186     }
187     if ((*name == '/') || (*name == ';')) {
188       name++;
189     }
190     if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
191       return __kmp_par_range < 0;
192     }
193   }
194   semi3 = strchr(semi2 + 1, ';');
195   if (__kmp_par_range_routine[0]) {
196     if ((semi3 != NULL) && (semi3 > semi2) &&
197         (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
198       return __kmp_par_range < 0;
199     }
200   }
201   if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
202     if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
203       return __kmp_par_range > 0;
204     }
205     return __kmp_par_range < 0;
206   }
207   return TRUE;
208
209 #endif /* KMP_DEBUG */
210 }
211
212 /*!
213 @ingroup THREAD_STATES
214 @param loc Source location information.
215 @return 1 if this thread is executing inside an active parallel region, zero if
216 not.
217 */
218 kmp_int32 __kmpc_in_parallel(ident_t *loc) {
219   return __kmp_entry_thread()->th.th_root->r.r_active;
220 }
221
222 /*!
223 @ingroup PARALLEL
224 @param loc source location information
225 @param global_tid global thread number
226 @param num_threads number of threads requested for this parallel construct
227
228 Set the number of threads to be used by the next fork spawned by this thread.
229 This call is only required if the parallel construct has a `num_threads` clause.
230 */
231 void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
232                              kmp_int32 num_threads) {
233   KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
234                 global_tid, num_threads));
235   __kmp_assert_valid_gtid(global_tid);
236   __kmp_push_num_threads(loc, global_tid, num_threads);
237 }
238
239 void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
240   KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
241   /* the num_threads are automatically popped */
242 }
243
244 void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
245                            kmp_int32 proc_bind) {
246   KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
247                 proc_bind));
248   __kmp_assert_valid_gtid(global_tid);
249   __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
250 }
251
252 /*!
253 @ingroup PARALLEL
254 @param loc source location information
255 @param argc total number of arguments in the ellipsis
256 @param microtask pointer to callback routine consisting of outlined parallel
257 construct
258 @param ... pointers to shared variables that aren't global
259
260 Do the actual fork and call the microtask in the relevant number of threads.
261 */
262 void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
263   int gtid = __kmp_entry_gtid();
264
265 #if (KMP_STATS_ENABLED)
266   // If we were in a serial region, then stop the serial timer, record
267   // the event, and start parallel region timer
268   stats_state_e previous_state = KMP_GET_THREAD_STATE();
269   if (previous_state == stats_state_e::SERIAL_REGION) {
270     KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
271   } else {
272     KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
273   }
274   int inParallel = __kmpc_in_parallel(loc);
275   if (inParallel) {
276     KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
277   } else {
278     KMP_COUNT_BLOCK(OMP_PARALLEL);
279   }
280 #endif
281
282   // maybe to save thr_state is enough here
283   {
284     va_list ap;
285     va_start(ap, microtask);
286
287 #if OMPT_SUPPORT
288     ompt_frame_t *ompt_frame;
289     if (ompt_enabled.enabled) {
290       kmp_info_t *master_th = __kmp_threads[gtid];
291       ompt_frame = &master_th->th.th_current_task->ompt_task_info.frame;
292       ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
293     }
294     OMPT_STORE_RETURN_ADDRESS(gtid);
295 #endif
296
297 #if INCLUDE_SSC_MARKS
298     SSC_MARK_FORKING();
299 #endif
300     __kmp_fork_call(loc, gtid, fork_context_intel, argc,
301                     VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
302                     VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
303                     kmp_va_addr_of(ap));
304 #if INCLUDE_SSC_MARKS
305     SSC_MARK_JOINING();
306 #endif
307     __kmp_join_call(loc, gtid
308 #if OMPT_SUPPORT
309                     ,
310                     fork_context_intel
311 #endif
312     );
313
314     va_end(ap);
315
316 #if OMPT_SUPPORT
317     if (ompt_enabled.enabled) {
318       ompt_frame->enter_frame = ompt_data_none;
319     }
320 #endif
321   }
322
323 #if KMP_STATS_ENABLED
324   if (previous_state == stats_state_e::SERIAL_REGION) {
325     KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
326     KMP_SET_THREAD_STATE(previous_state);
327   } else {
328     KMP_POP_PARTITIONED_TIMER();
329   }
330 #endif // KMP_STATS_ENABLED
331 }
332
333 /*!
334 @ingroup PARALLEL
335 @param loc source location information
336 @param microtask pointer to callback routine consisting of outlined parallel
337 construct
338 @param cond condition for running in parallel
339 @param args struct of pointers to shared variables that aren't global
340
341 Perform a fork only if the condition is true.
342 */
343 void __kmpc_fork_call_if(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
344                          kmp_int32 cond, void *args) {
345   int gtid = __kmp_entry_gtid();
346   int zero = 0;
347   if (cond) {
348     if (args)
349       __kmpc_fork_call(loc, argc, microtask, args);
350     else
351       __kmpc_fork_call(loc, argc, microtask);
352   } else {
353     __kmpc_serialized_parallel(loc, gtid);
354
355     if (args)
356       microtask(&gtid, &zero, args);
357     else
358       microtask(&gtid, &zero);
359
360     __kmpc_end_serialized_parallel(loc, gtid);
361   }
362 }
363
364 /*!
365 @ingroup PARALLEL
366 @param loc source location information
367 @param global_tid global thread number
368 @param num_teams number of teams requested for the teams construct
369 @param num_threads number of threads per team requested for the teams construct
370
371 Set the number of teams to be used by the teams construct.
372 This call is only required if the teams construct has a `num_teams` clause
373 or a `thread_limit` clause (or both).
374 */
375 void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
376                            kmp_int32 num_teams, kmp_int32 num_threads) {
377   KA_TRACE(20,
378            ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
379             global_tid, num_teams, num_threads));
380   __kmp_assert_valid_gtid(global_tid);
381   __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
382 }
383
384 /*!
385 @ingroup PARALLEL
386 @param loc source location information
387 @param global_tid global thread number
388 @param num_teams_lb lower bound on number of teams requested for the teams
389 construct
390 @param num_teams_ub upper bound on number of teams requested for the teams
391 construct
392 @param num_threads number of threads per team requested for the teams construct
393
394 Set the number of teams to be used by the teams construct. The number of initial
395 teams created will be greater than or equal to the lower bound and less than or
396 equal to the upper bound.
397 This call is only required if the teams construct has a `num_teams` clause
398 or a `thread_limit` clause (or both).
399 */
400 void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid,
401                               kmp_int32 num_teams_lb, kmp_int32 num_teams_ub,
402                               kmp_int32 num_threads) {
403   KA_TRACE(20, ("__kmpc_push_num_teams_51: enter T#%d num_teams_lb=%d"
404                 " num_teams_ub=%d num_threads=%d\n",
405                 global_tid, num_teams_lb, num_teams_ub, num_threads));
406   __kmp_assert_valid_gtid(global_tid);
407   __kmp_push_num_teams_51(loc, global_tid, num_teams_lb, num_teams_ub,
408                           num_threads);
409 }
410
411 /*!
412 @ingroup PARALLEL
413 @param loc source location information
414 @param argc total number of arguments in the ellipsis
415 @param microtask pointer to callback routine consisting of outlined teams
416 construct
417 @param ... pointers to shared variables that aren't global
418
419 Do the actual fork and call the microtask in the relevant number of threads.
420 */
421 void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
422                        ...) {
423   int gtid = __kmp_entry_gtid();
424   kmp_info_t *this_thr = __kmp_threads[gtid];
425   va_list ap;
426   va_start(ap, microtask);
427
428 #if KMP_STATS_ENABLED
429   KMP_COUNT_BLOCK(OMP_TEAMS);
430   stats_state_e previous_state = KMP_GET_THREAD_STATE();
431   if (previous_state == stats_state_e::SERIAL_REGION) {
432     KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
433   } else {
434     KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
435   }
436 #endif
437
438   // remember teams entry point and nesting level
439   this_thr->th.th_teams_microtask = microtask;
440   this_thr->th.th_teams_level =
441       this_thr->th.th_team->t.t_level; // AC: can be >0 on host
442
443 #if OMPT_SUPPORT
444   kmp_team_t *parent_team = this_thr->th.th_team;
445   int tid = __kmp_tid_from_gtid(gtid);
446   if (ompt_enabled.enabled) {
447     parent_team->t.t_implicit_task_taskdata[tid]
448         .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
449   }
450   OMPT_STORE_RETURN_ADDRESS(gtid);
451 #endif
452
453   // check if __kmpc_push_num_teams called, set default number of teams
454   // otherwise
455   if (this_thr->th.th_teams_size.nteams == 0) {
456     __kmp_push_num_teams(loc, gtid, 0, 0);
457   }
458   KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
459   KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
460   KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);
461
462   __kmp_fork_call(
463       loc, gtid, fork_context_intel, argc,
464       VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task
465       VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap));
466   __kmp_join_call(loc, gtid
467 #if OMPT_SUPPORT
468                   ,
469                   fork_context_intel
470 #endif
471   );
472
473   // Pop current CG root off list
474   KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
475   kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
476   this_thr->th.th_cg_roots = tmp->up;
477   KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
478                  " to node %p. cg_nthreads was %d\n",
479                  this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
480   KMP_DEBUG_ASSERT(tmp->cg_nthreads);
481   int i = tmp->cg_nthreads--;
482   if (i == 1) { // check if we are the last thread in CG (not always the case)
483     __kmp_free(tmp);
484   }
485   // Restore current task's thread_limit from CG root
486   KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
487   this_thr->th.th_current_task->td_icvs.thread_limit =
488       this_thr->th.th_cg_roots->cg_thread_limit;
489
490   this_thr->th.th_teams_microtask = NULL;
491   this_thr->th.th_teams_level = 0;
492   *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
493   va_end(ap);
494 #if KMP_STATS_ENABLED
495   if (previous_state == stats_state_e::SERIAL_REGION) {
496     KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
497     KMP_SET_THREAD_STATE(previous_state);
498   } else {
499     KMP_POP_PARTITIONED_TIMER();
500   }
501 #endif // KMP_STATS_ENABLED
502 }
503
504 // I don't think this function should ever have been exported.
505 // The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
506 // openmp code ever called it, but it's been exported from the RTL for so
507 // long that I'm afraid to remove the definition.
508 int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }
509
510 /*!
511 @ingroup PARALLEL
512 @param loc source location information
513 @param global_tid global thread number
514
515 Enter a serialized parallel construct. This interface is used to handle a
516 conditional parallel region, like this,
517 @code
518 #pragma omp parallel if (condition)
519 @endcode
520 when the condition is false.
521 */
522 void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
523   // The implementation is now in kmp_runtime.cpp so that it can share static
524   // functions with kmp_fork_call since the tasks to be done are similar in
525   // each case.
526   __kmp_assert_valid_gtid(global_tid);
527 #if OMPT_SUPPORT
528   OMPT_STORE_RETURN_ADDRESS(global_tid);
529 #endif
530   __kmp_serialized_parallel(loc, global_tid);
531 }
532
533 /*!
534 @ingroup PARALLEL
535 @param loc source location information
536 @param global_tid global thread number
537
538 Leave a serialized parallel construct.
539 */
540 void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
541   kmp_internal_control_t *top;
542   kmp_info_t *this_thr;
543   kmp_team_t *serial_team;
544
545   KC_TRACE(10,
546            ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));
547
548   /* skip all this code for autopar serialized loops since it results in
549      unacceptable overhead */
550   if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
551     return;
552
553   // Not autopar code
554   __kmp_assert_valid_gtid(global_tid);
555   if (!TCR_4(__kmp_init_parallel))
556     __kmp_parallel_initialize();
557
558   __kmp_resume_if_soft_paused();
559
560   this_thr = __kmp_threads[global_tid];
561   serial_team = this_thr->th.th_serial_team;
562
563   kmp_task_team_t *task_team = this_thr->th.th_task_team;
564   // we need to wait for the proxy tasks before finishing the thread
565   if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||
566                             task_team->tt.tt_hidden_helper_task_encountered))
567     __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
568
569   KMP_MB();
570   KMP_DEBUG_ASSERT(serial_team);
571   KMP_ASSERT(serial_team->t.t_serialized);
572   KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
573   KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
574   KMP_DEBUG_ASSERT(serial_team->t.t_threads);
575   KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
576
577 #if OMPT_SUPPORT
578   if (ompt_enabled.enabled &&
579       this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
580     OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
581     if (ompt_enabled.ompt_callback_implicit_task) {
582       ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
583           ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
584           OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
585     }
586
587     // reset clear the task id only after unlinking the task
588     ompt_data_t *parent_task_data;
589     __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);
590
591     if (ompt_enabled.ompt_callback_parallel_end) {
592       ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
593           &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
594           ompt_parallel_invoker_program | ompt_parallel_team,
595           OMPT_LOAD_RETURN_ADDRESS(global_tid));
596     }
597     __ompt_lw_taskteam_unlink(this_thr);
598     this_thr->th.ompt_thread_info.state = ompt_state_overhead;
599   }
600 #endif
601
602   /* If necessary, pop the internal control stack values and replace the team
603    * values */
604   top = serial_team->t.t_control_stack_top;
605   if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
606     copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
607     serial_team->t.t_control_stack_top = top->next;
608     __kmp_free(top);
609   }
610
611   /* pop dispatch buffers stack */
612   KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
613   {
614     dispatch_private_info_t *disp_buffer =
615         serial_team->t.t_dispatch->th_disp_buffer;
616     serial_team->t.t_dispatch->th_disp_buffer =
617         serial_team->t.t_dispatch->th_disp_buffer->next;
618     __kmp_free(disp_buffer);
619   }
620   this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore
621
622   --serial_team->t.t_serialized;
623   if (serial_team->t.t_serialized == 0) {
624
625     /* return to the parallel section */
626
627 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
628     if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
629       __kmp_clear_x87_fpu_status_word();
630       __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
631       __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
632     }
633 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
634
635     __kmp_pop_current_task_from_thread(this_thr);
636 #if OMPD_SUPPORT
637     if (ompd_state & OMPD_ENABLE_BP)
638       ompd_bp_parallel_end();
639 #endif
640
641     this_thr->th.th_team = serial_team->t.t_parent;
642     this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;
643
644     /* restore values cached in the thread */
645     this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
646     this_thr->th.th_team_master =
647         serial_team->t.t_parent->t.t_threads[0]; /* JPH */
648     this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;
649
650     /* TODO the below shouldn't need to be adjusted for serialized teams */
651     this_thr->th.th_dispatch =
652         &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];
653
654     KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
655     this_thr->th.th_current_task->td_flags.executing = 1;
656
657     if (__kmp_tasking_mode != tskm_immediate_exec) {
658       // Copy the task team from the new child / old parent team to the thread.
659       this_thr->th.th_task_team =
660           this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
661       KA_TRACE(20,
662                ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
663                 "team %p\n",
664                 global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
665     }
666 #if KMP_AFFINITY_SUPPORTED
667     if (this_thr->th.th_team->t.t_level == 0 && __kmp_affinity.flags.reset) {
668       __kmp_reset_root_init_mask(global_tid);
669     }
670 #endif
671   } else {
672     if (__kmp_tasking_mode != tskm_immediate_exec) {
673       KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
674                     "depth of serial team %p to %d\n",
675                     global_tid, serial_team, serial_team->t.t_serialized));
676     }
677   }
678
679   serial_team->t.t_level--;
680   if (__kmp_env_consistency_check)
681     __kmp_pop_parallel(global_tid, NULL);
682 #if OMPT_SUPPORT
683   if (ompt_enabled.enabled)
684     this_thr->th.ompt_thread_info.state =
685         ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
686                                            : ompt_state_work_parallel);
687 #endif
688 }
689
690 /*!
691 @ingroup SYNCHRONIZATION
692 @param loc source location information.
693
694 Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though
695 depending on the memory ordering convention obeyed by the compiler
696 even that may not be necessary).
697 */
698 void __kmpc_flush(ident_t *loc) {
699   KC_TRACE(10, ("__kmpc_flush: called\n"));
700
701   /* need explicit __mf() here since use volatile instead in library */
702   KMP_MFENCE(); /* Flush all pending memory write invalidates. */
703
704 #if OMPT_SUPPORT && OMPT_OPTIONAL
705   if (ompt_enabled.ompt_callback_flush) {
706     ompt_callbacks.ompt_callback(ompt_callback_flush)(
707         __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
708   }
709 #endif
710 }
711
712 /* -------------------------------------------------------------------------- */
713 /*!
714 @ingroup SYNCHRONIZATION
715 @param loc source location information
716 @param global_tid thread id.
717
718 Execute a barrier.
719 */
720 void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
721   KMP_COUNT_BLOCK(OMP_BARRIER);
722   KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
723   __kmp_assert_valid_gtid(global_tid);
724
725   if (!TCR_4(__kmp_init_parallel))
726     __kmp_parallel_initialize();
727
728   __kmp_resume_if_soft_paused();
729
730   if (__kmp_env_consistency_check) {
731     if (loc == 0) {
732       KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
733     }
734     __kmp_check_barrier(global_tid, ct_barrier, loc);
735   }
736
737 #if OMPT_SUPPORT
738   ompt_frame_t *ompt_frame;
739   if (ompt_enabled.enabled) {
740     __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
741     if (ompt_frame->enter_frame.ptr == NULL)
742       ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
743   }
744   OMPT_STORE_RETURN_ADDRESS(global_tid);
745 #endif
746   __kmp_threads[global_tid]->th.th_ident = loc;
747   // TODO: explicit barrier_wait_id:
748   // this function is called when 'barrier' directive is present or
749   // implicit barrier at the end of a worksharing construct.
750   // 1) better to add a per-thread barrier counter to a thread data structure
751   // 2) set to 0 when a new team is created
752   // 4) no sync is required
753
754   __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
755 #if OMPT_SUPPORT && OMPT_OPTIONAL
756   if (ompt_enabled.enabled) {
757     ompt_frame->enter_frame = ompt_data_none;
758   }
759 #endif
760 }
761
762 /* The BARRIER for a MASTER section is always explicit */
763 /*!
764 @ingroup WORK_SHARING
765 @param loc source location information.
766 @param global_tid global thread number.
767 @return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise.
768 */
769 kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
770   int status = 0;
771
772   KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
773   __kmp_assert_valid_gtid(global_tid);
774
775   if (!TCR_4(__kmp_init_parallel))
776     __kmp_parallel_initialize();
777
778   __kmp_resume_if_soft_paused();
779
780   if (KMP_MASTER_GTID(global_tid)) {
781     KMP_COUNT_BLOCK(OMP_MASTER);
782     KMP_PUSH_PARTITIONED_TIMER(OMP_master);
783     status = 1;
784   }
785
786 #if OMPT_SUPPORT && OMPT_OPTIONAL
787   if (status) {
788     if (ompt_enabled.ompt_callback_masked) {
789       kmp_info_t *this_thr = __kmp_threads[global_tid];
790       kmp_team_t *team = this_thr->th.th_team;
791
792       int tid = __kmp_tid_from_gtid(global_tid);
793       ompt_callbacks.ompt_callback(ompt_callback_masked)(
794           ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
795           &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
796           OMPT_GET_RETURN_ADDRESS(0));
797     }
798   }
799 #endif
800
801   if (__kmp_env_consistency_check) {
802 #if KMP_USE_DYNAMIC_LOCK
803     if (status)
804       __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
805     else
806       __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
807 #else
808     if (status)
809       __kmp_push_sync(global_tid, ct_master, loc, NULL);
810     else
811       __kmp_check_sync(global_tid, ct_master, loc, NULL);
812 #endif
813   }
814
815   return status;
816 }
817
818 /*!
819 @ingroup WORK_SHARING
820 @param loc source location information.
821 @param global_tid global thread number.
822
823 Mark the end of a <tt>master</tt> region. This should only be called by the
824 thread that executes the <tt>master</tt> region.
825 */
826 void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
827   KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
828   __kmp_assert_valid_gtid(global_tid);
829   KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
830   KMP_POP_PARTITIONED_TIMER();
831
832 #if OMPT_SUPPORT && OMPT_OPTIONAL
833   kmp_info_t *this_thr = __kmp_threads[global_tid];
834   kmp_team_t *team = this_thr->th.th_team;
835   if (ompt_enabled.ompt_callback_masked) {
836     int tid = __kmp_tid_from_gtid(global_tid);
837     ompt_callbacks.ompt_callback(ompt_callback_masked)(
838         ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
839         &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
840         OMPT_GET_RETURN_ADDRESS(0));
841   }
842 #endif
843
844   if (__kmp_env_consistency_check) {
845     if (KMP_MASTER_GTID(global_tid))
846       __kmp_pop_sync(global_tid, ct_master, loc);
847   }
848 }
849
850 /*!
851 @ingroup WORK_SHARING
852 @param loc source location information.
853 @param global_tid global thread number.
854 @param filter result of evaluating filter clause on thread global_tid, or zero
855 if no filter clause present
856 @return 1 if this thread should execute the <tt>masked</tt> block, 0 otherwise.
857 */
858 kmp_int32 __kmpc_masked(ident_t *loc, kmp_int32 global_tid, kmp_int32 filter) {
859   int status = 0;
860   int tid;
861   KC_TRACE(10, ("__kmpc_masked: called T#%d\n", global_tid));
862   __kmp_assert_valid_gtid(global_tid);
863
864   if (!TCR_4(__kmp_init_parallel))
865     __kmp_parallel_initialize();
866
867   __kmp_resume_if_soft_paused();
868
869   tid = __kmp_tid_from_gtid(global_tid);
870   if (tid == filter) {
871     KMP_COUNT_BLOCK(OMP_MASKED);
872     KMP_PUSH_PARTITIONED_TIMER(OMP_masked);
873     status = 1;
874   }
875
876 #if OMPT_SUPPORT && OMPT_OPTIONAL
877   if (status) {
878     if (ompt_enabled.ompt_callback_masked) {
879       kmp_info_t *this_thr = __kmp_threads[global_tid];
880       kmp_team_t *team = this_thr->th.th_team;
881       ompt_callbacks.ompt_callback(ompt_callback_masked)(
882           ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
883           &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
884           OMPT_GET_RETURN_ADDRESS(0));
885     }
886   }
887 #endif
888
889   if (__kmp_env_consistency_check) {
890 #if KMP_USE_DYNAMIC_LOCK
891     if (status)
892       __kmp_push_sync(global_tid, ct_masked, loc, NULL, 0);
893     else
894       __kmp_check_sync(global_tid, ct_masked, loc, NULL, 0);
895 #else
896     if (status)
897       __kmp_push_sync(global_tid, ct_masked, loc, NULL);
898     else
899       __kmp_check_sync(global_tid, ct_masked, loc, NULL);
900 #endif
901   }
902
903   return status;
904 }
905
906 /*!
907 @ingroup WORK_SHARING
908 @param loc source location information.
909 @param global_tid global thread number.
910
911 Mark the end of a <tt>masked</tt> region. This should only be called by the
912 thread that executes the <tt>masked</tt> region.
913 */
914 void __kmpc_end_masked(ident_t *loc, kmp_int32 global_tid) {
915   KC_TRACE(10, ("__kmpc_end_masked: called T#%d\n", global_tid));
916   __kmp_assert_valid_gtid(global_tid);
917   KMP_POP_PARTITIONED_TIMER();
918
919 #if OMPT_SUPPORT && OMPT_OPTIONAL
920   kmp_info_t *this_thr = __kmp_threads[global_tid];
921   kmp_team_t *team = this_thr->th.th_team;
922   if (ompt_enabled.ompt_callback_masked) {
923     int tid = __kmp_tid_from_gtid(global_tid);
924     ompt_callbacks.ompt_callback(ompt_callback_masked)(
925         ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
926         &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
927         OMPT_GET_RETURN_ADDRESS(0));
928   }
929 #endif
930
931   if (__kmp_env_consistency_check) {
932     __kmp_pop_sync(global_tid, ct_masked, loc);
933   }
934 }
935
936 /*!
937 @ingroup WORK_SHARING
938 @param loc source location information.
939 @param gtid global thread number.
940
941 Start execution of an <tt>ordered</tt> construct.
942 */
943 void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
944   int cid = 0;
945   kmp_info_t *th;
946   KMP_DEBUG_ASSERT(__kmp_init_serial);
947
948   KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
949   __kmp_assert_valid_gtid(gtid);
950
951   if (!TCR_4(__kmp_init_parallel))
952     __kmp_parallel_initialize();
953
954   __kmp_resume_if_soft_paused();
955
956 #if USE_ITT_BUILD
957   __kmp_itt_ordered_prep(gtid);
958 // TODO: ordered_wait_id
959 #endif /* USE_ITT_BUILD */
960
961   th = __kmp_threads[gtid];
962
963 #if OMPT_SUPPORT && OMPT_OPTIONAL
964   kmp_team_t *team;
965   ompt_wait_id_t lck;
966   void *codeptr_ra;
967   OMPT_STORE_RETURN_ADDRESS(gtid);
968   if (ompt_enabled.enabled) {
969     team = __kmp_team_from_gtid(gtid);
970     lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
971     /* OMPT state update */
972     th->th.ompt_thread_info.wait_id = lck;
973     th->th.ompt_thread_info.state = ompt_state_wait_ordered;
974
975     /* OMPT event callback */
976     codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
977     if (ompt_enabled.ompt_callback_mutex_acquire) {
978       ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
979           ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
980           codeptr_ra);
981     }
982   }
983 #endif
984
985   if (th->th.th_dispatch->th_deo_fcn != 0)
986     (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
987   else
988     __kmp_parallel_deo(&gtid, &cid, loc);
989
990 #if OMPT_SUPPORT && OMPT_OPTIONAL
991   if (ompt_enabled.enabled) {
992     /* OMPT state update */
993     th->th.ompt_thread_info.state = ompt_state_work_parallel;
994     th->th.ompt_thread_info.wait_id = 0;
995
996     /* OMPT event callback */
997     if (ompt_enabled.ompt_callback_mutex_acquired) {
998       ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
999           ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1000     }
1001   }
1002 #endif
1003
1004 #if USE_ITT_BUILD
1005   __kmp_itt_ordered_start(gtid);
1006 #endif /* USE_ITT_BUILD */
1007 }
1008
1009 /*!
1010 @ingroup WORK_SHARING
1011 @param loc source location information.
1012 @param gtid global thread number.
1013
1014 End execution of an <tt>ordered</tt> construct.
1015 */
1016 void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
1017   int cid = 0;
1018   kmp_info_t *th;
1019
1020   KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
1021   __kmp_assert_valid_gtid(gtid);
1022
1023 #if USE_ITT_BUILD
1024   __kmp_itt_ordered_end(gtid);
1025 // TODO: ordered_wait_id
1026 #endif /* USE_ITT_BUILD */
1027
1028   th = __kmp_threads[gtid];
1029
1030   if (th->th.th_dispatch->th_dxo_fcn != 0)
1031     (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
1032   else
1033     __kmp_parallel_dxo(&gtid, &cid, loc);
1034
1035 #if OMPT_SUPPORT && OMPT_OPTIONAL
1036   OMPT_STORE_RETURN_ADDRESS(gtid);
1037   if (ompt_enabled.ompt_callback_mutex_released) {
1038     ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1039         ompt_mutex_ordered,
1040         (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
1041             ->t.t_ordered.dt.t_value,
1042         OMPT_LOAD_RETURN_ADDRESS(gtid));
1043   }
1044 #endif
1045 }
1046
1047 #if KMP_USE_DYNAMIC_LOCK
1048
1049 static __forceinline void
1050 __kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
1051                           kmp_int32 gtid, kmp_indirect_locktag_t tag) {
1052   // Pointer to the allocated indirect lock is written to crit, while indexing
1053   // is ignored.
1054   void *idx;
1055   kmp_indirect_lock_t **lck;
1056   lck = (kmp_indirect_lock_t **)crit;
1057   kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
1058   KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
1059   KMP_SET_I_LOCK_LOCATION(ilk, loc);
1060   KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
1061   KA_TRACE(20,
1062            ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
1063 #if USE_ITT_BUILD
1064   __kmp_itt_critical_creating(ilk->lock, loc);
1065 #endif
1066   int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
1067   if (status == 0) {
1068 #if USE_ITT_BUILD
1069     __kmp_itt_critical_destroyed(ilk->lock);
1070 #endif
1071     // We don't really need to destroy the unclaimed lock here since it will be
1072     // cleaned up at program exit.
1073     // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
1074   }
1075   KMP_DEBUG_ASSERT(*lck != NULL);
1076 }
1077
1078 // Fast-path acquire tas lock
1079 #define KMP_ACQUIRE_TAS_LOCK(lock, gtid)                                       \
1080   {                                                                            \
1081     kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                \
1082     kmp_int32 tas_free = KMP_LOCK_FREE(tas);                                   \
1083     kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);                         \
1084     if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free ||                          \
1085         !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) {    \
1086       kmp_uint32 spins;                                                        \
1087       KMP_FSYNC_PREPARE(l);                                                    \
1088       KMP_INIT_YIELD(spins);                                                   \
1089       kmp_backoff_t backoff = __kmp_spin_backoff_params;                       \
1090       do {                                                                     \
1091         if (TCR_4(__kmp_nth) >                                                 \
1092             (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {             \
1093           KMP_YIELD(TRUE);                                                     \
1094         } else {                                                               \
1095           KMP_YIELD_SPIN(spins);                                               \
1096         }                                                                      \
1097         __kmp_spin_backoff(&backoff);                                          \
1098       } while (                                                                \
1099           KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free ||                        \
1100           !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy));   \
1101     }                                                                          \
1102     KMP_FSYNC_ACQUIRED(l);                                                     \
1103   }
1104
1105 // Fast-path test tas lock
1106 #define KMP_TEST_TAS_LOCK(lock, gtid, rc)                                      \
1107   {                                                                            \
1108     kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                \
1109     kmp_int32 tas_free = KMP_LOCK_FREE(tas);                                   \
1110     kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);                         \
1111     rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free &&                         \
1112          __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy);      \
1113   }
1114
1115 // Fast-path release tas lock
1116 #define KMP_RELEASE_TAS_LOCK(lock, gtid)                                       \
1117   { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }
1118
1119 #if KMP_USE_FUTEX
1120
1121 #include <sys/syscall.h>
1122 #include <unistd.h>
1123 #ifndef FUTEX_WAIT
1124 #define FUTEX_WAIT 0
1125 #endif
1126 #ifndef FUTEX_WAKE
1127 #define FUTEX_WAKE 1
1128 #endif
1129
1130 // Fast-path acquire futex lock
1131 #define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid)                                     \
1132   {                                                                            \
1133     kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
1134     kmp_int32 gtid_code = (gtid + 1) << 1;                                     \
1135     KMP_MB();                                                                  \
1136     KMP_FSYNC_PREPARE(ftx);                                                    \
1137     kmp_int32 poll_val;                                                        \
1138     while ((poll_val = KMP_COMPARE_AND_STORE_RET32(                            \
1139                 &(ftx->lk.poll), KMP_LOCK_FREE(futex),                         \
1140                 KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) {   \
1141       kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;                           \
1142       if (!cond) {                                                             \
1143         if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val,            \
1144                                          poll_val |                            \
1145                                              KMP_LOCK_BUSY(1, futex))) {       \
1146           continue;                                                            \
1147         }                                                                      \
1148         poll_val |= KMP_LOCK_BUSY(1, futex);                                   \
1149       }                                                                        \
1150       kmp_int32 rc;                                                            \
1151       if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val,     \
1152                         NULL, NULL, 0)) != 0) {                                \
1153         continue;                                                              \
1154       }                                                                        \
1155       gtid_code |= 1;                                                          \
1156     }                                                                          \
1157     KMP_FSYNC_ACQUIRED(ftx);                                                   \
1158   }
1159
1160 // Fast-path test futex lock
1161 #define KMP_TEST_FUTEX_LOCK(lock, gtid, rc)                                    \
1162   {                                                                            \
1163     kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
1164     if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex),     \
1165                                     KMP_LOCK_BUSY(gtid + 1 << 1, futex))) {    \
1166       KMP_FSYNC_ACQUIRED(ftx);                                                 \
1167       rc = TRUE;                                                               \
1168     } else {                                                                   \
1169       rc = FALSE;                                                              \
1170     }                                                                          \
1171   }
1172
1173 // Fast-path release futex lock
1174 #define KMP_RELEASE_FUTEX_LOCK(lock, gtid)                                     \
1175   {                                                                            \
1176     kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
1177     KMP_MB();                                                                  \
1178     KMP_FSYNC_RELEASING(ftx);                                                  \
1179     kmp_int32 poll_val =                                                       \
1180 KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex))__sync_lock_test_and_set((volatile kmp_uint32 *)(&(ftx->
lk.poll)), (kmp_uint32)((locktag_futex)))
; \
1181 if (KMP_LOCK_STRIP(poll_val)((poll_val) >> 8) & 1) { \
1182 syscall(__NR_futex202, &(ftx->lk.poll), FUTEX_WAKE1, \
1183 KMP_LOCK_BUSY(1, futex)((1) << 8 | locktag_futex), NULL__null, NULL__null, 0); \
1184 } \
1185 KMP_MB(); \
1186 KMP_YIELD_OVERSUB(){ __kmp_x86_pause(); if ((((__kmp_use_yield == 1 || __kmp_use_yield
== 2) && (((__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc
: __kmp_xproc)))))) __kmp_yield(); }
; \
1187 }
1188
1189#endif // KMP_USE_FUTEX
1190
1191#else // KMP_USE_DYNAMIC_LOCK
1192
1193static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
1194 ident_t const *loc,
1195 kmp_int32 gtid) {
1196 kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
1197
1198 // Because of the double-check, the following load doesn't need to be volatile
1199 kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp)((void *)(*lck_pp));
1200
1201 if (lck == NULL__null) {
1202 void *idx;
1203
1204 // Allocate & initialize the lock.
1205 // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
1206 lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section1);
1207 __kmp_init_user_lock_with_checks(lck);
1208 __kmp_set_user_lock_location(lck, loc);
1209#if USE_ITT_BUILD1
1210 __kmp_itt_critical_creating(lck);
1211// __kmp_itt_critical_creating() should be called *before* the first usage
1212// of the underlying lock. It is the only place where we can guarantee it. There
1213// is a chance the lock will be destroyed without ever being used, but that is
1214// not a problem, because this is not a real event seen by the user but rather
1215// the setting of a name for the object (lock). See more details in kmp_itt.h.
1216#endif /* USE_ITT_BUILD */
1217
1218 // Use a cmpxchg instruction to slam the start of the critical section with
1219 // the lock pointer. If another thread beat us to it, deallocate the lock,
1220 // and use the lock that the other thread allocated.
1221 int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck)__sync_bool_compare_and_swap((void *volatile *)(lck_pp), (void
*)(0), (void *)(lck))
;
1222
1223 if (status == 0) {
1224// Deallocate the lock and reload the value.
1225#if USE_ITT_BUILD1
1226 __kmp_itt_critical_destroyed(lck);
1227// Let ITT know the lock is destroyed and the same memory location may be reused
1228// for another purpose.
1229#endif /* USE_ITT_BUILD */
1230 __kmp_destroy_user_lock_with_checks(lck);
1231 __kmp_user_lock_free(&idx, gtid, lck);
1232 lck = (kmp_user_lock_p)TCR_PTR(*lck_pp)((void *)(*lck_pp));
1233 KMP_DEBUG_ASSERT(lck != NULL)if (!(lck != __null)) { __kmp_debug_assert("lck != __null", "openmp/runtime/src/kmp_csupport.cpp"
, 1233); }
;
1234 }
1235 }
1236 return lck;
1237}
1238
1239#endif // KMP_USE_DYNAMIC_LOCK
1240
1241/*!
1242@ingroup WORK_SHARING
1243@param loc source location information.
1244@param global_tid global thread number.
1245@param crit identity of the critical section. This could be a pointer to a lock
1246associated with the critical section, or some other suitably unique value.
1247
1248Enter code protected by a `critical` construct.
1249This function blocks until the executing thread can enter the critical section.
1250*/
1251void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1252 kmp_critical_name *crit) {
1253#if KMP_USE_DYNAMIC_LOCK1
1254#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
1255 OMPT_STORE_RETURN_ADDRESS(global_tid)OmptReturnAddressGuard ReturnAddressGuard{global_tid, __builtin_return_address
(0)};
;
1256#endif // OMPT_SUPPORT
1257 __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
1258#else
1259 KMP_COUNT_BLOCK(OMP_CRITICAL)((void)0);
1260#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
1261 ompt_state_t prev_state = ompt_state_undefined;
1262 ompt_thread_info_t ti;
1263#endif
1264 kmp_user_lock_p lck;
1265
1266 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmpc_critical: called T#%d\n"
, global_tid); }
;
1267 __kmp_assert_valid_gtid(global_tid);
1268
1269 // TODO: add THR_OVHD_STATE
1270
1271 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait)((void)0);
1272 KMP_CHECK_USER_LOCK_INIT();
1273
1274 if ((__kmp_user_lock_kind == lk_tas) &&
1275 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZEsizeof(void *))) {
1276 lck = (kmp_user_lock_p)crit;
1277 }
1278#if KMP_USE_FUTEX(1 && (0 || 1 || KMP_ARCH_ARM || 0))
1279 else if ((__kmp_user_lock_kind == lk_futex) &&
1280 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZEsizeof(void *))) {
1281 lck = (kmp_user_lock_p)crit;
1282 }
1283#endif
1284 else { // ticket, queuing or drdpa
1285 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
1286 }
1287
1288 if (__kmp_env_consistency_check)
1289 __kmp_push_sync(global_tid, ct_critical, loc, lck);
1290
1291 // Since the critical directive binds to all threads, not just the current
1292 // team, we have to check this even if we are in a serialized team.
1293 // Also, even if we are the uber thread, we still have to acquire the lock,
1294 // as we have to contend with sibling threads.
1295
1296#if USE_ITT_BUILD1
1297 __kmp_itt_critical_acquiring(lck);
1298#endif /* USE_ITT_BUILD */
1299#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
1300 OMPT_STORE_RETURN_ADDRESS(gtid)OmptReturnAddressGuard ReturnAddressGuard{gtid, __builtin_return_address
(0)};
;
1301 void *codeptr_ra = NULL__null;
1302 if (ompt_enabled.enabled) {
1303 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1304 /* OMPT state update */
1305 prev_state = ti.state;
1306 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1307 ti.state = ompt_state_wait_critical;
1308
1309 /* OMPT event callback */
1310 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
1311 if (ompt_enabled.ompt_callback_mutex_acquire) {
1312 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)ompt_callback_mutex_acquire_callback(
1313 ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
1314 (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1315 }
1316 }
1317#endif
1318 // The value of 'crit' should be suitable for use as the critical_id of the
1319 // critical section directive.
1320 __kmp_acquire_user_lock_with_checks(lck, global_tid);
1321
1322#if USE_ITT_BUILD1
1323 __kmp_itt_critical_acquired(lck);
1324#endif /* USE_ITT_BUILD */
1325#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
1326 if (ompt_enabled.enabled) {
1327 /* OMPT state update */
1328 ti.state = prev_state;
1329 ti.wait_id = 0;
1330
1331 /* OMPT event callback */
1332 if (ompt_enabled.ompt_callback_mutex_acquired) {
1333 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)ompt_callback_mutex_acquired_callback(
1334 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1335 }
1336 }
1337#endif
1338 KMP_POP_PARTITIONED_TIMER()((void)0);
1339
1340 KMP_PUSH_PARTITIONED_TIMER(OMP_critical)((void)0);
1341 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid))if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmpc_critical: done T#%d\n"
, global_tid); }
;
1342#endif // KMP_USE_DYNAMIC_LOCK
1343}
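// A minimal sketch, not part of kmp_csupport.cpp, of how a compiler might
// lower "#pragma omp critical" onto the entry point above; the names my_crit
// and user_region are assumptions chosen for illustration.
static kmp_critical_name my_crit; // zero-initialized critical-section identity
static void user_region(ident_t *loc, kmp_int32 gtid) {
  __kmpc_critical(loc, gtid, &my_crit);     // blocks until the lock is acquired
  /* ... body of the critical region ... */
  __kmpc_end_critical(loc, gtid, &my_crit); // releases the underlying lock
}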
1344
1345#if KMP_USE_DYNAMIC_LOCK1
1346
1347// Converts the given hint to an internal lock implementation
1348static __forceinline__inline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
1349#if KMP_USE_TSX(0 || 1) && !0
1350#define KMP_TSX_LOCK(seq)lockseq_seq lockseq_##seq
1351#else
1352#define KMP_TSX_LOCK(seq)lockseq_seq __kmp_user_lock_seq
1353#endif
1354
1355#if KMP_ARCH_X860 || KMP_ARCH_X86_641
1356#define KMP_CPUINFO_RTM(__kmp_cpuinfo.flags.rtm) (__kmp_cpuinfo.flags.rtm)
1357#else
1358#define KMP_CPUINFO_RTM(__kmp_cpuinfo.flags.rtm) 0
1359#endif
1360
1361 // Hints that do not require further logic
1362 if (hint & kmp_lock_hint_hle)
1363 return KMP_TSX_LOCK(hle)lockseq_hle;
1364 if (hint & kmp_lock_hint_rtm)
1365 return KMP_CPUINFO_RTM(__kmp_cpuinfo.flags.rtm) ? KMP_TSX_LOCK(rtm_queuing)lockseq_rtm_queuing : __kmp_user_lock_seq;
1366 if (hint & kmp_lock_hint_adaptive)
1367 return KMP_CPUINFO_RTM(__kmp_cpuinfo.flags.rtm) ? KMP_TSX_LOCK(adaptive)lockseq_adaptive : __kmp_user_lock_seq;
1368
1369 // Rule out conflicting hints first by returning the default lock
1370 if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
1371 return __kmp_user_lock_seq;
1372 if ((hint & omp_lock_hint_speculative) &&
1373 (hint & omp_lock_hint_nonspeculative))
1374 return __kmp_user_lock_seq;
1375
1376 // Do not even consider speculation when the lock appears to be contended
1377 if (hint & omp_lock_hint_contended)
1378 return lockseq_queuing;
1379
1380 // Uncontended lock without speculation
1381 if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
1382 return lockseq_tas;
1383
1384 // Use RTM lock for speculation
1385 if (hint & omp_lock_hint_speculative)
1386 return KMP_CPUINFO_RTM(__kmp_cpuinfo.flags.rtm) ? KMP_TSX_LOCK(rtm_spin)lockseq_rtm_spin : __kmp_user_lock_seq;
1387
1388 return __kmp_user_lock_seq;
1389}
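// A hedged sketch of the mapping above, expressed as debug assertions; it is
// illustrative only, uses the hint constants referenced in the function, and
// the helper name is an assumption.
static void __kmp_check_hint_mapping_sketch(void) {
  // Conflicting hints fall back to the default user lock sequence.
  KMP_DEBUG_ASSERT(__kmp_map_hint_to_lock(omp_lock_hint_contended |
                                          omp_lock_hint_uncontended) ==
                   __kmp_user_lock_seq);
  // A plain "contended" hint selects the queuing lock.
  KMP_DEBUG_ASSERT(__kmp_map_hint_to_lock(omp_lock_hint_contended) ==
                   lockseq_queuing);
}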
1390
1391#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
1392#if KMP_USE_DYNAMIC_LOCK1
1393static kmp_mutex_impl_t
1394__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
1395 if (user_lock) {
1396 switch (KMP_EXTRACT_D_TAG(user_lock)(*((kmp_dyna_lock_t *)(user_lock)) & ((1 << 8) - 1)
& -(*((kmp_dyna_lock_t *)(user_lock)) & 1))
) {
1397 case 0:
1398 break;
1399#if KMP_USE_FUTEX(1 && (0 || 1 || KMP_ARCH_ARM || 0))
1400 case locktag_futex:
1401 return kmp_mutex_impl_queuing;
1402#endif
1403 case locktag_tas:
1404 return kmp_mutex_impl_spin;
1405#if KMP_USE_TSX(0 || 1) && !0
1406 case locktag_hle:
1407 case locktag_rtm_spin:
1408 return kmp_mutex_impl_speculative;
1409#endif
1410 default:
1411 return kmp_mutex_impl_none;
1412 }
1413 ilock = KMP_LOOKUP_I_LOCK(user_lock)((sizeof(int) < sizeof(void *)) ? __kmp_get_i_lock((*(kmp_lock_index_t
*)(user_lock) >> 1)) : *((kmp_indirect_lock_t **)(user_lock
)))
;
1414 }
1415 KMP_ASSERT(ilock)if (!(ilock)) { __kmp_debug_assert("ilock", "openmp/runtime/src/kmp_csupport.cpp"
, 1415); }
;
1416 switch (ilock->type) {
1417#if KMP_USE_TSX(0 || 1) && !0
1418 case locktag_adaptive:
1419 case locktag_rtm_queuing:
1420 return kmp_mutex_impl_speculative;
1421#endif
1422 case locktag_nested_tas:
1423 return kmp_mutex_impl_spin;
1424#if KMP_USE_FUTEX(1 && (0 || 1 || KMP_ARCH_ARM || 0))
1425 case locktag_nested_futex:
1426#endif
1427 case locktag_ticket:
1428 case locktag_queuing:
1429 case locktag_drdpa:
1430 case locktag_nested_ticket:
1431 case locktag_nested_queuing:
1432 case locktag_nested_drdpa:
1433 return kmp_mutex_impl_queuing;
1434 default:
1435 return kmp_mutex_impl_none;
1436 }
1437}
1438#else
1439// For locks without dynamic binding
1440static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
1441 switch (__kmp_user_lock_kind) {
1442 case lk_tas:
1443 return kmp_mutex_impl_spin;
1444#if KMP_USE_FUTEX(1 && (0 || 1 || KMP_ARCH_ARM || 0))
1445 case lk_futex:
1446#endif
1447 case lk_ticket:
1448 case lk_queuing:
1449 case lk_drdpa:
1450 return kmp_mutex_impl_queuing;
1451#if KMP_USE_TSX(0 || 1) && !0
1452 case lk_hle:
1453 case lk_rtm_queuing:
1454 case lk_rtm_spin:
1455 case lk_adaptive:
1456 return kmp_mutex_impl_speculative;
1457#endif
1458 default:
1459 return kmp_mutex_impl_none;
1460 }
1461}
1462#endif // KMP_USE_DYNAMIC_LOCK
1463#endif // OMPT_SUPPORT && OMPT_OPTIONAL
1464
1465/*!
1466@ingroup WORK_SHARING
1467@param loc source location information.
1468@param global_tid global thread number.
1469@param crit identity of the critical section. This could be a pointer to a lock
1470associated with the critical section, or some other suitably unique value.
1471@param hint the lock hint.
1472
1473Enter code protected by a `critical` construct with a hint. The hint value is
1474used to suggest a lock implementation. This function blocks until the executing
1475thread can enter the critical section unless the hint suggests use of
1476speculative execution and the hardware supports it.
1477*/
1478void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1479 kmp_critical_name *crit, uint32_t hint) {
1480 KMP_COUNT_BLOCK(OMP_CRITICAL)((void)0);
1481 kmp_user_lock_p lck;
1482#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
1483 ompt_state_t prev_state = ompt_state_undefined;
1484 ompt_thread_info_t ti;
1485 // This is the case if called from __kmpc_critical:
1486 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid)__ompt_load_return_address(global_tid);
1487 if (!codeptr)
1488 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
1489#endif
1490
1491 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmpc_critical: called T#%d\n"
, global_tid); }
;
1492 __kmp_assert_valid_gtid(global_tid);
1493
1494 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
1495 // Check if it is initialized.
1496 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait)((void)0);
1497 kmp_dyna_lockseq_t lockseq = __kmp_map_hint_to_lock(hint);
1498 if (*lk == 0) {
1499 if (KMP_IS_D_LOCK(lockseq)((lockseq) >= lockseq_tas && (lockseq) <= lockseq_rtm_spin
)
) {
1500 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,__sync_bool_compare_and_swap((volatile kmp_uint32 *)((volatile
kmp_int32 *)crit), (kmp_uint32)(0), (kmp_uint32)(((lockseq) <<
1 | 1)))
1501 KMP_GET_D_TAG(lockseq))__sync_bool_compare_and_swap((volatile kmp_uint32 *)((volatile
kmp_int32 *)crit), (kmp_uint32)(0), (kmp_uint32)(((lockseq) <<
1 | 1)))
;
1502 } else {
1503 __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lockseq)(kmp_indirect_locktag_t)((lockseq)-lockseq_ticket));
1504 }
1505 }
1506 // Branch for accessing the actual lock object and set operation. This
1507 // branching is inevitable since this lock initialization does not follow the
1508 // normal dispatch path (lock table is not used).
1509 if (KMP_EXTRACT_D_TAG(lk)(*((kmp_dyna_lock_t *)(lk)) & ((1 << 8) - 1) & -
(*((kmp_dyna_lock_t *)(lk)) & 1))
!= 0) {
1510 lck = (kmp_user_lock_p)lk;
1511 if (__kmp_env_consistency_check) {
1512 __kmp_push_sync(global_tid, ct_critical, loc, lck,
1513 __kmp_map_hint_to_lock(hint));
1514 }
1515#if USE_ITT_BUILD1
1516 __kmp_itt_critical_acquiring(lck);
1517#endif
1518#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
1519 if (ompt_enabled.enabled) {
1520 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1521 /* OMPT state update */
1522 prev_state = ti.state;
1523 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1524 ti.state = ompt_state_wait_critical;
1525
1526 /* OMPT event callback */
1527 if (ompt_enabled.ompt_callback_mutex_acquire) {
1528 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)ompt_callback_mutex_acquire_callback(
1529 ompt_mutex_critical, (unsigned int)hint,
1530 __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
1531 codeptr);
1532 }
1533 }
1534#endif
1535#if KMP_USE_INLINED_TAS(1 && (0 || 1 || KMP_ARCH_ARM)) && 1
1536 if (lockseq == lockseq_tas && !__kmp_env_consistency_check) {
1537 KMP_ACQUIRE_TAS_LOCK(lck, global_tid){ kmp_tas_lock_t *l = (kmp_tas_lock_t *)lck; kmp_int32 tas_free
= (locktag_tas); kmp_int32 tas_busy = ((global_tid + 1) <<
8 | locktag_tas); if ((&l->lk.poll)->load(std::memory_order_relaxed
) != tas_free || !__kmp_atomic_compare_store_acq(&l->lk
.poll, tas_free, tas_busy)) { kmp_uint32 spins; (!__kmp_itt_fsync_prepare_ptr__3_0
) ? (void)0 : __kmp_itt_fsync_prepare_ptr__3_0((void *)(l)); {
(spins) = __kmp_yield_init; }; kmp_backoff_t backoff = __kmp_spin_backoff_params
; do { if ((__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc
: __kmp_xproc)) { { __kmp_x86_pause(); if (((!0)) &&
(((__kmp_use_yield == 1) || (__kmp_use_yield == 2 &&
(((__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc
))))))) __kmp_yield(); }; } else { { __kmp_x86_pause(); if ((
(__kmp_use_yield == 1) || (__kmp_use_yield == 2 && ((
(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc
)))))) { (spins) -= 2; if (!(spins)) { __kmp_yield(); (spins)
= __kmp_yield_next; } } }; } __kmp_spin_backoff(&backoff
); } while ( (&l->lk.poll)->load(std::memory_order_relaxed
) != tas_free || !__kmp_atomic_compare_store_acq(&l->lk
.poll, tas_free, tas_busy)); } (!__kmp_itt_fsync_acquired_ptr__3_0
) ? (void)0 : __kmp_itt_fsync_acquired_ptr__3_0((void *)(l));
}
;
1538 } else
1539#elif KMP_USE_INLINED_FUTEX(1 && (0 || 1 || KMP_ARCH_ARM || 0)) && 0
1540 if (lockseq == lockseq_futex && !__kmp_env_consistency_check) {
1541 KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid){ kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lck; kmp_int32 gtid_code
= (global_tid + 1) << 1; ; (!__kmp_itt_fsync_prepare_ptr__3_0
) ? (void)0 : __kmp_itt_fsync_prepare_ptr__3_0((void *)(ftx))
; kmp_int32 poll_val; while ((poll_val = __sync_val_compare_and_swap
((volatile kmp_uint32 *)(&(ftx->lk.poll)), (kmp_uint32
)((locktag_futex)), (kmp_uint32)(((gtid_code) << 8 | locktag_futex
)))) != (locktag_futex)) { kmp_int32 cond = ((poll_val) >>
8) & 1; if (!cond) { if (!__sync_val_compare_and_swap((volatile
kmp_uint32 *)(&(ftx->lk.poll)), (kmp_uint32)(poll_val
), (kmp_uint32)(poll_val | ((1) << 8 | locktag_futex)))
) { continue; } poll_val |= ((1) << 8 | locktag_futex);
} kmp_int32 rc; if ((rc = syscall(202, &(ftx->lk.poll
), 0, poll_val, __null, __null, 0)) != 0) { continue; } gtid_code
|= 1; } (!__kmp_itt_fsync_acquired_ptr__3_0) ? (void)0 : __kmp_itt_fsync_acquired_ptr__3_0
((void *)(ftx)); }
;
1542 } else
1543#endif
1544 {
1545 KMP_D_LOCK_FUNC(lk, set)__kmp_direct_set[(*((kmp_dyna_lock_t *)(lk)) & ((1 <<
8) - 1) & -(*((kmp_dyna_lock_t *)(lk)) & 1))]
(lk, global_tid);
1546 }
1547 } else {
1548 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
1549 lck = ilk->lock;
1550 if (__kmp_env_consistency_check) {
1551 __kmp_push_sync(global_tid, ct_critical, loc, lck,
1552 __kmp_map_hint_to_lock(hint));
1553 }
1554#if USE_ITT_BUILD1
1555 __kmp_itt_critical_acquiring(lck);
1556#endif
1557#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
1558 if (ompt_enabled.enabled) {
1559 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1560 /* OMPT state update */
1561 prev_state = ti.state;
1562 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1563 ti.state = ompt_state_wait_critical;
1564
1565 /* OMPT event callback */
1566 if (ompt_enabled.ompt_callback_mutex_acquire) {
1567 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)ompt_callback_mutex_acquire_callback(
1568 ompt_mutex_critical, (unsigned int)hint,
1569 __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
1570 codeptr);
1571 }
1572 }
1573#endif
1574 KMP_I_LOCK_FUNC(ilk, set)__kmp_indirect_set[((kmp_indirect_lock_t *)(ilk))->type](lck, global_tid);
1575 }
1576 KMP_POP_PARTITIONED_TIMER()((void)0);
1577
1578#if USE_ITT_BUILD1
1579 __kmp_itt_critical_acquired(lck);
1580#endif /* USE_ITT_BUILD */
1581#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
1582 if (ompt_enabled.enabled) {
1583 /* OMPT state update */
1584 ti.state = prev_state;
1585 ti.wait_id = 0;
1586
1587 /* OMPT event callback */
1588 if (ompt_enabled.ompt_callback_mutex_acquired) {
1589 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)ompt_callback_mutex_acquired_callback(
1590 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
1591 }
1592 }
1593#endif
1594
1595 KMP_PUSH_PARTITIONED_TIMER(OMP_critical)((void)0);
1596 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid))if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmpc_critical: done T#%d\n"
, global_tid); }
;
1597} // __kmpc_critical_with_hint
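// A hedged sketch (assumed names) of the call sequence a compiler might emit
// for "#pragma omp critical (c1) hint(omp_lock_hint_speculative)"; the actual
// lowering is compiler-specific.
static kmp_critical_name c1_crit;
static void user_region_with_hint(ident_t *loc, kmp_int32 gtid) {
  __kmpc_critical_with_hint(loc, gtid, &c1_crit, omp_lock_hint_speculative);
  /* ... protected code ... */
  __kmpc_end_critical(loc, gtid, &c1_crit); // same release path as __kmpc_critical
}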
1598
1599#endif // KMP_USE_DYNAMIC_LOCK
1600
1601/*!
1602@ingroup WORK_SHARING
1603@param loc source location information.
1604@param global_tid global thread number.
1605@param crit identity of the critical section. This could be a pointer to a lock
1606associated with the critical section, or some other suitably unique value.
1607
1608Leave a critical section, releasing any lock that was held during its execution.
1609*/
1610void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1611 kmp_critical_name *crit) {
1612 kmp_user_lock_p lck;
1613
1614 KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmpc_end_critical: called T#%d\n"
, global_tid); }
;
1615
1616#if KMP_USE_DYNAMIC_LOCK1
1617 int locktag = KMP_EXTRACT_D_TAG(crit)(*((kmp_dyna_lock_t *)(crit)) & ((1 << 8) - 1) &
-(*((kmp_dyna_lock_t *)(crit)) & 1))
;
1618 if (locktag) {
1619 lck = (kmp_user_lock_p)crit;
1620 KMP_ASSERT(lck != NULL)if (!(lck != __null)) { __kmp_debug_assert("lck != NULL", "openmp/runtime/src/kmp_csupport.cpp"
, 1620); }
;
1621 if (__kmp_env_consistency_check) {
1622 __kmp_pop_sync(global_tid, ct_critical, loc);
1623 }
1624#if USE_ITT_BUILD1
1625 __kmp_itt_critical_releasing(lck);
1626#endif
1627#if KMP_USE_INLINED_TAS(1 && (0 || 1 || KMP_ARCH_ARM)) && 1
1628 if (locktag == locktag_tas && !__kmp_env_consistency_check) {
1629 KMP_RELEASE_TAS_LOCK(lck, global_tid){ (&((kmp_tas_lock_t *)lck)->lk.poll)->store((locktag_tas
), std::memory_order_release); }
;
1630 } else
1631#elif KMP_USE_INLINED_FUTEX(1 && (0 || 1 || KMP_ARCH_ARM || 0)) && 0
1632 if (locktag == locktag_futex && !__kmp_env_consistency_check) {
1633 KMP_RELEASE_FUTEX_LOCK(lck, global_tid){ kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lck; ; (!__kmp_itt_fsync_releasing_ptr__3_0
) ? (void)0 : __kmp_itt_fsync_releasing_ptr__3_0((void *)(ftx
)); kmp_int32 poll_val = __sync_lock_test_and_set((volatile kmp_uint32
*)(&(ftx->lk.poll)), (kmp_uint32)((locktag_futex))); if
(((poll_val) >> 8) & 1) { syscall(202, &(ftx->
lk.poll), 1, ((1) << 8 | locktag_futex), __null, __null
, 0); } ; { __kmp_x86_pause(); if ((((__kmp_use_yield == 1 ||
__kmp_use_yield == 2) && (((__kmp_nth) > (__kmp_avail_proc
? __kmp_avail_proc : __kmp_xproc)))))) __kmp_yield(); }; }
;
1634 } else
1635#endif
1636 {
1637 KMP_D_LOCK_FUNC(lck, unset)__kmp_direct_unset[(*((kmp_dyna_lock_t *)(lck)) & ((1 <<
8) - 1) & -(*((kmp_dyna_lock_t *)(lck)) & 1))]
((kmp_dyna_lock_t *)lck, global_tid);
1638 }
1639 } else {
1640 kmp_indirect_lock_t *ilk =
1641 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit))((void *)(*((kmp_indirect_lock_t **)crit)));
1642 KMP_ASSERT(ilk != NULL)if (!(ilk != __null)) { __kmp_debug_assert("ilk != NULL", "openmp/runtime/src/kmp_csupport.cpp"
, 1642); }
;
1643 lck = ilk->lock;
1644 if (__kmp_env_consistency_check) {
1645 __kmp_pop_sync(global_tid, ct_critical, loc);
1646 }
1647#if USE_ITT_BUILD1
1648 __kmp_itt_critical_releasing(lck);
1649#endif
1650 KMP_I_LOCK_FUNC(ilk, unset)__kmp_indirect_unset[((kmp_indirect_lock_t *)(ilk))->type](lck, global_tid);
1651 }
1652
1653#else // KMP_USE_DYNAMIC_LOCK
1654
1655 if ((__kmp_user_lock_kind == lk_tas) &&
1656 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZEsizeof(void *))) {
1657 lck = (kmp_user_lock_p)crit;
1658 }
1659#if KMP_USE_FUTEX(1 && (0 || 1 || KMP_ARCH_ARM || 0))
1660 else if ((__kmp_user_lock_kind == lk_futex) &&
1661 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZEsizeof(void *))) {
1662 lck = (kmp_user_lock_p)crit;
1663 }
1664#endif
1665 else { // ticket, queuing or drdpa
1666 lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit))((void *)(*((kmp_user_lock_p *)crit)));
1667 }
1668
1669 KMP_ASSERT(lck != NULL)if (!(lck != __null)) { __kmp_debug_assert("lck != NULL", "openmp/runtime/src/kmp_csupport.cpp"
, 1669); }
;
1670
1671 if (__kmp_env_consistency_check)
1672 __kmp_pop_sync(global_tid, ct_critical, loc);
1673
1674#if USE_ITT_BUILD1
1675 __kmp_itt_critical_releasing(lck);
1676#endif /* USE_ITT_BUILD */
1677 // The value of 'crit' should be suitable for use as the critical_id of the
1678 // critical section directive.
1679 __kmp_release_user_lock_with_checks(lck, global_tid);
1680
1681#endif // KMP_USE_DYNAMIC_LOCK
1682
1683#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
1684 /* OMPT release event triggers after lock is released; place here to trigger
1685 * for all #if branches */
1686 OMPT_STORE_RETURN_ADDRESS(global_tid)OmptReturnAddressGuard ReturnAddressGuard{global_tid, __builtin_return_address
(0)};
;
1687 if (ompt_enabled.ompt_callback_mutex_released) {
1688 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)ompt_callback_mutex_released_callback(
1689 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
1690 OMPT_LOAD_RETURN_ADDRESS(0)__ompt_load_return_address(0));
1691 }
1692#endif
1693
1694 KMP_POP_PARTITIONED_TIMER()((void)0);
1695 KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid))if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmpc_end_critical: done T#%d\n"
, global_tid); }
;
1696}
1697
1698/*!
1699@ingroup SYNCHRONIZATION
1700@param loc source location information
1701@param global_tid thread id.
1702@return one if the thread should execute the master block, zero otherwise
1703
1704Start execution of a combined barrier and master. The barrier is executed inside
1705this function.
1706*/
1707kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1708 int status;
1709 KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmpc_barrier_master: called T#%d\n"
, global_tid); }
;
1710 __kmp_assert_valid_gtid(global_tid);
1711
1712 if (!TCR_4(__kmp_init_parallel)(__kmp_init_parallel))
1713 __kmp_parallel_initialize();
1714
1715 __kmp_resume_if_soft_paused();
1716
1717 if (__kmp_env_consistency_check)
1718 __kmp_check_barrier(global_tid, ct_barrier, loc);
1719
1720#if OMPT_SUPPORT1
1721 ompt_frame_t *ompt_frame;
1722 if (ompt_enabled.enabled) {
1723 __ompt_get_task_info_internal(0, NULL__null, NULL__null, &ompt_frame, NULL__null, NULL__null);
1724 if (ompt_frame->enter_frame.ptr == NULL__null)
1725 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0)__builtin_frame_address(0);
1726 }
1727 OMPT_STORE_RETURN_ADDRESS(global_tid)OmptReturnAddressGuard ReturnAddressGuard{global_tid, __builtin_return_address
(0)};
;
1728#endif
1729#if USE_ITT_NOTIFY1
1730 __kmp_threads[global_tid]->th.th_ident = loc;
1731#endif
1732 status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE(!0), 0, NULL__null, NULL__null);
1733#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
1734 if (ompt_enabled.enabled) {
1735 ompt_frame->enter_frame = ompt_data_none{0};
1736 }
1737#endif
1738
1739 return (status != 0) ? 0 : 1;
1740}
1741
1742/*!
1743@ingroup SYNCHRONIZATION
1744@param loc source location information
1745@param global_tid thread id.
1746
1747Complete the execution of a combined barrier and master. This function should
1748only be called at the completion of the <tt>master</tt> code. Other threads will
1749still be waiting at the barrier and this call releases them.
1750*/
1751void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1752 KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmpc_end_barrier_master: called T#%d\n"
, global_tid); }
;
1753 __kmp_assert_valid_gtid(global_tid);
1754 __kmp_end_split_barrier(bs_plain_barrier, global_tid);
1755}
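// A minimal sketch (assumed function name) of the intended pairing of the two
// entry points above: only the thread that receives 1 runs the master block
// and then releases the other threads.
static void combined_barrier_master(ident_t *loc, kmp_int32 gtid) {
  if (__kmpc_barrier_master(loc, gtid)) { // returns 1 only on the master thread
    /* ... master-only code ... */
    __kmpc_end_barrier_master(loc, gtid); // releases the threads still waiting
  }
}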
1756
1757/*!
1758@ingroup SYNCHRONIZATION
1759@param loc source location information
1760@param global_tid thread id.
1761@return one if the thread should execute the master block, zero otherwise
1762
1763Start execution of a combined barrier and master(nowait) construct.
1764The barrier is executed inside this function.
1765There is no equivalent "end" function: the construct has nowait semantics, so the bookkeeping that __kmpc_end_master would otherwise do is handled inside this function.
1766*/
1767kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
1768 kmp_int32 ret;
1769 KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmpc_barrier_master_nowait: called T#%d\n"
, global_tid); }
;
1770 __kmp_assert_valid_gtid(global_tid);
1771
1772 if (!TCR_4(__kmp_init_parallel)(__kmp_init_parallel))
1773 __kmp_parallel_initialize();
1774
1775 __kmp_resume_if_soft_paused();
1776
1777 if (__kmp_env_consistency_check) {
1778 if (loc == 0) {
1779 KMP_WARNING(ConstructIdentInvalid)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_ConstructIdentInvalid
), __kmp_msg_null)
; // ??? What does it mean for the user?
1780 }
1781 __kmp_check_barrier(global_tid, ct_barrier, loc);
1782 }
1783
1784#if OMPT_SUPPORT1
1785 ompt_frame_t *ompt_frame;
1786 if (ompt_enabled.enabled) {
1787 __ompt_get_task_info_internal(0, NULL__null, NULL__null, &ompt_frame, NULL__null, NULL__null);
1788 if (ompt_frame->enter_frame.ptr == NULL__null)
1789 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0)__builtin_frame_address(0);
1790 }
1791 OMPT_STORE_RETURN_ADDRESS(global_tid)OmptReturnAddressGuard ReturnAddressGuard{global_tid, __builtin_return_address
(0)};
;
1792#endif
1793#if USE_ITT_NOTIFY1
1794 __kmp_threads[global_tid]->th.th_ident = loc;
1795#endif
1796 __kmp_barrier(bs_plain_barrier, global_tid, FALSE0, 0, NULL__null, NULL__null);
1797#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
1798 if (ompt_enabled.enabled) {
1799 ompt_frame->enter_frame = ompt_data_none{0};
1800 }
1801#endif
1802
1803 ret = __kmpc_master(loc, global_tid);
1804
1805 if (__kmp_env_consistency_check) {
1806 /* there's no __kmpc_end_master called; so the (stats) */
1807 /* actions of __kmpc_end_master are done here */
1808 if (ret) {
1809 /* only one thread should do the pop since only */
1810 /* one did the push (see __kmpc_master()) */
1811 __kmp_pop_sync(global_tid, ct_master, loc);
1812 }
1813 }
1814
1815 return (ret);
1816}
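// A minimal sketch (assumed function name) of the nowait variant: there is no
// matching "end" call, every thread continues past the barrier, and only the
// master thread receives a non-zero return value.
static void combined_barrier_master_nowait(ident_t *loc, kmp_int32 gtid) {
  if (__kmpc_barrier_master_nowait(loc, gtid)) {
    /* ... master-only code, no __kmpc_end_barrier_master call ... */
  }
}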
1817
1818/* The BARRIER for a SINGLE process section is always explicit */
1819/*!
1820@ingroup WORK_SHARING
1821@param loc source location information
1822@param global_tid global thread number
1823@return One if this thread should execute the single construct, zero otherwise.
1824
1825Test whether to execute a <tt>single</tt> construct.
1826There are no implicit barriers in the two "single" calls; rather, the compiler
1827should introduce an explicit barrier if it is required.
1828*/
1829
1830kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
1831 __kmp_assert_valid_gtid(global_tid);
1832 kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE(!0));
1833
1834 if (rc) {
1835 // We are going to execute the single statement, so we should count it.
1836 KMP_COUNT_BLOCK(OMP_SINGLE)((void)0);
1837 KMP_PUSH_PARTITIONED_TIMER(OMP_single)((void)0);
1838 }
1839
1840#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
1841 kmp_info_t *this_thr = __kmp_threads[global_tid];
1842 kmp_team_t *team = this_thr->th.th_team;
1843 int tid = __kmp_tid_from_gtid(global_tid);
1844
1845 if (ompt_enabled.enabled) {
1846 if (rc) {
1847 if (ompt_enabled.ompt_callback_work) {
1848 ompt_callbacks.ompt_callback(ompt_callback_work)ompt_callback_work_callback(
1849 ompt_work_single_executor, ompt_scope_begin,
1850 &(team->t.ompt_team_info.parallel_data),
1851 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1852 1, OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0));
1853 }
1854 } else {
1855 if (ompt_enabled.ompt_callback_work) {
1856 ompt_callbacks.ompt_callback(ompt_callback_work)ompt_callback_work_callback(
1857 ompt_work_single_other, ompt_scope_begin,
1858 &(team->t.ompt_team_info.parallel_data),
1859 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1860 1, OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0));
1861 ompt_callbacks.ompt_callback(ompt_callback_work)ompt_callback_work_callback(
1862 ompt_work_single_other, ompt_scope_end,
1863 &(team->t.ompt_team_info.parallel_data),
1864 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1865 1, OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0));
1866 }
1867 }
1868 }
1869#endif
1870
1871 return rc;
1872}
1873
1874/*!
1875@ingroup WORK_SHARING
1876@param loc source location information
1877@param global_tid global thread number
1878
1879Mark the end of a <tt>single</tt> construct. This function should
1880only be called by the thread that executed the block of code protected
1881by the `single` construct.
1882*/
1883void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
1884 __kmp_assert_valid_gtid(global_tid);
1885 __kmp_exit_single(global_tid);
1886 KMP_POP_PARTITIONED_TIMER()((void)0);
1887
1888#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
1889 kmp_info_t *this_thr = __kmp_threads[global_tid];
1890 kmp_team_t *team = this_thr->th.th_team;
1891 int tid = __kmp_tid_from_gtid(global_tid);
1892
1893 if (ompt_enabled.ompt_callback_work) {
1894 ompt_callbacks.ompt_callback(ompt_callback_work)ompt_callback_work_callback(
1895 ompt_work_single_executor, ompt_scope_end,
1896 &(team->t.ompt_team_info.parallel_data),
1897 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
1898 OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0));
1899 }
1900#endif
1901}
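// A minimal sketch (assumed function name) of how a single construct uses the
// two entry points above; without a nowait clause the compiler would also emit
// an explicit barrier after the construct.
static void single_region(ident_t *loc, kmp_int32 gtid) {
  if (__kmpc_single(loc, gtid)) { // exactly one thread of the team gets 1
    /* ... code executed only by that thread ... */
    __kmpc_end_single(loc, gtid); // called only by the executing thread
  }
}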
1902
1903/*!
1904@ingroup WORK_SHARING
1905@param loc Source location
1906@param global_tid Global thread id
1907
1908Mark the end of a statically scheduled loop.
1909*/
1910void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
1911 KMP_POP_PARTITIONED_TIMER()((void)0);
1912 KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid))if (kmp_e_debug >= 10) { __kmp_debug_printf ("__kmpc_for_static_fini called T#%d\n"
, global_tid); }
;
1913
1914#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
1915 if (ompt_enabled.ompt_callback_work) {
1916 ompt_work_t ompt_work_type = ompt_work_loop;
1917 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL__null);
1918 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
1919 // Determine workshare type
1920 if (loc != NULL__null) {
1921 if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
1922 ompt_work_type = ompt_work_loop;
1923 } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
1924 ompt_work_type = ompt_work_sections;
1925 } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
1926 ompt_work_type = ompt_work_distribute;
1927 } else {
1928 // use default set above.
1929 // a warning about this case is provided in __kmpc_for_static_init
1930 }
1931 KMP_DEBUG_ASSERT(ompt_work_type)if (!(ompt_work_type)) { __kmp_debug_assert("ompt_work_type",
"openmp/runtime/src/kmp_csupport.cpp", 1931); }
;
1932 }
1933 ompt_callbacks.ompt_callback(ompt_callback_work)ompt_callback_work_callback(
1934 ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
1935 &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0));
1936 }
1937#endif
1938 if (__kmp_env_consistency_check)
1939 __kmp_pop_workshare(global_tid, ct_pdo, loc);
1940}
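// A hedged sketch of a statically scheduled loop: __kmpc_for_static_init_4 is
// defined elsewhere in the runtime, and the schedule, bounds and chunk values
// here are illustrative assumptions only.
static void static_loop_sketch(ident_t *loc, kmp_int32 gtid) {
  kmp_int32 lower = 0, upper = 99, stride = 1, last = 0;
  __kmpc_for_static_init_4(loc, gtid, kmp_sch_static, &last, &lower, &upper,
                           &stride, /*incr=*/1, /*chunk=*/1);
  for (kmp_int32 i = lower; i <= upper; ++i) {
    /* ... loop body ... */
  }
  __kmpc_for_static_fini(loc, gtid); // mark the end of the static loop
}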
1941
1942// User routines which take C-style arguments (call by value)
1943// different from the Fortran equivalent routines
1944
1945void ompc_set_num_threads(int arg) {
1946 // !!!!! TODO: check the per-task binding
1947 __kmp_set_num_threads(arg, __kmp_entry_gtid()__kmp_get_global_thread_id_reg());
1948}
1949
1950void ompc_set_dynamic(int flag) {
1951 kmp_info_t *thread;
1952
1953 /* For the thread-private implementation of the internal controls */
1954 thread = __kmp_entry_thread();
1955
1956 __kmp_save_internal_controls(thread);
1957
1958 set__dynamic(thread, flag ? true : false)(((thread)->th.th_current_task->td_icvs.dynamic) = (flag
? true : false))
;
1959}
1960
1961void ompc_set_nested(int flag) {
1962 kmp_info_t *thread;
1963
1964 /* For the thread-private internal controls implementation */
1965 thread = __kmp_entry_thread();
1966
1967 __kmp_save_internal_controls(thread);
1968
1969 set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1)(((thread)->th.th_current_task->td_icvs.max_active_levels
) = (flag ? __kmp_dflt_max_active_levels : 1))
;
1970}
1971
1972void ompc_set_max_active_levels(int max_active_levels) {
1973 /* TO DO */
1974 /* we want per-task implementation of this internal control */
1975
1976 /* For the per-thread internal controls implementation */
1977 __kmp_set_max_active_levels(__kmp_entry_gtid()__kmp_get_global_thread_id_reg(), max_active_levels);
1978}
1979
1980void ompc_set_schedule(omp_sched_t kind, int modifier) {
1981 // !!!!! TODO: check the per-task binding
1982 __kmp_set_schedule(__kmp_entry_gtid()__kmp_get_global_thread_id_reg(), (kmp_sched_t)kind, modifier);
1983}
1984
1985int ompc_get_ancestor_thread_num(int level) {
1986 return __kmp_get_ancestor_thread_num(__kmp_entry_gtid()__kmp_get_global_thread_id_reg(), level);
1987}
1988
1989int ompc_get_team_size(int level) {
1990 return __kmp_get_team_size(__kmp_entry_gtid()__kmp_get_global_thread_id_reg(), level);
1991}
1992
1993/* OpenMP 5.0 Affinity Format API */
1994void KMP_EXPAND_NAME(ompc_set_affinity_format)__kmp_api_ompc_set_affinity_format(char const *format) {
1995 if (!__kmp_init_serial) {
1996 __kmp_serial_initialize();
1997 }
1998 __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
1999 format, KMP_STRLENstrlen(format) + 1);
2000}
2001
2002size_t KMP_EXPAND_NAME(ompc_get_affinity_format)__kmp_api_ompc_get_affinity_format(char *buffer, size_t size) {
2003 size_t format_size;
2004 if (!__kmp_init_serial) {
2005 __kmp_serial_initialize();
2006 }
2007 format_size = KMP_STRLENstrlen(__kmp_affinity_format);
2008 if (buffer && size) {
2009 __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
2010 format_size + 1);
2011 }
2012 return format_size;
2013}
2014
2015void KMP_EXPAND_NAME(ompc_display_affinity)__kmp_api_ompc_display_affinity(char const *format) {
2016 int gtid;
2017 if (!TCR_4(__kmp_init_middle)(__kmp_init_middle)) {
2018 __kmp_middle_initialize();
2019 }
2020 __kmp_assign_root_init_mask();
2021 gtid = __kmp_get_gtid()__kmp_get_global_thread_id();
2022#if KMP_AFFINITY_SUPPORTED1
2023 if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 &&
2024 __kmp_affinity.flags.reset) {
2025 __kmp_reset_root_init_mask(gtid);
2026 }
2027#endif
2028 __kmp_aux_display_affinity(gtid, format);
2029}
2030
2031size_t KMP_EXPAND_NAME(ompc_capture_affinity)__kmp_api_ompc_capture_affinity(char *buffer, size_t buf_size,
2032 char const *format) {
2033 int gtid;
2034 size_t num_required;
2035 kmp_str_buf_t capture_buf;
2036 if (!TCR_4(__kmp_init_middle)(__kmp_init_middle)) {
2037 __kmp_middle_initialize();
2038 }
2039 __kmp_assign_root_init_mask();
2040 gtid = __kmp_get_gtid()__kmp_get_global_thread_id();
2041#if KMP_AFFINITY_SUPPORTED1
2042 if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 &&
2043 __kmp_affinity.flags.reset) {
2044 __kmp_reset_root_init_mask(gtid);
2045 }
2046#endif
2047 __kmp_str_buf_init(&capture_buf){ (&capture_buf)->str = (&capture_buf)->bulk; (
&capture_buf)->size = sizeof((&capture_buf)->bulk
); (&capture_buf)->used = 0; (&capture_buf)->bulk
[0] = 0; }
;
2048 num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
2049 if (buffer && buf_size) {
2050 __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
2051 capture_buf.used + 1);
2052 }
2053 __kmp_str_buf_free(&capture_buf);
2054 return num_required;
2055}
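// A hedged usage sketch of the three preceding entry points, written against
// the expanded __kmp_api_ompc_* names defined in this file; the format string
// is only an example of OMP_AFFINITY_FORMAT-style fields.
static void affinity_format_sketch(void) {
  char buf[KMP_AFFINITY_FORMAT_SIZE];
  __kmp_api_ompc_set_affinity_format("thread %n on host %H");
  __kmp_api_ompc_get_affinity_format(buf, sizeof(buf)); // copies the stored format back
  __kmp_api_ompc_display_affinity(buf);                 // prints affinity using that format
}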
2056
2057void kmpc_set_stacksize(int arg) {
2058 // __kmp_aux_set_stacksize initializes the library if needed
2059 __kmp_aux_set_stacksize(arg);
2060}
2061
2062void kmpc_set_stacksize_s(size_t arg) {
2063 // __kmp_aux_set_stacksize initializes the library if needed
2064 __kmp_aux_set_stacksize(arg);
2065}
2066
2067void kmpc_set_blocktime(int arg) {
2068 int gtid, tid;
2069 kmp_info_t *thread;
2070
2071 gtid = __kmp_entry_gtid()__kmp_get_global_thread_id_reg();
2072 tid = __kmp_tid_from_gtid(gtid);
2073 thread = __kmp_thread_from_gtid(gtid);
2074
2075 __kmp_aux_set_blocktime(arg, thread, tid);
2076}
2077
2078void kmpc_set_library(int arg) {
2079 // __kmp_user_set_library initializes the library if needed
2080 __kmp_user_set_library((enum library_type)arg);
2081}
2082
2083void kmpc_set_defaults(char const *str) {
2084 // __kmp_aux_set_defaults initializes the library if needed
2085 __kmp_aux_set_defaults(str, KMP_STRLENstrlen(str));
2086}
2087
2088void kmpc_set_disp_num_buffers(int arg) {
2089 // ignore after initialization because some teams have already
2090 // allocated dispatch buffers
2091 if (__kmp_init_serial == FALSE0 && arg >= KMP_MIN_DISP_NUM_BUFF1 &&
2092 arg <= KMP_MAX_DISP_NUM_BUFF4096) {
2093 __kmp_dispatch_num_buffers = arg;
2094 }
2095}
2096
2097int kmpc_set_affinity_mask_proc(int proc, void **mask) {
2098#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED1
2099 return -1;
2100#else
2101 if (!TCR_4(__kmp_init_middle)(__kmp_init_middle)) {
2102 __kmp_middle_initialize();
2103 }
2104 __kmp_assign_root_init_mask();
2105 return __kmp_aux_set_affinity_mask_proc(proc, mask);
2106#endif
2107}
2108
2109int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
2110#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED1
2111 return -1;
2112#else
2113 if (!TCR_4(__kmp_init_middle)(__kmp_init_middle)) {
2114 __kmp_middle_initialize();
2115 }
2116 __kmp_assign_root_init_mask();
2117 return __kmp_aux_unset_affinity_mask_proc(proc, mask);
2118#endif
2119}
2120
2121int kmpc_get_affinity_mask_proc(int proc, void **mask) {
2122#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED1
2123 return -1;
2124#else
2125 if (!TCR_4(__kmp_init_middle)(__kmp_init_middle)) {
2126 __kmp_middle_initialize();
2127 }
2128 __kmp_assign_root_init_mask();
2129 return __kmp_aux_get_affinity_mask_proc(proc, mask);
2130#endif
2131}
2132
2133/* -------------------------------------------------------------------------- */
2134/*!
2135@ingroup THREADPRIVATE
2136@param loc source location information
2137@param gtid global thread number
2138@param cpy_size size of the cpy_data buffer
2139@param cpy_data pointer to data to be copied
2140@param cpy_func helper function to call for copying data
2141@param didit flag variable: 1=single thread; 0=not single thread
2142
2143__kmpc_copyprivate implements the interface for the private data broadcast
2144needed for the copyprivate clause associated with a single region in an
2145OpenMP<sup>*</sup> program (both C and Fortran).
2146All threads participating in the parallel region call this routine.
2147One of the threads (called the single thread) should have the <tt>didit</tt>
2148variable set to 1 and all other threads should have that variable set to 0.
2149All threads pass a pointer to a data buffer (cpy_data) that they have built.
2150
2151The OpenMP specification forbids the use of nowait on the single region when a
2152copyprivate clause is present. However, @ref __kmpc_copyprivate implements a
2153barrier internally to avoid race conditions, so the code generation for the
2154single region should avoid generating a barrier after the call to @ref
2155__kmpc_copyprivate.
2156
2157The <tt>gtid</tt> parameter is the global thread id for the current thread.
2158The <tt>loc</tt> parameter is a pointer to source location information.
2159
2160Internal implementation: The single thread will first copy its descriptor
2161address (cpy_data) to a team-private location, then the other threads will each
2162call the function pointed to by the parameter cpy_func, which carries out the
2163copy using the cpy_data buffer.
2164
2165The cpy_func routine used for the copy and the contents of the data area defined
2166by cpy_data and cpy_size may be built in any fashion that will allow the copy
2167to be done. For instance, the cpy_data buffer can hold the actual data to be
2168copied or it may hold a list of pointers to the data. The cpy_func routine must
2169interpret the cpy_data buffer appropriately.
2170
2171The interface to cpy_func is as follows:
2172@code
2173void cpy_func( void *destination, void *source )
2174@endcode
2175where void *destination is the cpy_data pointer for the thread being copied to
2176and void *source is the cpy_data pointer for the thread being copied from.
2177*/
2178void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
2179 void *cpy_data, void (*cpy_func)(void *, void *),
2180 kmp_int32 didit) {
2181 void **data_ptr;
2182 KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmpc_copyprivate: called T#%d\n"
, gtid); }
;
2183 __kmp_assert_valid_gtid(gtid);
2184
2185 KMP_MB();
2186
2187 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
2188
2189 if (__kmp_env_consistency_check) {
2190 if (loc == 0) {
2191 KMP_WARNING(ConstructIdentInvalid)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_ConstructIdentInvalid
), __kmp_msg_null)
;
2192 }
2193 }
2194
2195 // ToDo: Optimize the following two barriers into some kind of split barrier
2196
2197 if (didit)
2198 *data_ptr = cpy_data;
2199
2200#if OMPT_SUPPORT1
2201 ompt_frame_t *ompt_frame;
2202 if (ompt_enabled.enabled) {
2203 __ompt_get_task_info_internal(0, NULL__null, NULL__null, &ompt_frame, NULL__null, NULL__null);
2204 if (ompt_frame->enter_frame.ptr == NULL__null)
2205 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0)__builtin_frame_address(0);
2206 }
2207 OMPT_STORE_RETURN_ADDRESS(gtid)OmptReturnAddressGuard ReturnAddressGuard{gtid, __builtin_return_address
(0)};
;
2208#endif
2209/* This barrier is not a barrier region boundary */
2210#if USE_ITT_NOTIFY1
2211 __kmp_threads[gtid]->th.th_ident = loc;
2212#endif
2213 __kmp_barrier(bs_plain_barrier, gtid, FALSE0, 0, NULL__null, NULL__null);
2214
2215 if (!didit)
2216 (*cpy_func)(cpy_data, *data_ptr);
2217
2218 // Consider next barrier a user-visible barrier for barrier region boundaries
2219 // Nesting checks are already handled by the single construct checks
2220 {
2221#if OMPT_SUPPORT1
2222 OMPT_STORE_RETURN_ADDRESS(gtid)OmptReturnAddressGuard ReturnAddressGuard{gtid, __builtin_return_address
(0)};
;
2223#endif
2224#if USE_ITT_NOTIFY1
2225 __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g.
2226// tasks can overwrite the location)
2227#endif
2228 __kmp_barrier(bs_plain_barrier, gtid, FALSE0, 0, NULL__null, NULL__null);
2229#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2230 if (ompt_enabled.enabled) {
2231 ompt_frame->enter_frame = ompt_data_none{0};
2232 }
2233#endif
2234 }
2235}
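// A hedged example of a cpy_func matching the interface documented above; the
// struct layout and names are assumptions chosen purely for illustration.
typedef struct { int ival; double dval; } copypriv_pack_t;

static void copypriv_copy_func(void *dst, void *src) {
  // Both pointers are cpy_data buffers built by the participating threads.
  *(copypriv_pack_t *)dst = *(copypriv_pack_t *)src;
}

// Each thread in the team would then call (didit is 1 only on the thread that
// executed the single block):
//   __kmpc_copyprivate(loc, gtid, sizeof(copypriv_pack_t), &pack,
//                      copypriv_copy_func, didit);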
2236
2237/* --------------------------------------------------------------------------*/
2238/*!
2239@ingroup THREADPRIVATE
2240@param loc source location information
2241@param gtid global thread number
2242@param cpy_data pointer to the data to be saved/copied or 0
2243@return the saved pointer to the data
2244
2245__kmpc_copyprivate_light is a lighter version of __kmpc_copyprivate: it only
2246saves the pointer it is given (if it is not 0, i.e. when it comes from the
2247single thread) and returns that pointer from all calls (the single thread does
2248not need it). This version doesn't do any actual data copying; the copying has
2249to be done somewhere else, e.g. inline in the generated code. Because of this,
2250the function doesn't have the trailing barrier that __kmpc_copyprivate has, so
2251the generated code needs a barrier after the copying of all the data has been
2252done.
2253*/
2254void *__kmpc_copyprivate_light(ident_t *loc, kmp_int32 gtid, void *cpy_data) {
2255 void **data_ptr;
2256
2257 KC_TRACE(10, ("__kmpc_copyprivate_light: called T#%d\n", gtid))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmpc_copyprivate_light: called T#%d\n"
, gtid); }
;
2258
2259 KMP_MB();
2260
2261 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
2262
2263 if (__kmp_env_consistency_check) {
2264 if (loc == 0) {
2265 KMP_WARNING(ConstructIdentInvalid)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_ConstructIdentInvalid
), __kmp_msg_null)
;
2266 }
2267 }
2268
2269 // ToDo: Optimize the following barrier
2270
2271 if (cpy_data)
2272 *data_ptr = cpy_data;
2273
2274#if OMPT_SUPPORT1
2275 ompt_frame_t *ompt_frame;
2276 if (ompt_enabled.enabled) {
2277 __ompt_get_task_info_internal(0, NULL__null, NULL__null, &ompt_frame, NULL__null, NULL__null);
2278 if (ompt_frame->enter_frame.ptr == NULL__null)
2279 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0)__builtin_frame_address(0);
2280 OMPT_STORE_RETURN_ADDRESS(gtid)OmptReturnAddressGuard ReturnAddressGuard{gtid, __builtin_return_address
(0)};
;
2281 }
2282#endif
2283/* This barrier is not a barrier region boundary */
2284#if USE_ITT_NOTIFY1
2285 __kmp_threads[gtid]->th.th_ident = loc;
2286#endif
2287 __kmp_barrier(bs_plain_barrier, gtid, FALSE0, 0, NULL__null, NULL__null);
2288
2289 return *data_ptr;
2290}
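// A hedged sketch (assumed names) of how generated code could use the light
// variant: the single thread publishes its pointer, the other threads read it
// back, and the caller performs the copy and the trailing barrier itself.
static void copyprivate_light_sketch(ident_t *loc, kmp_int32 gtid,
                                     void *my_data, kmp_int32 didit) {
  void *src = __kmpc_copyprivate_light(loc, gtid, didit ? my_data : NULL);
  if (!didit && src) {
    /* ... copy from src into my_data inline ... */
  }
  __kmpc_barrier(loc, gtid); // the caller must add a barrier after all copies
}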
2291
2292/* -------------------------------------------------------------------------- */
2293
2294#define INIT_LOCK__kmp_init_user_lock_with_checks __kmp_init_user_lock_with_checks
2295#define INIT_NESTED_LOCK__kmp_init_nested_user_lock_with_checks __kmp_init_nested_user_lock_with_checks
2296#define ACQUIRE_LOCK__kmp_acquire_user_lock_with_checks __kmp_acquire_user_lock_with_checks
2297#define ACQUIRE_LOCK_TIMED__kmp_acquire_user_lock_with_checks_timed __kmp_acquire_user_lock_with_checks_timed
2298#define ACQUIRE_NESTED_LOCK__kmp_acquire_nested_user_lock_with_checks __kmp_acquire_nested_user_lock_with_checks
2299#define ACQUIRE_NESTED_LOCK_TIMED__kmp_acquire_nested_user_lock_with_checks_timed \
2300 __kmp_acquire_nested_user_lock_with_checks_timed
2301#define RELEASE_LOCK__kmp_release_user_lock_with_checks __kmp_release_user_lock_with_checks
2302#define RELEASE_NESTED_LOCK__kmp_release_nested_user_lock_with_checks __kmp_release_nested_user_lock_with_checks
2303#define TEST_LOCK__kmp_test_user_lock_with_checks __kmp_test_user_lock_with_checks
2304#define TEST_NESTED_LOCK__kmp_test_nested_user_lock_with_checks __kmp_test_nested_user_lock_with_checks
2305#define DESTROY_LOCK__kmp_destroy_user_lock_with_checks __kmp_destroy_user_lock_with_checks
2306#define DESTROY_NESTED_LOCK__kmp_destroy_nested_user_lock_with_checks __kmp_destroy_nested_user_lock_with_checks
2307
2308// TODO: Make check abort messages use location info & pass it into
2309// with_checks routines
2310
2311#if KMP_USE_DYNAMIC_LOCK1
2312
2313// internal lock initializer
2314static __forceinline__inline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
2315 kmp_dyna_lockseq_t seq) {
2316 if (KMP_IS_D_LOCK(seq)((seq) >= lockseq_tas && (seq) <= lockseq_rtm_spin
)
) {
2317 KMP_INIT_D_LOCK(lock, seq)__kmp_direct_init[((seq) << 1 | 1)]((kmp_dyna_lock_t *)
lock, seq)
;
2318#if USE_ITT_BUILD1
2319 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL__null);
2320#endif
2321 } else {
2322 KMP_INIT_I_LOCK(lock, seq)__kmp_direct_init[0]((kmp_dyna_lock_t *)(lock), seq);
2323#if USE_ITT_BUILD1
2324 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock)((sizeof(int) < sizeof(void *)) ? __kmp_get_i_lock((*(kmp_lock_index_t
*)(lock) >> 1)) : *((kmp_indirect_lock_t **)(lock)))
;
2325 __kmp_itt_lock_creating(ilk->lock, loc);
2326#endif
2327 }
2328}
2329
2330// internal nest lock initializer
2331static __forceinline__inline void
2332__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
2333 kmp_dyna_lockseq_t seq) {
2334#if KMP_USE_TSX(0 || 1) && !0
2335 // Don't have nested lock implementation for speculative locks
2336 if (seq == lockseq_hle || seq == lockseq_rtm_queuing ||
2337 seq == lockseq_rtm_spin || seq == lockseq_adaptive)
2338 seq = __kmp_user_lock_seq;
2339#endif
2340 switch (seq) {
2341 case lockseq_tas:
2342 seq = lockseq_nested_tas;
2343 break;
2344#if KMP_USE_FUTEX(1 && (0 || 1 || KMP_ARCH_ARM || 0))
2345 case lockseq_futex:
2346 seq = lockseq_nested_futex;
2347 break;
2348#endif
2349 case lockseq_ticket:
2350 seq = lockseq_nested_ticket;
2351 break;
2352 case lockseq_queuing:
2353 seq = lockseq_nested_queuing;
2354 break;
2355 case lockseq_drdpa:
2356 seq = lockseq_nested_drdpa;
2357 break;
2358 default:
2359 seq = lockseq_nested_queuing;
2360 }
2361 KMP_INIT_I_LOCK(lock, seq)__kmp_direct_init[0]((kmp_dyna_lock_t *)(lock), seq);
2362#if USE_ITT_BUILD1
2363 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock)((sizeof(int) < sizeof(void *)) ? __kmp_get_i_lock((*(kmp_lock_index_t
*)(lock) >> 1)) : *((kmp_indirect_lock_t **)(lock)))
;
2364 __kmp_itt_lock_creating(ilk->lock, loc);
2365#endif
2366}
2367
2368/* initialize the lock with a hint */
2369void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
2370 uintptr_t hint) {
2371 KMP_DEBUG_ASSERT(__kmp_init_serial)if (!(__kmp_init_serial)) { __kmp_debug_assert("__kmp_init_serial"
, "openmp/runtime/src/kmp_csupport.cpp", 2371); }
;
2372 if (__kmp_env_consistency_check && user_lock == NULL__null) {
2373 KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint")__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_LockIsUninitialized
, "omp_init_lock_with_hint"), __kmp_msg_null)
;
2374 }
2375
2376 __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2377
2378#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2379 // This is the case if called from omp_init_lock_with_hint:
2380 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
2381 if (!codeptr)
2382 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
2383 if (ompt_enabled.ompt_callback_lock_init) {
2384 ompt_callbacks.ompt_callback(ompt_callback_lock_init)ompt_callback_lock_init_callback(
2385 ompt_mutex_lock, (omp_lock_hint_t)hint,
2386 __ompt_get_mutex_impl_type(user_lock),
2387 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2388 }
2389#endif
2390}
2391
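// A hedged usage sketch (assumed names, simplified loc/gtid handling) of a
// hinted lock going through the entry point above; __kmpc_set_lock,
// __kmpc_unset_lock and __kmpc_destroy_lock are declared in kmp.h.
static void hinted_lock_sketch(ident_t *loc, kmp_int32 gtid) {
  void *lk = NULL; // with dynamic locks the user lock is a pointer-sized slot
  __kmpc_init_lock_with_hint(loc, gtid, &lk, omp_lock_hint_speculative);
  __kmpc_set_lock(loc, gtid, &lk);
  /* ... protected code ... */
  __kmpc_unset_lock(loc, gtid, &lk);
  __kmpc_destroy_lock(loc, gtid, &lk);
}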
2392/* initialize the lock with a hint */
2393void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
2394 void **user_lock, uintptr_t hint) {
2395 KMP_DEBUG_ASSERT(__kmp_init_serial)if (!(__kmp_init_serial)) { __kmp_debug_assert("__kmp_init_serial"
, "openmp/runtime/src/kmp_csupport.cpp", 2395); }
;
2396 if (__kmp_env_consistency_check && user_lock == NULL__null) {
2397 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint")__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_LockIsUninitialized
, "omp_init_nest_lock_with_hint"), __kmp_msg_null)
;
2398 }
2399
2400 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2401
2402#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2403 // This is the case, if called from omp_init_lock_with_hint:
2404 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
2405 if (!codeptr)
2406 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
2407 if (ompt_enabled.ompt_callback_lock_init) {
2408 ompt_callbacks.ompt_callback(ompt_callback_lock_init)ompt_callback_lock_init_callback(
2409 ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
2410 __ompt_get_mutex_impl_type(user_lock),
2411 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2412 }
2413#endif
2414}
2415
2416#endif // KMP_USE_DYNAMIC_LOCK
2417
2418/* initialize the lock */
2419void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2420#if KMP_USE_DYNAMIC_LOCK1
2421
2422 KMP_DEBUG_ASSERT(__kmp_init_serial)if (!(__kmp_init_serial)) { __kmp_debug_assert("__kmp_init_serial"
, "openmp/runtime/src/kmp_csupport.cpp", 2422); }
;
2423 if (__kmp_env_consistency_check && user_lock == NULL__null) {
2424 KMP_FATAL(LockIsUninitialized, "omp_init_lock")__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_LockIsUninitialized
, "omp_init_lock"), __kmp_msg_null)
;
2425 }
2426 __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2427
2428#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2429 // This is the case, if called from omp_init_lock_with_hint:
2430 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
2431 if (!codeptr)
2432 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
2433 if (ompt_enabled.ompt_callback_lock_init) {
2434 ompt_callbacks.ompt_callback(ompt_callback_lock_init)ompt_callback_lock_init_callback(
2435 ompt_mutex_lock, omp_lock_hint_none,
2436 __ompt_get_mutex_impl_type(user_lock),
2437 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2438 }
2439#endif
2440
2441#else // KMP_USE_DYNAMIC_LOCK
2442
2443 static char const *const func = "omp_init_lock";
2444 kmp_user_lock_p lck;
2445 KMP_DEBUG_ASSERT(__kmp_init_serial)if (!(__kmp_init_serial)) { __kmp_debug_assert("__kmp_init_serial"
, "openmp/runtime/src/kmp_csupport.cpp", 2445); }
;
2446
2447 if (__kmp_env_consistency_check) {
2448 if (user_lock == NULL__null) {
2449 KMP_FATAL(LockIsUninitialized, func)__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_LockIsUninitialized
, func), __kmp_msg_null)
;
2450 }
2451 }
2452
2453 KMP_CHECK_USER_LOCK_INIT();
2454
2455 if ((__kmp_user_lock_kind == lk_tas) &&
2456 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZEsizeof(int))) {
2457 lck = (kmp_user_lock_p)user_lock;
2458 }
2459#if KMP_USE_FUTEX(1 && (0 || 1 || KMP_ARCH_ARM || 0))
2460 else if ((__kmp_user_lock_kind == lk_futex) &&
2461 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZEsizeof(int))) {
2462 lck = (kmp_user_lock_p)user_lock;
2463 }
2464#endif
2465 else {
2466 lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2467 }
2468 INIT_LOCK__kmp_init_user_lock_with_checks(lck);
2469 __kmp_set_user_lock_location(lck, loc);
2470
2471#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2472 // This is the case, if called from omp_init_lock_with_hint:
2473 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
2474 if (!codeptr)
2475 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
2476 if (ompt_enabled.ompt_callback_lock_init) {
2477 ompt_callbacks.ompt_callback(ompt_callback_lock_init)ompt_callback_lock_init_callback(
2478 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2479 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2480 }
2481#endif
2482
2483#if USE_ITT_BUILD1
2484 __kmp_itt_lock_creating(lck);
2485#endif /* USE_ITT_BUILD */
2486
2487#endif // KMP_USE_DYNAMIC_LOCK
2488} // __kmpc_init_lock
2489
2490/* initialize the lock */
2491void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2492#if KMP_USE_DYNAMIC_LOCK1
2493
2494 KMP_DEBUG_ASSERT(__kmp_init_serial)if (!(__kmp_init_serial)) { __kmp_debug_assert("__kmp_init_serial"
, "openmp/runtime/src/kmp_csupport.cpp", 2494); }
;
2495 if (__kmp_env_consistency_check && user_lock == NULL__null) {
2496 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock")__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_LockIsUninitialized
, "omp_init_nest_lock"), __kmp_msg_null)
;
2497 }
2498 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2499
2500#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2501 // This is the case, if called from omp_init_lock_with_hint:
2502 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
2503 if (!codeptr)
2504 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
2505 if (ompt_enabled.ompt_callback_lock_init) {
2506 ompt_callbacks.ompt_callback(ompt_callback_lock_init)ompt_callback_lock_init_callback(
2507 ompt_mutex_nest_lock, omp_lock_hint_none,
2508 __ompt_get_mutex_impl_type(user_lock),
2509 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2510 }
2511#endif
2512
2513#else // KMP_USE_DYNAMIC_LOCK
2514
2515 static char const *const func = "omp_init_nest_lock";
2516 kmp_user_lock_p lck;
2517 KMP_DEBUG_ASSERT(__kmp_init_serial)if (!(__kmp_init_serial)) { __kmp_debug_assert("__kmp_init_serial"
, "openmp/runtime/src/kmp_csupport.cpp", 2517); }
;
2518
2519 if (__kmp_env_consistency_check) {
2520 if (user_lock == NULL__null) {
2521 KMP_FATAL(LockIsUninitialized, func)__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_LockIsUninitialized
, func), __kmp_msg_null)
;
2522 }
2523 }
2524
2525 KMP_CHECK_USER_LOCK_INIT();
2526
2527 if ((__kmp_user_lock_kind == lk_tas) &&
2528 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2529 OMP_NEST_LOCK_T_SIZEsizeof(void *))) {
2530 lck = (kmp_user_lock_p)user_lock;
2531 }
2532#if KMP_USE_FUTEX(1 && (0 || 1 || KMP_ARCH_ARM || 0))
2533 else if ((__kmp_user_lock_kind == lk_futex) &&
2534 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2535 OMP_NEST_LOCK_T_SIZEsizeof(void *))) {
2536 lck = (kmp_user_lock_p)user_lock;
2537 }
2538#endif
2539 else {
2540 lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2541 }
2542
2543 INIT_NESTED_LOCK__kmp_init_nested_user_lock_with_checks(lck);
2544 __kmp_set_user_lock_location(lck, loc);
2545
2546#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2547 // This is the case, if called from omp_init_lock_with_hint:
2548 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
2549 if (!codeptr)
2550 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
2551 if (ompt_enabled.ompt_callback_lock_init) {
2552 ompt_callbacks.ompt_callback(ompt_callback_lock_init)ompt_callback_lock_init_callback(
2553 ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2554 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2555 }
2556#endif
2557
2558#if USE_ITT_BUILD1
2559 __kmp_itt_lock_creating(lck);
2560#endif /* USE_ITT_BUILD */
2561
2562#endif // KMP_USE_DYNAMIC_LOCK
2563} // __kmpc_init_nest_lock
2564
2565void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2566#if KMP_USE_DYNAMIC_LOCK1
2567
2568#if USE_ITT_BUILD1
2569 kmp_user_lock_p lck;
2570 if (KMP_EXTRACT_D_TAG(user_lock)(*((kmp_dyna_lock_t *)(user_lock)) & ((1 << 8) - 1)
& -(*((kmp_dyna_lock_t *)(user_lock)) & 1))
== 0) {
2571 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock)((sizeof(int) < sizeof(void *)) ? __kmp_get_i_lock((*(kmp_lock_index_t
*)(user_lock) >> 1)) : *((kmp_indirect_lock_t **)(user_lock
)))
)->lock;
2572 } else {
2573 lck = (kmp_user_lock_p)user_lock;
2574 }
2575 __kmp_itt_lock_destroyed(lck);
2576#endif
2577#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2578 // This is the case, if called from omp_init_lock_with_hint:
2579 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
2580 if (!codeptr)
2581 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
2582 if (ompt_enabled.ompt_callback_lock_destroy) {
2583 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)ompt_callback_lock_destroy_callback(
2584 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2585 }
2586#endif
2587 KMP_D_LOCK_FUNC(user_lock, destroy)__kmp_direct_destroy[(*((kmp_dyna_lock_t *)(user_lock)) &
((1 << 8) - 1) & -(*((kmp_dyna_lock_t *)(user_lock
)) & 1))]
((kmp_dyna_lock_t *)user_lock);
2588#else
2589 kmp_user_lock_p lck;
2590
2591 if ((__kmp_user_lock_kind == lk_tas) &&
2592 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZEsizeof(int))) {
2593 lck = (kmp_user_lock_p)user_lock;
2594 }
2595#if KMP_USE_FUTEX(1 && (0 || 1 || KMP_ARCH_ARM || 0))
2596 else if ((__kmp_user_lock_kind == lk_futex) &&
2597 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZEsizeof(int))) {
2598 lck = (kmp_user_lock_p)user_lock;
2599 }
2600#endif
2601 else {
2602 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
2603 }
2604
2605#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2606 // This is the case, if called from omp_init_lock_with_hint:
2607 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
2608 if (!codeptr)
2609 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
2610 if (ompt_enabled.ompt_callback_lock_destroy) {
2611 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)ompt_callback_lock_destroy_callback(
2612 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2613 }
2614#endif
2615
2616#if USE_ITT_BUILD1
2617 __kmp_itt_lock_destroyed(lck);
2618#endif /* USE_ITT_BUILD */
2619 DESTROY_LOCK__kmp_destroy_user_lock_with_checks(lck);
2620
2621 if ((__kmp_user_lock_kind == lk_tas) &&
2622 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZEsizeof(int))) {
2623 ;
2624 }
2625#if KMP_USE_FUTEX(1 && (0 || 1 || KMP_ARCH_ARM || 0))
2626 else if ((__kmp_user_lock_kind == lk_futex) &&
2627 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZEsizeof(int))) {
2628 ;
2629 }
2630#endif
2631 else {
2632 __kmp_user_lock_free(user_lock, gtid, lck);
2633 }
2634#endif // KMP_USE_DYNAMIC_LOCK
2635} // __kmpc_destroy_lock
2636
2637/* destroy the lock */
2638void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2639#if KMP_USE_DYNAMIC_LOCK1
2640
2641#if USE_ITT_BUILD1
2642 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock)((sizeof(int) < sizeof(void *)) ? __kmp_get_i_lock((*(kmp_lock_index_t
*)(user_lock) >> 1)) : *((kmp_indirect_lock_t **)(user_lock
)))
;
2643 __kmp_itt_lock_destroyed(ilk->lock);
2644#endif
2645#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2646 // This is the case, if called from omp_init_lock_with_hint:
2647 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
2648 if (!codeptr)
2649 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
2650 if (ompt_enabled.ompt_callback_lock_destroy) {
2651 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)ompt_callback_lock_destroy_callback(
2652 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2653 }
2654#endif
2655 KMP_D_LOCK_FUNC(user_lock, destroy)__kmp_direct_destroy[(*((kmp_dyna_lock_t *)(user_lock)) &
((1 << 8) - 1) & -(*((kmp_dyna_lock_t *)(user_lock
)) & 1))]
((kmp_dyna_lock_t *)user_lock);
2656
2657#else // KMP_USE_DYNAMIC_LOCK
2658
2659 kmp_user_lock_p lck;
2660
2661 if ((__kmp_user_lock_kind == lk_tas) &&
2662 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2663 OMP_NEST_LOCK_T_SIZEsizeof(void *))) {
2664 lck = (kmp_user_lock_p)user_lock;
2665 }
2666#if KMP_USE_FUTEX(1 && (0 || 1 || KMP_ARCH_ARM || 0))
2667 else if ((__kmp_user_lock_kind == lk_futex) &&
2668 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2669 OMP_NEST_LOCK_T_SIZEsizeof(void *))) {
2670 lck = (kmp_user_lock_p)user_lock;
2671 }
2672#endif
2673 else {
2674 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
2675 }
2676
2677#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2678 // This is the case, if called from omp_init_lock_with_hint:
2679 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
2680 if (!codeptr)
2681 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
2682 if (ompt_enabled.ompt_callback_lock_destroy) {
2683 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)ompt_callback_lock_destroy_callback(
2684 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2685 }
2686#endif
2687
2688#if USE_ITT_BUILD1
2689 __kmp_itt_lock_destroyed(lck);
2690#endif /* USE_ITT_BUILD */
2691
2692 DESTROY_NESTED_LOCK__kmp_destroy_nested_user_lock_with_checks(lck);
2693
2694 if ((__kmp_user_lock_kind == lk_tas) &&
2695 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2696 OMP_NEST_LOCK_T_SIZEsizeof(void *))) {
2697 ;
2698 }
2699#if KMP_USE_FUTEX(1 && (0 || 1 || KMP_ARCH_ARM || 0))
2700 else if ((__kmp_user_lock_kind == lk_futex) &&
2701 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2702 OMP_NEST_LOCK_T_SIZEsizeof(void *))) {
2703 ;
2704 }
2705#endif
2706 else {
2707 __kmp_user_lock_free(user_lock, gtid, lck);
2708 }
2709#endif // KMP_USE_DYNAMIC_LOCK
2710} // __kmpc_destroy_nest_lock
2711
2712void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2713 KMP_COUNT_BLOCK(OMP_set_lock)((void)0);
2714#if KMP_USE_DYNAMIC_LOCK1
2715 int tag = KMP_EXTRACT_D_TAG(user_lock)(*((kmp_dyna_lock_t *)(user_lock)) & ((1 << 8) - 1)
& -(*((kmp_dyna_lock_t *)(user_lock)) & 1))
;
2716#if USE_ITT_BUILD1
2717 __kmp_itt_lock_acquiring(
2718 (kmp_user_lock_p)
2719 user_lock); // itt function will get to the right lock object.
2720#endif
2721#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2722 // This is the case, if called from omp_init_lock_with_hint:
2723 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
2724 if (!codeptr)
2725 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
2726 if (ompt_enabled.ompt_callback_mutex_acquire) {
2727 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)ompt_callback_mutex_acquire_callback(
2728 ompt_mutex_lock, omp_lock_hint_none,
2729 __ompt_get_mutex_impl_type(user_lock),
2730 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2731 }
2732#endif
2733#if KMP_USE_INLINED_TAS(1 && (0 || 1 || KMP_ARCH_ARM)) && 1
2734 if (tag == locktag_tas && !__kmp_env_consistency_check) {
2735 KMP_ACQUIRE_TAS_LOCK(user_lock, gtid){ kmp_tas_lock_t *l = (kmp_tas_lock_t *)user_lock; kmp_int32 tas_free
= (locktag_tas); kmp_int32 tas_busy = ((gtid + 1) << 8
| locktag_tas); if ((&l->lk.poll)->load(std::memory_order_relaxed
) != tas_free || !__kmp_atomic_compare_store_acq(&l->lk
.poll, tas_free, tas_busy)) { kmp_uint32 spins; (!__kmp_itt_fsync_prepare_ptr__3_0
) ? (void)0 : __kmp_itt_fsync_prepare_ptr__3_0((void *)(l)); {
(spins) = __kmp_yield_init; }; kmp_backoff_t backoff = __kmp_spin_backoff_params
; do { if ((__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc
: __kmp_xproc)) { { __kmp_x86_pause(); if (((!0)) &&
(((__kmp_use_yield == 1) || (__kmp_use_yield == 2 &&
(((__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc
))))))) __kmp_yield(); }; } else { { __kmp_x86_pause(); if ((
(__kmp_use_yield == 1) || (__kmp_use_yield == 2 && ((
(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc
)))))) { (spins) -= 2; if (!(spins)) { __kmp_yield(); (spins)
= __kmp_yield_next; } } }; } __kmp_spin_backoff(&backoff
); } while ( (&l->lk.poll)->load(std::memory_order_relaxed
) != tas_free || !__kmp_atomic_compare_store_acq(&l->lk
.poll, tas_free, tas_busy)); } (!__kmp_itt_fsync_acquired_ptr__3_0
) ? (void)0 : __kmp_itt_fsync_acquired_ptr__3_0((void *)(l));
}
;
2736 } else
2737#elif KMP_USE_INLINED_FUTEX(1 && (0 || 1 || KMP_ARCH_ARM || 0)) && 0
2738 if (tag == locktag_futex && !__kmp_env_consistency_check) {
2739 KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid){ kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)user_lock; kmp_int32
gtid_code = (gtid + 1) << 1; ; (!__kmp_itt_fsync_prepare_ptr__3_0
) ? (void)0 : __kmp_itt_fsync_prepare_ptr__3_0((void *)(ftx))
; kmp_int32 poll_val; while ((poll_val = __sync_val_compare_and_swap
((volatile kmp_uint32 *)(&(ftx->lk.poll)), (kmp_uint32
)((locktag_futex)), (kmp_uint32)(((gtid_code) << 8 | locktag_futex
)))) != (locktag_futex)) { kmp_int32 cond = ((poll_val) >>
8) & 1; if (!cond) { if (!__sync_val_compare_and_swap((volatile
kmp_uint32 *)(&(ftx->lk.poll)), (kmp_uint32)(poll_val
), (kmp_uint32)(poll_val | ((1) << 8 | locktag_futex)))
) { continue; } poll_val |= ((1) << 8 | locktag_futex);
} kmp_int32 rc; if ((rc = syscall(202, &(ftx->lk.poll
), 0, poll_val, __null, __null, 0)) != 0) { continue; } gtid_code
|= 1; } (!__kmp_itt_fsync_acquired_ptr__3_0) ? (void)0 : __kmp_itt_fsync_acquired_ptr__3_0
((void *)(ftx)); }
;
2740 } else
2741#endif
2742 {
2743 __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2744 }
2745#if USE_ITT_BUILD1
2746 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2747#endif
2748#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2749 if (ompt_enabled.ompt_callback_mutex_acquired) {
2750 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)ompt_callback_mutex_acquired_callback(
2751 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2752 }
2753#endif
2754
2755#else // KMP_USE_DYNAMIC_LOCK
2756
2757 kmp_user_lock_p lck;
2758
2759 if ((__kmp_user_lock_kind == lk_tas) &&
2760 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZEsizeof(int))) {
2761 lck = (kmp_user_lock_p)user_lock;
2762 }
2763#if KMP_USE_FUTEX(1 && (0 || 1 || KMP_ARCH_ARM || 0))
2764 else if ((__kmp_user_lock_kind == lk_futex) &&
2765 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZEsizeof(int))) {
2766 lck = (kmp_user_lock_p)user_lock;
2767 }
2768#endif
2769 else {
2770 lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
2771 }
2772
2773#if USE_ITT_BUILD1
2774 __kmp_itt_lock_acquiring(lck);
2775#endif /* USE_ITT_BUILD */
2776#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2777 // This is the case, if called from omp_init_lock_with_hint:
2778 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
2779 if (!codeptr)
2780 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
2781 if (ompt_enabled.ompt_callback_mutex_acquire) {
2782 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)ompt_callback_mutex_acquire_callback(
2783 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2784 (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2785 }
2786#endif
2787
2788 ACQUIRE_LOCK__kmp_acquire_user_lock_with_checks(lck, gtid);
2789
2790#if USE_ITT_BUILD1
2791 __kmp_itt_lock_acquired(lck);
2792#endif /* USE_ITT_BUILD */
2793
2794#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2795 if (ompt_enabled.ompt_callback_mutex_acquired) {
2796 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)ompt_callback_mutex_acquired_callback(
2797 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2798 }
2799#endif
2800
2801#endif // KMP_USE_DYNAMIC_LOCK
2802}
2803
2804void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2805#if KMP_USE_DYNAMIC_LOCK1
2806
2807#if USE_ITT_BUILD1
2808 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2809#endif
2810#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2811 // This is the case, if called from omp_init_lock_with_hint:
2812 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
2813 if (!codeptr)
2814 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
2815 if (ompt_enabled.enabled) {
2816 if (ompt_enabled.ompt_callback_mutex_acquire) {
2817 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)ompt_callback_mutex_acquire_callback(
2818 ompt_mutex_nest_lock, omp_lock_hint_none,
2819 __ompt_get_mutex_impl_type(user_lock),
2820 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2821 }
2822 }
2823#endif
2824 int acquire_status =
2825 KMP_D_LOCK_FUNC(user_lock, set)__kmp_direct_set[(*((kmp_dyna_lock_t *)(user_lock)) & ((1
<< 8) - 1) & -(*((kmp_dyna_lock_t *)(user_lock)) &
1))]
((kmp_dyna_lock_t *)user_lock, gtid);
2826 (void)acquire_status;
2827#if USE_ITT_BUILD1
2828 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2829#endif
2830
2831#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2832 if (ompt_enabled.enabled) {
2833 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST1) {
2834 if (ompt_enabled.ompt_callback_mutex_acquired) {
2835 // lock_first
2836 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)ompt_callback_mutex_acquired_callback(
2837 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
2838 codeptr);
2839 }
2840 } else {
2841 if (ompt_enabled.ompt_callback_nest_lock) {
2842 // lock_next
2843 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)ompt_callback_nest_lock_callback(
2844 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2845 }
2846 }
2847 }
2848#endif
2849
2850#else // KMP_USE_DYNAMIC_LOCK
2851 int acquire_status;
2852 kmp_user_lock_p lck;
2853
2854 if ((__kmp_user_lock_kind == lk_tas) &&
2855 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2856 OMP_NEST_LOCK_T_SIZEsizeof(void *))) {
2857 lck = (kmp_user_lock_p)user_lock;
2858 }
2859#if KMP_USE_FUTEX(1 && (0 || 1 || KMP_ARCH_ARM || 0))
2860 else if ((__kmp_user_lock_kind == lk_futex) &&
2861 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2862 OMP_NEST_LOCK_T_SIZEsizeof(void *))) {
2863 lck = (kmp_user_lock_p)user_lock;
2864 }
2865#endif
2866 else {
2867 lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
2868 }
2869
2870#if USE_ITT_BUILD1
2871 __kmp_itt_lock_acquiring(lck);
2872#endif /* USE_ITT_BUILD */
2873#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2874 // This is the case, if called from omp_init_lock_with_hint:
2875 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
2876 if (!codeptr)
2877 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
2878 if (ompt_enabled.enabled) {
2879 if (ompt_enabled.ompt_callback_mutex_acquire) {
2880 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)ompt_callback_mutex_acquire_callback(
2881 ompt_mutex_nest_lock, omp_lock_hint_none,
2882 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
2883 codeptr);
2884 }
2885 }
2886#endif
2887
2888 ACQUIRE_NESTED_LOCK__kmp_acquire_nested_user_lock_with_checks(lck, gtid, &acquire_status);
2889
2890#if USE_ITT_BUILD1
2891 __kmp_itt_lock_acquired(lck);
2892#endif /* USE_ITT_BUILD */
2893
2894#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2895 if (ompt_enabled.enabled) {
2896 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST1) {
2897 if (ompt_enabled.ompt_callback_mutex_acquired) {
2898 // lock_first
2899 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)ompt_callback_mutex_acquired_callback(
2900 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2901 }
2902 } else {
2903 if (ompt_enabled.ompt_callback_nest_lock) {
2904 // lock_next
2905 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)ompt_callback_nest_lock_callback(
2906 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2907 }
2908 }
2909 }
2910#endif
2911
2912#endif // KMP_USE_DYNAMIC_LOCK
2913}
2914
2915void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2916#if KMP_USE_DYNAMIC_LOCK1
2917
2918 int tag = KMP_EXTRACT_D_TAG(user_lock)(*((kmp_dyna_lock_t *)(user_lock)) & ((1 << 8) - 1)
& -(*((kmp_dyna_lock_t *)(user_lock)) & 1))
;
2919#if USE_ITT_BUILD1
2920 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2921#endif
2922#if KMP_USE_INLINED_TAS(1 && (0 || 1 || KMP_ARCH_ARM)) && 1
2923 if (tag == locktag_tas && !__kmp_env_consistency_check) {
2924 KMP_RELEASE_TAS_LOCK(user_lock, gtid){ (&((kmp_tas_lock_t *)user_lock)->lk.poll)->store(
(locktag_tas), std::memory_order_release); }
;
2925 } else
2926#elif KMP_USE_INLINED_FUTEX(1 && (0 || 1 || KMP_ARCH_ARM || 0)) && 0
2927 if (tag == locktag_futex && !__kmp_env_consistency_check) {
2928 KMP_RELEASE_FUTEX_LOCK(user_lock, gtid){ kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)user_lock; ; (!
__kmp_itt_fsync_releasing_ptr__3_0) ? (void)0 : __kmp_itt_fsync_releasing_ptr__3_0
((void *)(ftx)); kmp_int32 poll_val = __sync_lock_test_and_set
((volatile kmp_uint32 *)(&(ftx->lk.poll)), (kmp_uint32
)((locktag_futex))); if (((poll_val) >> 8) & 1) { syscall
(202, &(ftx->lk.poll), 1, ((1) << 8 | locktag_futex
), __null, __null, 0); } ; { __kmp_x86_pause(); if ((((__kmp_use_yield
== 1 || __kmp_use_yield == 2) && (((__kmp_nth) > (
__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)))))) __kmp_yield
(); }; }
;
2929 } else
2930#endif
2931 {
2932 __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2933 }
2934
2935#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2936 // This is the case, if called from omp_init_lock_with_hint:
2937 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
2938 if (!codeptr)
2939 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
2940 if (ompt_enabled.ompt_callback_mutex_released) {
2941 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)ompt_callback_mutex_released_callback(
2942 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2943 }
2944#endif
2945
2946#else // KMP_USE_DYNAMIC_LOCK
2947
2948 kmp_user_lock_p lck;
2949
2950 /* Can't use serial interval since not block structured */
2951 /* release the lock */
2952
2953 if ((__kmp_user_lock_kind == lk_tas) &&
2954 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZEsizeof(int))) {
2955#if KMP_OS_LINUX1 && \
2956 (KMP_ARCH_X860 || KMP_ARCH_X86_641 || KMP_ARCH_ARM || KMP_ARCH_AARCH640)
2957// "fast" path implemented to fix customer performance issue
2958#if USE_ITT_BUILD1
2959 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2960#endif /* USE_ITT_BUILD */
2961 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0)(((kmp_user_lock_p)user_lock)->tas.lk.poll) = (0);
2962 KMP_MB();
2963
2964#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2965 // This is the case, if called from omp_init_lock_with_hint:
2966 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
2967 if (!codeptr)
2968 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
2969 if (ompt_enabled.ompt_callback_mutex_released) {
2970 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)ompt_callback_mutex_released_callback(
2971 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2972 }
2973#endif
2974
2975 return;
2976#else
2977 lck = (kmp_user_lock_p)user_lock;
2978#endif
2979 }
2980#if KMP_USE_FUTEX(1 && (0 || 1 || KMP_ARCH_ARM || 0))
2981 else if ((__kmp_user_lock_kind == lk_futex) &&
2982 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZEsizeof(int))) {
2983 lck = (kmp_user_lock_p)user_lock;
2984 }
2985#endif
2986 else {
2987 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
2988 }
2989
2990#if USE_ITT_BUILD1
2991 __kmp_itt_lock_releasing(lck);
2992#endif /* USE_ITT_BUILD */
2993
2994 RELEASE_LOCK__kmp_release_user_lock_with_checks(lck, gtid);
2995
2996#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2997 // This is the case, if called from omp_init_lock_with_hint:
2998 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
2999 if (!codeptr)
3000 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
3001 if (ompt_enabled.ompt_callback_mutex_released) {
3002 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)ompt_callback_mutex_released_callback(
3003 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3004 }
3005#endif
3006
3007#endif // KMP_USE_DYNAMIC_LOCK
3008}
3009
3010/* release the lock */
3011void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3012#if KMP_USE_DYNAMIC_LOCK1
3013
3014#if USE_ITT_BUILD1
3015 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
3016#endif
3017 int release_status =
3018 KMP_D_LOCK_FUNC(user_lock, unset)__kmp_direct_unset[(*((kmp_dyna_lock_t *)(user_lock)) & (
(1 << 8) - 1) & -(*((kmp_dyna_lock_t *)(user_lock))
& 1))]
((kmp_dyna_lock_t *)user_lock, gtid);
3019 (void)release_status;
3020
3021#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
3022 // This is the case, if called from omp_init_lock_with_hint:
3023 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
3024 if (!codeptr)
3025 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
3026 if (ompt_enabled.enabled) {
3027 if (release_status == KMP_LOCK_RELEASED1) {
3028 if (ompt_enabled.ompt_callback_mutex_released) {
3029 // release_lock_last
3030 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)ompt_callback_mutex_released_callback(
3031 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3032 codeptr);
3033 }
3034 } else if (ompt_enabled.ompt_callback_nest_lock) {
3035 // release_lock_prev
3036 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)ompt_callback_nest_lock_callback(
3037 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3038 }
3039 }
3040#endif
3041
3042#else // KMP_USE_DYNAMIC_LOCK
3043
3044 kmp_user_lock_p lck;
3045
3046 /* Can't use serial interval since not block structured */
3047
3048 if ((__kmp_user_lock_kind == lk_tas) &&
3049 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3050 OMP_NEST_LOCK_T_SIZEsizeof(void *))) {
3051#if KMP_OS_LINUX1 && \
3052 (KMP_ARCH_X860 || KMP_ARCH_X86_641 || KMP_ARCH_ARM || KMP_ARCH_AARCH640)
3053 // "fast" path implemented to fix customer performance issue
3054 kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
3055#if USE_ITT_BUILD1
3056 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
3057#endif /* USE_ITT_BUILD */
3058
3059#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
3060 int release_status = KMP_LOCK_STILL_HELD0;
3061#endif
3062
3063 if (--(tl->lk.depth_locked) == 0) {
3064 TCW_4(tl->lk.poll, 0)(tl->lk.poll) = (0);
3065#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
3066 release_status = KMP_LOCK_RELEASED1;
3067#endif
3068 }
3069 KMP_MB();
3070
3071#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
3072 // This is the case, if called from omp_init_lock_with_hint:
3073 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
3074 if (!codeptr)
3075 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
3076 if (ompt_enabled.enabled) {
3077 if (release_status == KMP_LOCK_RELEASED1) {
3078 if (ompt_enabled.ompt_callback_mutex_released) {
3079 // release_lock_last
3080 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)ompt_callback_mutex_released_callback(
3081 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3082 }
3083 } else if (ompt_enabled.ompt_callback_nest_lock) {
3084 // release_lock_previous
3085 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)ompt_callback_nest_lock_callback(
3086 ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3087 }
3088 }
3089#endif
3090
3091 return;
3092#else
3093 lck = (kmp_user_lock_p)user_lock;
3094#endif
3095 }
3096#if KMP_USE_FUTEX(1 && (0 || 1 || KMP_ARCH_ARM || 0))
3097 else if ((__kmp_user_lock_kind == lk_futex) &&
3098 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3099 OMP_NEST_LOCK_T_SIZEsizeof(void *))) {
3100 lck = (kmp_user_lock_p)user_lock;
3101 }
3102#endif
3103 else {
3104 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
3105 }
3106
3107#if USE_ITT_BUILD1
3108 __kmp_itt_lock_releasing(lck);
3109#endif /* USE_ITT_BUILD */
3110
3111 int release_status;
3112 release_status = RELEASE_NESTED_LOCK__kmp_release_nested_user_lock_with_checks(lck, gtid);
3113#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
3114 // This is the case, if called from omp_init_lock_with_hint:
3115 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
3116 if (!codeptr)
3117 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
3118 if (ompt_enabled.enabled) {
3119 if (release_status == KMP_LOCK_RELEASED1) {
3120 if (ompt_enabled.ompt_callback_mutex_released) {
3121 // release_lock_last
3122 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)ompt_callback_mutex_released_callback(
3123 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3124 }
3125 } else if (ompt_enabled.ompt_callback_nest_lock) {
3126 // release_lock_previous
3127 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)ompt_callback_nest_lock_callback(
3128 ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3129 }
3130 }
3131#endif
3132
3133#endif // KMP_USE_DYNAMIC_LOCK
3134}
3135
3136/* try to acquire the lock */
3137int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3138 KMP_COUNT_BLOCK(OMP_test_lock)((void)0);
3139
3140#if KMP_USE_DYNAMIC_LOCK1
3141 int rc;
3142 int tag = KMP_EXTRACT_D_TAG(user_lock)(*((kmp_dyna_lock_t *)(user_lock)) & ((1 << 8) - 1)
& -(*((kmp_dyna_lock_t *)(user_lock)) & 1))
;
3143#if USE_ITT_BUILD1
3144 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3145#endif
3146#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
3147 // This is the case, if called from omp_init_lock_with_hint:
3148 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
3149 if (!codeptr)
3150 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
3151 if (ompt_enabled.ompt_callback_mutex_acquire) {
3152 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)ompt_callback_mutex_acquire_callback(
3153 ompt_mutex_lock, omp_lock_hint_none,
3154 __ompt_get_mutex_impl_type(user_lock),
3155 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3156 }
3157#endif
3158#if KMP_USE_INLINED_TAS(1 && (0 || 1 || KMP_ARCH_ARM)) && 1
3159 if (tag == locktag_tas && !__kmp_env_consistency_check) {
3160 KMP_TEST_TAS_LOCK(user_lock, gtid, rc){ kmp_tas_lock_t *l = (kmp_tas_lock_t *)user_lock; kmp_int32 tas_free
= (locktag_tas); kmp_int32 tas_busy = ((gtid + 1) << 8
| locktag_tas); rc = (&l->lk.poll)->load(std::memory_order_relaxed
) == tas_free && __kmp_atomic_compare_store_acq(&
l->lk.poll, tas_free, tas_busy); }
;
3161 } else
3162#elif KMP_USE_INLINED_FUTEX(1 && (0 || 1 || KMP_ARCH_ARM || 0)) && 0
3163 if (tag == locktag_futex && !__kmp_env_consistency_check) {
3164 KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc){ kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)user_lock; if (
__sync_bool_compare_and_swap((volatile kmp_uint32 *)(&(ftx
->lk.poll)), (kmp_uint32)((locktag_futex)), (kmp_uint32)((
(gtid + 1 << 1) << 8 | locktag_futex)))) { (!__kmp_itt_fsync_acquired_ptr__3_0
) ? (void)0 : __kmp_itt_fsync_acquired_ptr__3_0((void *)(ftx)
); rc = (!0); } else { rc = 0; } }
;
3165 } else
3166#endif
3167 {
3168 rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
3169 }
3170 if (rc) {
3171#if USE_ITT_BUILD1
3172 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3173#endif
3174#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
3175 if (ompt_enabled.ompt_callback_mutex_acquired) {
3176 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)ompt_callback_mutex_acquired_callback(
3177 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3178 }
3179#endif
3180 return FTN_TRUE(!0);
3181 } else {
3182#if USE_ITT_BUILD1
3183 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3184#endif
3185 return FTN_FALSE0;
3186 }
3187
3188#else // KMP_USE_DYNAMIC_LOCK
3189
3190 kmp_user_lock_p lck;
3191 int rc;
3192
3193 if ((__kmp_user_lock_kind == lk_tas) &&
3194 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZEsizeof(int))) {
3195 lck = (kmp_user_lock_p)user_lock;
3196 }
3197#if KMP_USE_FUTEX(1 && (0 || 1 || KMP_ARCH_ARM || 0))
3198 else if ((__kmp_user_lock_kind == lk_futex) &&
3199 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZEsizeof(int))) {
3200 lck = (kmp_user_lock_p)user_lock;
3201 }
3202#endif
3203 else {
3204 lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
3205 }
3206
3207#if USE_ITT_BUILD1
3208 __kmp_itt_lock_acquiring(lck);
3209#endif /* USE_ITT_BUILD */
3210#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
3211 // This is the case, if called from omp_init_lock_with_hint:
3212 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
3213 if (!codeptr)
3214 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
3215 if (ompt_enabled.ompt_callback_mutex_acquire) {
3216 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)ompt_callback_mutex_acquire_callback(
3217 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
3218 (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3219 }
3220#endif
3221
3222 rc = TEST_LOCK__kmp_test_user_lock_with_checks(lck, gtid);
3223#if USE_ITT_BUILD1
3224 if (rc) {
3225 __kmp_itt_lock_acquired(lck);
3226 } else {
3227 __kmp_itt_lock_cancelled(lck);
3228 }
3229#endif /* USE_ITT_BUILD */
3230#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
3231 if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
3232 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)ompt_callback_mutex_acquired_callback(
3233 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3234 }
3235#endif
3236
3237 return (rc ? FTN_TRUE(!0) : FTN_FALSE0);
3238
3239 /* Can't use serial interval since not block structured */
3240
3241#endif // KMP_USE_DYNAMIC_LOCK
3242}
3243
3244/* try to acquire the lock */
3245int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3246#if KMP_USE_DYNAMIC_LOCK1
3247 int rc;
3248#if USE_ITT_BUILD1
3249 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3250#endif
3251#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
3252 // This is the case, if called from omp_init_lock_with_hint:
3253 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
3254 if (!codeptr)
3255 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
3256 if (ompt_enabled.ompt_callback_mutex_acquire) {
3257 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)ompt_callback_mutex_acquire_callback(
3258 ompt_mutex_nest_lock, omp_lock_hint_none,
3259 __ompt_get_mutex_impl_type(user_lock),
3260 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3261 }
3262#endif
3263 rc = KMP_D_LOCK_FUNC(user_lock, test)__kmp_direct_test[(*((kmp_dyna_lock_t *)(user_lock)) & ((
1 << 8) - 1) & -(*((kmp_dyna_lock_t *)(user_lock)) &
1))]
((kmp_dyna_lock_t *)user_lock, gtid);
3264#if USE_ITT_BUILD1
3265 if (rc) {
3266 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3267 } else {
3268 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3269 }
3270#endif
3271#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
3272 if (ompt_enabled.enabled && rc) {
3273 if (rc == 1) {
3274 if (ompt_enabled.ompt_callback_mutex_acquired) {
3275 // lock_first
3276 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)ompt_callback_mutex_acquired_callback(
3277 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3278 codeptr);
3279 }
3280 } else {
3281 if (ompt_enabled.ompt_callback_nest_lock) {
3282 // lock_next
3283 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)ompt_callback_nest_lock_callback(
3284 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3285 }
3286 }
3287 }
3288#endif
3289 return rc;
3290
3291#else // KMP_USE_DYNAMIC_LOCK
3292
3293 kmp_user_lock_p lck;
3294 int rc;
3295
3296 if ((__kmp_user_lock_kind == lk_tas) &&
3297 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3298 OMP_NEST_LOCK_T_SIZEsizeof(void *))) {
3299 lck = (kmp_user_lock_p)user_lock;
3300 }
3301#if KMP_USE_FUTEX(1 && (0 || 1 || KMP_ARCH_ARM || 0))
3302 else if ((__kmp_user_lock_kind == lk_futex) &&
3303 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3304 OMP_NEST_LOCK_T_SIZEsizeof(void *))) {
3305 lck = (kmp_user_lock_p)user_lock;
3306 }
3307#endif
3308 else {
3309 lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
3310 }
3311
3312#if USE_ITT_BUILD1
3313 __kmp_itt_lock_acquiring(lck);
3314#endif /* USE_ITT_BUILD */
3315
3316#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
3317 // This is the case, if called from omp_init_lock_with_hint:
3318 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
3319 if (!codeptr)
3320 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
3321 if (ompt_enabled.enabled &&
3322 ompt_enabled.ompt_callback_mutex_acquire) {
3323 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)ompt_callback_mutex_acquire_callback(
3324 ompt_mutex_nest_lock, omp_lock_hint_none,
3325 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
3326 codeptr);
3327 }
3328#endif
3329
3330 rc = TEST_NESTED_LOCK__kmp_test_nested_user_lock_with_checks(lck, gtid);
3331#if USE_ITT_BUILD1
3332 if (rc) {
3333 __kmp_itt_lock_acquired(lck);
3334 } else {
3335 __kmp_itt_lock_cancelled(lck);
3336 }
3337#endif /* USE_ITT_BUILD */
3338#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
3339 if (ompt_enabled.enabled && rc) {
3340 if (rc == 1) {
3341 if (ompt_enabled.ompt_callback_mutex_acquired) {
3342 // lock_first
3343 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)ompt_callback_mutex_acquired_callback(
3344 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3345 }
3346 } else {
3347 if (ompt_enabled.ompt_callback_nest_lock) {
3348 // lock_next
3349 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)ompt_callback_nest_lock_callback(
3350 ompt_mutex_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3351 }
3352 }
3353 }
3354#endif
3355 return rc;
3356
3357 /* Can't use serial interval since not block structured */
3358
3359#endif // KMP_USE_DYNAMIC_LOCK
3360}
3361
3362// Interface to fast scalable reduce methods routines
3363
3364// keep the selected method in a thread local structure for cross-function
3365// usage: will be used in __kmpc_end_reduce* functions;
3366// another solution: to re-determine the method one more time in
3367// __kmpc_end_reduce* functions (new prototype required then)
3368// AT: which solution is better?
3369#define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \
3370 ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))
3371
3372#define __KMP_GET_REDUCTION_METHOD(gtid) \
3373 (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
3374
3375// description of the packed_reduction_method variable: look at the macros in
3376// kmp.h
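The hand-off these macros implement boils down to a store in the entry routine and a load in the matching end routine. The sketch below models that contract with a C++ thread_local variable; it is an illustration only: the real runtime keys on gtid through __kmp_threads[gtid]->th.th_local rather than thread_local, and the enum names here are stand-ins.

  // Minimal model of the cross-function hand-off (not the kmp internals).
  enum reduction_method_m { critical_reduce_m, atomic_reduce_m, tree_reduce_m };

  static thread_local reduction_method_m tl_packed_reduction_method;

  // Entry routine: decide once, remember the choice for the matching end call.
  static void reduce_enter(reduction_method_m chosen) {
    tl_packed_reduction_method = chosen; // plays the role of __KMP_SET_REDUCTION_METHOD
  }

  // End routine: dispatch on what the entry routine recorded instead of
  // re-running the selection logic (the "another solution" mentioned above).
  static void reduce_end() {
    switch (tl_packed_reduction_method) { // __KMP_GET_REDUCTION_METHOD
    case critical_reduce_m: /* release the critical section */ break;
    case atomic_reduce_m:   /* nothing: atomic updates already done */ break;
    case tree_reduce_m:     /* primary thread completes after the barrier */ break;
    }
  }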
3377
3378// used in a critical section reduce block
3379static __forceinline__inline void
3380__kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3381 kmp_critical_name *crit) {
3382
3383 // this lock was visible to a customer and to the threading profile tool as a
3384 // serial overhead span (although it's used for an internal purpose only)
3385 // why was it visible in previous implementation?
3386 // should we keep it visible in new reduce block?
3387 kmp_user_lock_p lck;
3388
3389#if KMP_USE_DYNAMIC_LOCK1
3390
3391 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
3392 // Check if it is initialized.
3393 if (*lk == 0) {
10: Assuming the condition is true
3394 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)((__kmp_user_lock_seq) >= lockseq_tas && (__kmp_user_lock_seq
) <= lockseq_rtm_spin)
) {
11: Taking true branch
12: Assuming '__kmp_user_lock_seq' is < lockseq_tas
3395 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,__sync_bool_compare_and_swap((volatile kmp_uint32 *)((volatile
kmp_int32 *)crit), (kmp_uint32)(0), (kmp_uint32)(((__kmp_user_lock_seq
) << 1 | 1)))
3396 KMP_GET_D_TAG(__kmp_user_lock_seq))__sync_bool_compare_and_swap((volatile kmp_uint32 *)((volatile
kmp_int32 *)crit), (kmp_uint32)(0), (kmp_uint32)(((__kmp_user_lock_seq
) << 1 | 1)))
;
3397 } else {
3398 __kmp_init_indirect_csptr(crit, loc, global_tid,
3399 KMP_GET_I_TAG(__kmp_user_lock_seq)(kmp_indirect_locktag_t)((__kmp_user_lock_seq)-lockseq_ticket
)
);
3400 }
3401 }
3402 // Branch for accessing the actual lock object and set operation. This
3403 // branching is inevitable since this lock initialization does not follow the
3404 // normal dispatch path (lock table is not used).
3405 if (KMP_EXTRACT_D_TAG(lk)(*((kmp_dyna_lock_t *)(lk)) & ((1 << 8) - 1) & -
(*((kmp_dyna_lock_t *)(lk)) & 1))
!= 0
) {
13: Assuming the condition is false
14: Taking false branch
3406 lck = (kmp_user_lock_p)lk;
3407 KMP_DEBUG_ASSERT(lck != NULL)if (!(lck != __null)) { __kmp_debug_assert("lck != __null", "openmp/runtime/src/kmp_csupport.cpp"
, 3407); }
;
3408 if (__kmp_env_consistency_check) {
3409 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3410 }
3411 KMP_D_LOCK_FUNC(lk, set)__kmp_direct_set[(*((kmp_dyna_lock_t *)(lk)) & ((1 <<
8) - 1) & -(*((kmp_dyna_lock_t *)(lk)) & 1))]
(lk, global_tid);
3412 } else {
3413 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
15: 'ilk' initialized to a null pointer value
3414 lck = ilk->lock;
16: Access to field 'lock' results in a dereference of a null pointer (loaded from variable 'ilk')
3415 KMP_DEBUG_ASSERT(lck != NULL)if (!(lck != __null)) { __kmp_debug_assert("lck != __null", "openmp/runtime/src/kmp_csupport.cpp"
, 3415); }
;
3416 if (__kmp_env_consistency_check) {
3417 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3418 }
3419 KMP_I_LOCK_FUNC(ilk, set)__kmp_indirect_set[((kmp_indirect_lock_t *)(ilk))->type](lck, global_tid);
3420 }
3421
3422#else // KMP_USE_DYNAMIC_LOCK
3423
3424 // We know that the fast reduction code is only emitted by Intel compilers
3425 // with 32 byte critical sections. If there isn't enough space, then we
3426 // have to use a pointer.
3427 if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE32) {
3428 lck = (kmp_user_lock_p)crit;
3429 } else {
3430 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
3431 }
3432 KMP_DEBUG_ASSERT(lck != NULL)if (!(lck != __null)) { __kmp_debug_assert("lck != __null", "openmp/runtime/src/kmp_csupport.cpp"
, 3432); }
;
3433
3434 if (__kmp_env_consistency_check)
3435 __kmp_push_sync(global_tid, ct_critical, loc, lck);
3436
3437 __kmp_acquire_user_lock_with_checks(lck, global_tid);
3438
3439#endif // KMP_USE_DYNAMIC_LOCK
3440}
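The warning at line 3414 arises because the analyzer cannot see that either the compare-and-store at line 3395 or __kmp_init_indirect_csptr() at line 3398 always installs a non-zero value in *crit before the load at line 3413, so along its path the loaded kmp_indirect_lock_t pointer is still null. A minimal, self-contained sketch of the same pattern with the invariant made explicit is shown below; the types, the low-bit test, and the assert are illustrative stand-ins, not the runtime's own fix.

  #include <cassert>
  #include <cstdint>

  // Stand-in for kmp_indirect_lock_t: holds a pointer to the real lock object.
  struct indirect_lock { void *lock; };

  // Model of the branch at line 3405: a direct lock stores a small tag with the
  // low bit set in the word itself; otherwise the word holds a pointer to an
  // indirect_lock installed during initialization.
  static void *resolve_critical_lock(std::uintptr_t *lk) {
    if ((*lk & 1u) != 0)   // low bit set: direct lock, the word itself is the lock
      return lk;
    indirect_lock *ilk = *reinterpret_cast<indirect_lock **>(lk);
    // Make the initialization invariant explicit: a non-null pointer must have
    // been stored here before first use. Without such a check (or an analyzer
    // annotation) the dereference below is exactly what the report flags.
    assert(ilk != nullptr && "indirect critical-section lock not initialized");
    return ilk->lock;
  }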
3441
3442// used in a critical section reduce block
3443static __forceinline__inline void
3444__kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3445 kmp_critical_name *crit) {
3446
3447 kmp_user_lock_p lck;
3448
3449#if KMP_USE_DYNAMIC_LOCK1
3450
3451 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)((__kmp_user_lock_seq) >= lockseq_tas && (__kmp_user_lock_seq
) <= lockseq_rtm_spin)
) {
3452 lck = (kmp_user_lock_p)crit;
3453 if (__kmp_env_consistency_check)
3454 __kmp_pop_sync(global_tid, ct_critical, loc);
3455 KMP_D_LOCK_FUNC(lck, unset)__kmp_direct_unset[(*((kmp_dyna_lock_t *)(lck)) & ((1 <<
8) - 1) & -(*((kmp_dyna_lock_t *)(lck)) & 1))]
((kmp_dyna_lock_t *)lck, global_tid);
3456 } else {
3457 kmp_indirect_lock_t *ilk =
3458 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit))((void *)(*((kmp_indirect_lock_t **)crit)));
3459 if (__kmp_env_consistency_check)
3460 __kmp_pop_sync(global_tid, ct_critical, loc);
3461 KMP_I_LOCK_FUNC(ilk, unset)__kmp_indirect_unset[((kmp_indirect_lock_t *)(ilk))->type](ilk->lock, global_tid);
3462 }
3463
3464#else // KMP_USE_DYNAMIC_LOCK
3465
3466 // We know that the fast reduction code is only emitted by Intel compilers
3467 // with 32 byte critical sections. If there isn't enough space, then we have
3468 // to use a pointer.
3469 if (__kmp_base_user_lock_size > 32) {
3470 lck = *((kmp_user_lock_p *)crit);
3471 KMP_ASSERT(lck != NULL)if (!(lck != __null)) { __kmp_debug_assert("lck != NULL", "openmp/runtime/src/kmp_csupport.cpp"
, 3471); }
;
3472 } else {
3473 lck = (kmp_user_lock_p)crit;
3474 }
3475
3476 if (__kmp_env_consistency_check)
3477 __kmp_pop_sync(global_tid, ct_critical, loc);
3478
3479 __kmp_release_user_lock_with_checks(lck, global_tid);
3480
3481#endif // KMP_USE_DYNAMIC_LOCK
3482} // __kmp_end_critical_section_reduce_block
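The non-dynamic-lock branches above pick between using the 32-byte critical name in place and chasing a pointer stored in it, as the "32 byte critical sections" comment explains. A self-contained sketch of that size-based dispatch follows, with stand-in types and names rather than the kmp ones.

  #include <cstddef>

  static const std::size_t kCriticalSize = 32;  // mirrors INTEL_CRITICAL_SIZE

  struct small_lock { int poll; };              // fits in the 32-byte buffer
  struct big_lock { unsigned char state[64]; }; // does not fit

  template <class Lock>
  Lock *lock_from_critical_name(void *crit /* 32-byte, zero-initialized */) {
    if (sizeof(Lock) <= kCriticalSize)
      return static_cast<Lock *>(crit);         // use the buffer itself as the lock
    Lock **slot = static_cast<Lock **>(crit);   // otherwise the buffer stores a pointer
    if (*slot == nullptr)
      *slot = new Lock();                       // the runtime installs this once (with a CAS)
    return *slot;
  }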
3483
3484static __forceinline__inline int
3485__kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
3486 int *task_state) {
3487 kmp_team_t *team;
3488
3489 // Check if we are inside the teams construct?
3490 if (th->th.th_teams_microtask) {
3491 *team_p = team = th->th.th_team;
3492 if (team->t.t_level == th->th.th_teams_level) {
3493 // This is reduction at teams construct.
3494 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid)if (!(!th->th.th_info.ds.ds_tid)) { __kmp_debug_assert("!th->th.th_info.ds.ds_tid"
, "openmp/runtime/src/kmp_csupport.cpp", 3494); }
; // AC: check that tid == 0
3495 // Let's swap teams temporarily for the reduction.
3496 th->th.th_info.ds.ds_tid = team->t.t_master_tid;
3497 th->th.th_team = team->t.t_parent;
3498 th->th.th_team_nproc = th->th.th_team->t.t_nproc;
3499 th->th.th_task_team = th->th.th_team->t.t_task_team[0];
3500 *task_state = th->th.th_task_state;
3501 th->th.th_task_state = 0;
3502
3503 return 1;
3504 }
3505 }
3506 return 0;
3507}
3508
3509static __forceinline__inline void
3510__kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
3511 // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction.
3512 th->th.th_info.ds.ds_tid = 0;
3513 th->th.th_team = team;
3514 th->th.th_team_nproc = team->t.t_nproc;
3515 th->th.th_task_team = team->t.t_task_team[task_state];
3516 __kmp_type_convert(task_state, &(th->th.th_task_state));
3517}
3518
3519/* 2.a.i. Reduce Block without a terminating barrier */
3520/*!
3521@ingroup SYNCHRONIZATION
3522@param loc source location information
3523@param global_tid global thread number
3524@param num_vars number of items (variables) to be reduced
3525@param reduce_size size of data in bytes to be reduced
3526@param reduce_data pointer to data to be reduced
3527@param reduce_func callback function providing reduction operation on two
3528operands and returning result of reduction in lhs_data
3529@param lck pointer to the unique lock data structure
3530@result 1 for the primary thread, 0 for all other team threads, 2 for all team
3531threads if atomic reduction needed
3532
3533The nowait version is used for a reduce clause with the nowait argument.
3534*/
3535kmp_int32
3536__kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3537 size_t reduce_size, void *reduce_data,
3538 void (*reduce_func)(void *lhs_data, void *rhs_data),
3539 kmp_critical_name *lck) {
3540
3541 KMP_COUNT_BLOCK(REDUCE_nowait)((void)0);
3542 int retval = 0;
3543 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3544 kmp_info_t *th;
3545 kmp_team_t *team;
3546 int teams_swapped = 0, task_state;
3547 KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_reduce_nowait() enter: called T#%d\n"
, global_tid); }
;
3548 __kmp_assert_valid_gtid(global_tid);
3549
3550 // why do we need this initialization here at all?
3551 // Reduction clause can not be used as a stand-alone directive.
3552
3553 // do not call __kmp_serial_initialize(), it will be called by
3554 // __kmp_parallel_initialize() if needed
3555 // possible detection of false-positive race by the threadchecker ???
3556 if (!TCR_4(__kmp_init_parallel)(__kmp_init_parallel))
3557 __kmp_parallel_initialize();
3558
3559 __kmp_resume_if_soft_paused();
3560
3561// check correctness of reduce block nesting
3562#if KMP_USE_DYNAMIC_LOCK1
3563 if (__kmp_env_consistency_check)
3564 __kmp_push_sync(global_tid, ct_reduce, loc, NULL__null, 0);
3565#else
3566 if (__kmp_env_consistency_check)
3567 __kmp_push_sync(global_tid, ct_reduce, loc, NULL__null);
3568#endif
3569
3570 th = __kmp_thread_from_gtid(global_tid);
3571 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3572
3573 // packed_reduction_method value will be reused by __kmp_end_reduce* function,
3574 // the value should be kept in a variable
3575 // the variable should be either a construct-specific or thread-specific
3576 // property, not a team specific property
3577 // (a thread can reach the next reduce block on the next construct, reduce
3578 // method may differ on the next construct)
3579 // an ident_t "loc" parameter could be used as a construct-specific property
3580 // (what if loc == 0?)
3581 // (if both construct-specific and team-specific variables were shared,
3582 // then unness extra syncs should be needed)
3583 // a thread-specific variable is better regarding two issues above (next
3584 // construct and extra syncs)
3585 // a thread-specific "th_local.reduction_method" variable is used currently
3586 // each thread executes 'determine' and 'set' lines (no need to execute by one
3587 // thread, to avoid unness extra syncs)
3588
3589 packed_reduction_method = __kmp_determine_reduction_method(
3590 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3591 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3592
3593 OMPT_REDUCTION_DECL(th, global_tid)ompt_data_t *my_task_data = (&(th->th.th_current_task->
ompt_task_info.task_data)); ompt_data_t *my_parallel_data = (
&(th->th.th_team->t.ompt_team_info.parallel_data));
void *return_address = __ompt_load_return_address(global_tid
);
;
3594 if (packed_reduction_method == critical_reduce_block) {
3595
3596 OMPT_REDUCTION_BEGINif (ompt_enabled.enabled && ompt_enabled.ompt_callback_reduction
) { ompt_callbacks.ompt_callback_reduction_callback( ompt_sync_region_reduction
, ompt_scope_begin, my_parallel_data, my_task_data, return_address
); }
;
3597
3598 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3599 retval = 1;
3600
3601 } else if (packed_reduction_method == empty_reduce_block) {
3602
3603 OMPT_REDUCTION_BEGINif (ompt_enabled.enabled && ompt_enabled.ompt_callback_reduction
) { ompt_callbacks.ompt_callback_reduction_callback( ompt_sync_region_reduction
, ompt_scope_begin, my_parallel_data, my_task_data, return_address
); }
;
3604
3605 // usage: if team size == 1, no synchronization is required ( Intel
3606 // platforms only )
3607 retval = 1;
3608
3609 } else if (packed_reduction_method == atomic_reduce_block) {
3610
3611 retval = 2;
3612
3613 // all threads should do this pop here (because __kmpc_end_reduce_nowait()
3614 // won't be called by the code gen)
3615 // (it's not quite good, because the checking block has been closed by
3616 // this 'pop',
3617 // but atomic operation has not been executed yet, will be executed
3618 // slightly later, literally on next instruction)
3619 if (__kmp_env_consistency_check)
3620 __kmp_pop_sync(global_tid, ct_reduce, loc);
3621
3622 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,((((enum _reduction_method)((packed_reduction_method) & (
0x0000FF00)))) == (tree_reduce_block))
3623 tree_reduce_block)((((enum _reduction_method)((packed_reduction_method) & (
0x0000FF00)))) == (tree_reduce_block))
) {
3624
3625// AT: performance issue: a real barrier here
3626// AT: (if primary thread is slow, other threads are blocked here waiting for
3627// the primary thread to come and release them)
3628// AT: (it's not what a customer might expect specifying NOWAIT clause)
3629// AT: (specifying NOWAIT won't result in improvement of performance, it'll
3630// be confusing to a customer)
3631// AT: another implementation of *barrier_gather*nowait() (or some other design)
3632// might go faster and be more in line with sense of NOWAIT
3633// AT: TO DO: do epcc test and compare times
3634
3635// this barrier should be invisible to a customer and to the threading profile
3636// tool (it's neither a terminating barrier nor customer's code, it's
3637// used for an internal purpose)
3638#if OMPT_SUPPORT1
3639 // JP: can this barrier potentially leed to task scheduling?
3640 // JP: as long as there is a barrier in the implementation, OMPT should and
3641 // will provide the barrier events
3642 // so we set-up the necessary frame/return addresses.
3643 ompt_frame_t *ompt_frame;
3644 if (ompt_enabled.enabled) {
3645 __ompt_get_task_info_internal(0, NULL__null, NULL__null, &ompt_frame, NULL__null, NULL__null);
3646 if (ompt_frame->enter_frame.ptr == NULL__null)
3647 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0)__builtin_frame_address(0);
3648 }
3649 OMPT_STORE_RETURN_ADDRESS(global_tid)OmptReturnAddressGuard ReturnAddressGuard{global_tid, __builtin_return_address
(0)};
;
3650#endif
3651#if USE_ITT_NOTIFY1
3652 __kmp_threads[global_tid]->th.th_ident = loc;
3653#endif
3654 retval =
3655 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method)((enum barrier_type)((packed_reduction_method) & (0x000000FF
)))
,
3656 global_tid, FALSE0, reduce_size, reduce_data, reduce_func);
3657 retval = (retval != 0) ? (0) : (1);
3658#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
3659 if (ompt_enabled.enabled) {
3660 ompt_frame->enter_frame = ompt_data_none{0};
3661 }
3662#endif
3663
3664 // all other workers except primary thread should do this pop here
3665 // ( none of other workers will get to __kmpc_end_reduce_nowait() )
3666 if (__kmp_env_consistency_check) {
3667 if (retval == 0) {
3668 __kmp_pop_sync(global_tid, ct_reduce, loc);
3669 }
3670 }
3671
3672 } else {
3673
3674 // should never reach this block
3675 KMP_ASSERT(0)if (!(0)) { __kmp_debug_assert("0", "openmp/runtime/src/kmp_csupport.cpp"
, 3675); }
; // "unexpected method"
3676 }
3677 if (teams_swapped) {
3678 __kmp_restore_swapped_teams(th, team, task_state);
3679 }
3680 KA_TRACE(
3681 10,
3682 ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
3683 global_tid, packed_reduction_method, retval));
3684
3685 return retval;
3686}
3687
3688/*!
3689@ingroup SYNCHRONIZATION
3690@param loc source location information
3691@param global_tid global thread id.
3692@param lck pointer to the unique lock data structure
3693
3694Finish the execution of a reduce nowait.
3695*/
3696void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
3697 kmp_critical_name *lck) {
3698
3699 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3700
3701 KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
3702 __kmp_assert_valid_gtid(global_tid);
3703
3704 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3705
3706 OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid);
3707
3708 if (packed_reduction_method == critical_reduce_block) {
3709
3710 __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3711 OMPT_REDUCTION_END;
3712
3713 } else if (packed_reduction_method == empty_reduce_block) {
3714
3715 // usage: if team size == 1, no synchronization is required ( on Intel
3716 // platforms only )
3717
3718 OMPT_REDUCTION_END;
3719
3720 } else if (packed_reduction_method == atomic_reduce_block) {
3721
3722 // neither primary thread nor other workers should get here
3723 // (code gen does not generate this call in case 2: atomic reduce block)
3724 // actually it's better to remove this elseif at all;
3725 // after removal this value will checked by the 'else' and will assert
3726
3727 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3728 tree_reduce_block)) {
3729
3730 // only primary thread gets here
3731 // OMPT: tree reduction is annotated in the barrier code
3732
3733 } else {
3734
3735 // should never reach this block
3736 KMP_ASSERT(0); // "unexpected method"
3737 }
3738
3739 if (__kmp_env_consistency_check)
3740 __kmp_pop_sync(global_tid, ct_reduce, loc);
3741
3742 KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
3743 global_tid, packed_reduction_method));
3744
3745 return;
3746}
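For context, a minimal user-level sketch of the construct that the *_nowait entry points above implement: a worksharing loop carrying a reduction clause plus nowait. The variable names and loop bounds are illustrative only; an OpenMP compiler lowers the combine step onto __kmpc_reduce_nowait()/__kmpc_end_reduce_nowait() (or onto atomics, depending on the packed reduction method).

#include <cstdio>

int main() {
  long sum = 0;
  // The reduction + nowait below is what makes the compiler emit the
  // __kmpc_reduce_nowait()/__kmpc_end_reduce_nowait() calls documented above.
#pragma omp parallel
  {
#pragma omp for reduction(+ : sum) nowait
    for (int i = 0; i < 1000; ++i)
      sum += i;
  }
  std::printf("sum = %ld\n", sum);
  return 0;
}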
3747
3748/* 2.a.ii. Reduce Block with a terminating barrier */
3749
3750/*!
3751@ingroup SYNCHRONIZATION
3752@param loc source location information
3753@param global_tid global thread number
3754@param num_vars number of items (variables) to be reduced
3755@param reduce_size size of data in bytes to be reduced
3756@param reduce_data pointer to data to be reduced
3757@param reduce_func callback function providing reduction operation on two
3758operands and returning result of reduction in lhs_data
3759@param lck pointer to the unique lock data structure
3760@result 1 for the primary thread, 0 for all other team threads, 2 for all team
3761threads if atomic reduction needed
3762
3763A blocking reduce that includes an implicit barrier.
3764*/
3765kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3766 size_t reduce_size, void *reduce_data,
3767 void (*reduce_func)(void *lhs_data, void *rhs_data),
3768 kmp_critical_name *lck) {
3769 KMP_COUNT_BLOCK(REDUCE_wait);
3770 int retval = 0;
3771 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3772 kmp_info_t *th;
3773 kmp_team_t *team;
3774 int teams_swapped = 0, task_state;
3775
3776 KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
[1] Assuming 'kmp_a_debug' is < 10
[2] Taking false branch
3777 __kmp_assert_valid_gtid(global_tid);
3778
3779 // why do we need this initialization here at all?
3780 // Reduction clause can not be a stand-alone directive.
3781
3782 // do not call __kmp_serial_initialize(), it will be called by
3783 // __kmp_parallel_initialize() if needed
3784 // possible detection of false-positive race by the threadchecker ???
3785 if (!TCR_4(__kmp_init_parallel))
[3] Assuming '__kmp_init_parallel' is not equal to 0
[4] Taking false branch
3786 __kmp_parallel_initialize();
3787
3788 __kmp_resume_if_soft_paused();
3789
3790// check correctness of reduce block nesting
3791#if KMP_USE_DYNAMIC_LOCK
3792 if (__kmp_env_consistency_check)
[5] Assuming '__kmp_env_consistency_check' is 0
[6] Taking false branch
3793 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3794#else
3795 if (__kmp_env_consistency_check)
3796 __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3797#endif
3798
3799 th = __kmp_thread_from_gtid(global_tid);
3800 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3801
3802 packed_reduction_method = __kmp_determine_reduction_method(
3803 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3804 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3805
3806 OMPT_REDUCTION_DECL(th, global_tid);
3807
3808 if (packed_reduction_method == critical_reduce_block) {
[7] Assuming 'packed_reduction_method' is equal to critical_reduce_block
3809
3810 OMPT_REDUCTION_BEGIN;
[8] Assuming field 'enabled' is 0
3811 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
[9] Calling '__kmp_enter_critical_section_reduce_block'
3812 retval = 1;
3813
3814 } else if (packed_reduction_method == empty_reduce_block) {
3815
3816 OMPT_REDUCTION_BEGIN;
3817 // usage: if team size == 1, no synchronization is required ( Intel
3818 // platforms only )
3819 retval = 1;
3820
3821 } else if (packed_reduction_method == atomic_reduce_block) {
3822
3823 retval = 2;
3824
3825 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3826 tree_reduce_block)) {
3827
3828// case tree_reduce_block:
3829// this barrier should be visible to a customer and to the threading profile
3830// tool (it's a terminating barrier on constructs if NOWAIT not specified)
3831#if OMPT_SUPPORT
3832 ompt_frame_t *ompt_frame;
3833 if (ompt_enabled.enabled) {
3834 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3835 if (ompt_frame->enter_frame.ptr == NULL)
3836 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3837 }
3838 OMPT_STORE_RETURN_ADDRESS(global_tid);
3839#endif
3840#if USE_ITT_NOTIFY
3841 __kmp_threads[global_tid]->th.th_ident =
3842 loc; // needed for correct notification of frames
3843#endif
3844 retval =
3845 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3846 global_tid, TRUE, reduce_size, reduce_data, reduce_func);
3847 retval = (retval != 0) ? (0) : (1);
3848#if OMPT_SUPPORT && OMPT_OPTIONAL
3849 if (ompt_enabled.enabled) {
3850 ompt_frame->enter_frame = ompt_data_none;
3851 }
3852#endif
3853
3854 // all other workers except primary thread should do this pop here
3855 // (none of other workers except primary will enter __kmpc_end_reduce())
3856 if (__kmp_env_consistency_check) {
3857 if (retval == 0) { // 0: all other workers; 1: primary thread
3858 __kmp_pop_sync(global_tid, ct_reduce, loc);
3859 }
3860 }
3861
3862 } else {
3863
3864 // should never reach this block
3865 KMP_ASSERT(0); // "unexpected method"
3866 }
3867 if (teams_swapped) {
3868 __kmp_restore_swapped_teams(th, team, task_state);
3869 }
3870
3871 KA_TRACE(10,
3872 ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
3873 global_tid, packed_reduction_method, retval));
3874 return retval;
3875}
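A hedged sketch of the three-way dispatch that generated code performs on the value documented in the @result comment above (1, 2, or 0). The names reduce_dispatch, partial and result are stand-ins invented for illustration; real generated code also threads loc, gtid and lck through the runtime calls.

#include <atomic>

static void reduce_dispatch(int retval, long partial, std::atomic<long> &result) {
  switch (retval) {
  case 1: // this thread combines its partial value into the shared result,
          // then the generated code calls __kmpc_end_reduce()
    result += partial;
    break;
  case 2: // atomic_reduce_block: every thread combines with one atomic RMW;
          // for the nowait variant no end call is emitted at all
    result.fetch_add(partial);
    break;
  default: // 0: remaining workers; the runtime already folded their data
           // via reduce_func inside the reduction barrier
    break;
  }
}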
3876
3877/*!
3878@ingroup SYNCHRONIZATION
3879@param loc source location information
3880@param global_tid global thread id.
3881@param lck pointer to the unique lock data structure
3882
3883Finish the execution of a blocking reduce.
3884The <tt>lck</tt> pointer must be the same as that used in the corresponding
3885start function.
3886*/
3887void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
3888 kmp_critical_name *lck) {
3889
3890 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3891 kmp_info_t *th;
3892 kmp_team_t *team;
3893 int teams_swapped = 0, task_state;
3894
3895 KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
3896 __kmp_assert_valid_gtid(global_tid);
3897
3898 th = __kmp_thread_from_gtid(global_tid);
3899 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3900
3901 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3902
3903 // this barrier should be visible to a customer and to the threading profile
3904 // tool (it's a terminating barrier on constructs if NOWAIT not specified)
3905 OMPT_REDUCTION_DECL(th, global_tid);
3906
3907 if (packed_reduction_method == critical_reduce_block) {
3908 __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3909
3910 OMPT_REDUCTION_END;
3911
3912// TODO: implicit barrier: should be exposed
3913#if OMPT_SUPPORT
3914 ompt_frame_t *ompt_frame;
3915 if (ompt_enabled.enabled) {
3916 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3917 if (ompt_frame->enter_frame.ptr == NULL)
3918 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3919 }
3920 OMPT_STORE_RETURN_ADDRESS(global_tid);
3921#endif
3922#if USE_ITT_NOTIFY
3923 __kmp_threads[global_tid]->th.th_ident = loc;
3924#endif
3925 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3926#if OMPT_SUPPORT && OMPT_OPTIONAL
3927 if (ompt_enabled.enabled) {
3928 ompt_frame->enter_frame = ompt_data_none;
3929 }
3930#endif
3931
3932 } else if (packed_reduction_method == empty_reduce_block) {
3933
3934 OMPT_REDUCTION_END;
3935
3936// usage: if team size==1, no synchronization is required (Intel platforms only)
3937
3938// TODO: implicit barrier: should be exposed
3939#if OMPT_SUPPORT
3940 ompt_frame_t *ompt_frame;
3941 if (ompt_enabled.enabled) {
3942 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3943 if (ompt_frame->enter_frame.ptr == NULL)
3944 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3945 }
3946 OMPT_STORE_RETURN_ADDRESS(global_tid);
3947#endif
3948#if USE_ITT_NOTIFY
3949 __kmp_threads[global_tid]->th.th_ident = loc;
3950#endif
3951 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3952#if OMPT_SUPPORT && OMPT_OPTIONAL
3953 if (ompt_enabled.enabled) {
3954 ompt_frame->enter_frame = ompt_data_none;
3955 }
3956#endif
3957
3958 } else if (packed_reduction_method == atomic_reduce_block) {
3959
3960#if OMPT_SUPPORT
3961 ompt_frame_t *ompt_frame;
3962 if (ompt_enabled.enabled) {
3963 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3964 if (ompt_frame->enter_frame.ptr == NULL)
3965 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3966 }
3967 OMPT_STORE_RETURN_ADDRESS(global_tid);
3968#endif
3969// TODO: implicit barrier: should be exposed
3970#if USE_ITT_NOTIFY
3971 __kmp_threads[global_tid]->th.th_ident = loc;
3972#endif
3973 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3974#if OMPT_SUPPORT && OMPT_OPTIONAL
3975 if (ompt_enabled.enabled) {
3976 ompt_frame->enter_frame = ompt_data_none;
3977 }
3978#endif
3979
3980 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3981 tree_reduce_block)) {
3982
3983 // only primary thread executes here (primary releases all other workers)
3984 __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3985 global_tid);
3986
3987 } else {
3988
3989 // should never reach this block
3990 KMP_ASSERT(0); // "unexpected method"
3991 }
3992 if (teams_swapped) {
3993 __kmp_restore_swapped_teams(th, team, task_state);
3994 }
3995
3996 if (__kmp_env_consistency_check)
3997 __kmp_pop_sync(global_tid, ct_reduce, loc);
3998
3999 KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
4000 global_tid, packed_reduction_method));
4001
4002 return;
4003}
4004
4005#undef __KMP_GET_REDUCTION_METHOD
4006#undef __KMP_SET_REDUCTION_METHOD
4007
4008/* end of interface to fast scalable reduce routines */
4009
4010kmp_uint64 __kmpc_get_taskid() {
4011
4012 kmp_int32 gtid;
4013 kmp_info_t *thread;
4014
4015 gtid = __kmp_get_gtid();
4016 if (gtid < 0) {
4017 return 0;
4018 }
4019 thread = __kmp_thread_from_gtid(gtid);
4020 return thread->th.th_current_task->td_task_id;
4021
4022} // __kmpc_get_taskid
4023
4024kmp_uint64 __kmpc_get_parent_taskid() {
4025
4026 kmp_int32 gtid;
4027 kmp_info_t *thread;
4028 kmp_taskdata_t *parent_task;
4029
4030 gtid = __kmp_get_gtid();
4031 if (gtid < 0) {
4032 return 0;
4033 }
4034 thread = __kmp_thread_from_gtid(gtid);
4035 parent_task = thread->th.th_current_task->td_parent;
4036 return (parent_task == NULL ? 0 : parent_task->td_task_id);
4037
4038} // __kmpc_get_parent_taskid
4039
4040/*!
4041@ingroup WORK_SHARING
4042@param loc source location information.
4043@param gtid global thread number.
4044@param num_dims number of associated doacross loops.
4045@param dims info on loops bounds.
4046
4047Initialize doacross loop information.
4048Expect compiler send us inclusive bounds,
4049e.g. for(i=2;i<9;i+=2) lo=2, up=8, st=2.
4050*/
4051void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
4052 const struct kmp_dim *dims) {
4053 __kmp_assert_valid_gtid(gtid);
4054 int j, idx;
4055 kmp_int64 last, trace_count;
4056 kmp_info_t *th = __kmp_threads[gtid];
4057 kmp_team_t *team = th->th.th_team;
4058 kmp_uint32 *flags;
4059 kmp_disp_t *pr_buf = th->th.th_dispatch;
4060 dispatch_shared_info_t *sh_buf;
4061
4062 KA_TRACE(
4063 20,
4064 ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
4065 gtid, num_dims, !team->t.t_serialized));
4066 KMP_DEBUG_ASSERT(dims != NULL);
4067 KMP_DEBUG_ASSERT(num_dims > 0);
4068
4069 if (team->t.t_serialized) {
4070 KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
4071 return; // no dependencies if team is serialized
4072 }
4073 KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
4074 idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
4075 // the next loop
4076 sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4077
4078 // Save bounds info into allocated private buffer
4079 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
4080 pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
4081 th, sizeof(kmp_int64) * (4 * num_dims + 1));
4082 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4083 pr_buf->th_doacross_info[0] =
4084 (kmp_int64)num_dims; // first element is number of dimensions
4085 // Save also address of num_done in order to access it later without knowing
4086 // the buffer index
4087 pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
4088 pr_buf->th_doacross_info[2] = dims[0].lo;
4089 pr_buf->th_doacross_info[3] = dims[0].up;
4090 pr_buf->th_doacross_info[4] = dims[0].st;
4091 last = 5;
4092 for (j = 1; j < num_dims; ++j) {
4093 kmp_int64
4094 range_length; // To keep ranges of all dimensions but the first dims[0]
4095 if (dims[j].st == 1) { // most common case
4096 // AC: should we care of ranges bigger than LLONG_MAX? (not for now)
4097 range_length = dims[j].up - dims[j].lo + 1;
4098 } else {
4099 if (dims[j].st > 0) {
4100 KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
4101 range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
4102 } else { // negative increment
4103 KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
4104 range_length =
4105 (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
4106 }
4107 }
4108 pr_buf->th_doacross_info[last++] = range_length;
4109 pr_buf->th_doacross_info[last++] = dims[j].lo;
4110 pr_buf->th_doacross_info[last++] = dims[j].up;
4111 pr_buf->th_doacross_info[last++] = dims[j].st;
4112 }
4113
4114 // Compute total trip count.
4115 // Start with range of dims[0] which we don't need to keep in the buffer.
4116 if (dims[0].st == 1) { // most common case
4117 trace_count = dims[0].up - dims[0].lo + 1;
4118 } else if (dims[0].st > 0) {
4119 KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
4120 trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
4121 } else { // negative increment
4122 KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
4123 trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
4124 }
4125 for (j = 1; j < num_dims; ++j) {
4126 trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
4127 }
4128 KMP_DEBUG_ASSERT(trace_count > 0);
4129
4130 // Check if shared buffer is not occupied by other loop (idx -
4131 // __kmp_dispatch_num_buffers)
4132 if (idx != sh_buf->doacross_buf_idx) {
4133 // Shared buffer is occupied, wait for it to be free
4134 __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
4135 __kmp_eq_4, NULL);
4136 }
4137#if KMP_32_BIT_ARCH
4138 // Check if we are the first thread. After the CAS the first thread gets 0,
4139 // others get 1 if initialization is in progress, allocated pointer otherwise.
4140 // Treat pointer as volatile integer (value 0 or 1) until memory is allocated.
4141 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
4142 (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
4143#else
4144 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
4145 (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
4146#endif
4147 if (flags == NULL) {
4148 // we are the first thread, allocate the array of flags
4149 size_t size =
4150 (size_t)trace_count / 8 + 8; // in bytes, use single bit per iteration
4151 flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
4152 KMP_MB();
4153 sh_buf->doacross_flags = flags;
4154 } else if (flags == (kmp_uint32 *)1) {
4155#if KMP_32_BIT_ARCH
4156 // initialization is still in progress, need to wait
4157 while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
4158#else
4159 while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
4160#endif
4161 KMP_YIELD(TRUE);
4162 KMP_MB();
4163 } else {
4164 KMP_MB();
4165 }
4166 KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value
4167 pr_buf->th_doacross_flags =
4168 sh_buf->doacross_flags; // save private copy in order to not
4169 // touch shared buffer on each iteration
4170 KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
4171}
4172
4173void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
4174 __kmp_assert_valid_gtid(gtid);
4175 kmp_int64 shft;
4176 size_t num_dims, i;
4177 kmp_uint32 flag;
4178 kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4179 kmp_info_t *th = __kmp_threads[gtid];
4180 kmp_team_t *team = th->th.th_team;
4181 kmp_disp_t *pr_buf;
4182 kmp_int64 lo, up, st;
4183
4184 KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
4185 if (team->t.t_serialized) {
4186 KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
4187 return; // no dependencies if team is serialized
4188 }
4189
4190 // calculate sequential iteration number and check out-of-bounds condition
4191 pr_buf = th->th.th_dispatch;
4192 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4193 num_dims = (size_t)pr_buf->th_doacross_info[0];
4194 lo = pr_buf->th_doacross_info[2];
4195 up = pr_buf->th_doacross_info[3];
4196 st = pr_buf->th_doacross_info[4];
4197#if OMPT_SUPPORT && OMPT_OPTIONAL
4198 ompt_dependence_t deps[num_dims];
4199#endif
4200 if (st == 1) { // most common case
4201 if (vec[0] < lo || vec[0] > up) {
4202 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4203 "bounds [%lld,%lld]\n",
4204 gtid, vec[0], lo, up));
4205 return;
4206 }
4207 iter_number = vec[0] - lo;
4208 } else if (st > 0) {
4209 if (vec[0] < lo || vec[0] > up) {
4210 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4211 "bounds [%lld,%lld]\n",
4212 gtid, vec[0], lo, up));
4213 return;
4214 }
4215 iter_number = (kmp_uint64)(vec[0] - lo) / st;
4216 } else { // negative increment
4217 if (vec[0] > lo || vec[0] < up) {
4218 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4219 "bounds [%lld,%lld]\n",
4220 gtid, vec[0], lo, up));
4221 return;
4222 }
4223 iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4224 }
4225#if OMPT_SUPPORT && OMPT_OPTIONAL
4226 deps[0].variable.value = iter_number;
4227 deps[0].dependence_type = ompt_dependence_type_sink;
4228#endif
4229 for (i = 1; i < num_dims; ++i) {
4230 kmp_int64 iter, ln;
4231 size_t j = i * 4;
4232 ln = pr_buf->th_doacross_info[j + 1];
4233 lo = pr_buf->th_doacross_info[j + 2];
4234 up = pr_buf->th_doacross_info[j + 3];
4235 st = pr_buf->th_doacross_info[j + 4];
4236 if (st == 1) {
4237 if (vec[i] < lo || vec[i] > up) {
4238 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4239 "bounds [%lld,%lld]\n",
4240 gtid, vec[i], lo, up));
4241 return;
4242 }
4243 iter = vec[i] - lo;
4244 } else if (st > 0) {
4245 if (vec[i] < lo || vec[i] > up) {
4246 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4247 "bounds [%lld,%lld]\n",
4248 gtid, vec[i], lo, up));
4249 return;
4250 }
4251 iter = (kmp_uint64)(vec[i] - lo) / st;
4252 } else { // st < 0
4253 if (vec[i] > lo || vec[i] < up) {
4254 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4255 "bounds [%lld,%lld]\n",
4256 gtid, vec[i], lo, up));
4257 return;
4258 }
4259 iter = (kmp_uint64)(lo - vec[i]) / (-st);
4260 }
4261 iter_number = iter + ln * iter_number;
4262#if OMPT_SUPPORT && OMPT_OPTIONAL
4263 deps[i].variable.value = iter;
4264 deps[i].dependence_type = ompt_dependence_type_sink;
4265#endif
4266 }
4267 shft = iter_number % 32; // use 32-bit granularity
4268 iter_number >>= 5; // divided by 32
4269 flag = 1 << shft;
4270 while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
4271 KMP_YIELD(TRUE);
4272 }
4273 KMP_MB();
4274#if OMPT_SUPPORT && OMPT_OPTIONAL
4275 if (ompt_enabled.ompt_callback_dependences) {
4276 ompt_callbacks.ompt_callback(ompt_callback_dependences)(
4277 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
4278 }
4279#endif
4280 KA_TRACE(20,
4281 ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
4282 gtid, (iter_number << 5) + shft));
4283}
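The wait/post pair above flattens a multi-dimensional iteration vector into one sequential number and then into a (32-bit word, bit mask) pair. Below is a self-contained sketch of that arithmetic; the names Dim, ordinal and flatten are illustrative only and are not part of the runtime.

#include <stdint.h>
#include <vector>

struct Dim { int64_t lo, up, st; };

// 0-based ordinal of value v within one dimension, for unit, positive and
// negative strides, mirroring the runtime's three cases above.
static int64_t ordinal(const Dim &d, int64_t v) {
  if (d.st == 1) return v - d.lo;
  if (d.st > 0)  return (int64_t)((uint64_t)(v - d.lo) / d.st);
  return (int64_t)((uint64_t)(d.lo - v) / (-d.st));
}

// Flatten vec into a single iteration number, then split it into the 32-bit
// flag word index and bit mask ("% 32", ">> 5", "1 << shft" in the source).
static void flatten(const std::vector<Dim> &dims, const std::vector<int64_t> &vec,
                    int64_t &word, uint32_t &mask) {
  int64_t iter_number = ordinal(dims[0], vec[0]);
  for (size_t i = 1; i < dims.size(); ++i) {
    const Dim &d = dims[i];
    int64_t range = (d.st == 1)
                        ? d.up - d.lo + 1
                        : (d.st > 0 ? (int64_t)((uint64_t)(d.up - d.lo) / d.st) + 1
                                    : (int64_t)((uint64_t)(d.lo - d.up) / (-d.st)) + 1);
    iter_number = ordinal(d, vec[i]) + range * iter_number;
  }
  word = iter_number >> 5;                   // which 32-bit flag word
  mask = 1u << (uint32_t)(iter_number % 32); // which bit inside that word
}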
4284
4285void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
4286 __kmp_assert_valid_gtid(gtid);
4287 kmp_int64 shft;
4288 size_t num_dims, i;
4289 kmp_uint32 flag;
4290 kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4291 kmp_info_t *th = __kmp_threads[gtid];
4292 kmp_team_t *team = th->th.th_team;
4293 kmp_disp_t *pr_buf;
4294 kmp_int64 lo, st;
4295
4296 KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
4297 if (team->t.t_serialized) {
4298 KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
4299 return; // no dependencies if team is serialized
4300 }
4301
4302 // calculate sequential iteration number (same as in "wait" but no
4303 // out-of-bounds checks)
4304 pr_buf = th->th.th_dispatch;
4305 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4306 num_dims = (size_t)pr_buf->th_doacross_info[0];
4307 lo = pr_buf->th_doacross_info[2];
4308 st = pr_buf->th_doacross_info[4];
4309#if OMPT_SUPPORT && OMPT_OPTIONAL
4310 ompt_dependence_t deps[num_dims];
4311#endif
4312 if (st == 1) { // most common case
4313 iter_number = vec[0] - lo;
4314 } else if (st > 0) {
4315 iter_number = (kmp_uint64)(vec[0] - lo) / st;
4316 } else { // negative increment
4317 iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4318 }
4319#if OMPT_SUPPORT && OMPT_OPTIONAL
4320 deps[0].variable.value = iter_number;
4321 deps[0].dependence_type = ompt_dependence_type_source;
4322#endif
4323 for (i = 1; i < num_dims; ++i) {
4324 kmp_int64 iter, ln;
4325 size_t j = i * 4;
4326 ln = pr_buf->th_doacross_info[j + 1];
4327 lo = pr_buf->th_doacross_info[j + 2];
4328 st = pr_buf->th_doacross_info[j + 4];
4329 if (st == 1) {
4330 iter = vec[i] - lo;
4331 } else if (st > 0) {
4332 iter = (kmp_uint64)(vec[i] - lo) / st;
4333 } else { // st < 0
4334 iter = (kmp_uint64)(lo - vec[i]) / (-st);
4335 }
4336 iter_number = iter + ln * iter_number;
4337#if OMPT_SUPPORT && OMPT_OPTIONAL
4338 deps[i].variable.value = iter;
4339 deps[i].dependence_type = ompt_dependence_type_source;
4340#endif
4341 }
4342#if OMPT_SUPPORT && OMPT_OPTIONAL
4343 if (ompt_enabled.ompt_callback_dependences) {
4344 ompt_callbacks.ompt_callback(ompt_callback_dependences)(
4345 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
4346 }
4347#endif
4348 shft = iter_number % 32; // use 32-bit granularity
4349 iter_number >>= 5; // divided by 32
4350 flag = 1 << shft;
4351 KMP_MB();
4352 if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
4353 KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
4354 KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
4355 (iter_number << 5) + shft));
4356}
4357
4358void __kmpc_doacross_fini(ident_t *loc, int gtid) {
4359 __kmp_assert_valid_gtid(gtid);
4360 kmp_int32 num_done;
4361 kmp_info_t *th = __kmp_threads[gtid];
4362 kmp_team_t *team = th->th.th_team;
4363 kmp_disp_t *pr_buf = th->th.th_dispatch;
4364
4365 KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
4366 if (team->t.t_serialized) {
4367 KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
4368 return; // nothing to do
4369 }
4370 num_done =
4371 KMP_TEST_THEN_INC32((kmp_uintptr_t)(pr_buf->th_doacross_info[1])) + 1;
4372 if (num_done == th->th.th_team_nproc) {
4373 // we are the last thread, need to free shared resources
4374 int idx = pr_buf->th_doacross_buf_idx - 1;
4375 dispatch_shared_info_t *sh_buf =
4376 &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4377 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
4378 (kmp_int64)&sh_buf->doacross_num_done);
4379 KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
4380 KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
4381 __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
4382 sh_buf->doacross_flags = NULL;
4383 sh_buf->doacross_num_done = 0;
4384 sh_buf->doacross_buf_idx +=
4385 __kmp_dispatch_num_buffers; // free buffer for future re-use
4386 }
4387 // free private resources (need to keep buffer index forever)
4388 pr_buf->th_doacross_flags = NULL;
4389 __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
4390 pr_buf->th_doacross_info = NULL;
4391 KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
4392}
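For context, a minimal user-level doacross loop (OpenMP 4.5 depend(sink/source) syntax) of the kind a compiler lowers onto __kmpc_doacross_init()/wait()/post()/fini(); the array, bounds and dependence pattern here are illustrative only.

#include <cstdio>

#define N 64
static int a[N][N]; // zero-initialized boundary rows/columns

int main() {
#pragma omp parallel for ordered(2)
  for (int i = 1; i < N; ++i) {
    for (int j = 1; j < N; ++j) {
      // wait for the cells this one depends on ...
#pragma omp ordered depend(sink : i - 1, j) depend(sink : i, j - 1)
      a[i][j] = a[i - 1][j] + a[i][j - 1] + 1;
      // ... then signal that this cell is done
#pragma omp ordered depend(source)
    }
  }
  std::printf("a[N-1][N-1] = %d\n", a[N - 1][N - 1]);
  return 0;
}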
4393
4394/* OpenMP 5.1 Memory Management routines */
4395void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
4396 return __kmp_alloc(__kmp_entry_gtid(), 0, size, allocator);
4397}
4398
4399void *omp_aligned_alloc(size_t align, size_t size,
4400 omp_allocator_handle_t allocator) {
4401 return __kmp_alloc(__kmp_entry_gtid(), align, size, allocator);
4402}
4403
4404void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) {
4405 return __kmp_calloc(__kmp_entry_gtid(), 0, nmemb, size, allocator);
4406}
4407
4408void *omp_aligned_calloc(size_t align, size_t nmemb, size_t size,
4409 omp_allocator_handle_t allocator) {
4410 return __kmp_calloc(__kmp_entry_gtid(), align, nmemb, size, allocator);
4411}
4412
4413void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator,
4414 omp_allocator_handle_t free_allocator) {
4415 return __kmp_realloc(__kmp_entry_gtid(), ptr, size, allocator,
4416 free_allocator);
4417}
4418
4419void omp_free(void *ptr, omp_allocator_handle_t allocator) {
4420 ___kmpc_free(__kmp_entry_gtid(), ptr, allocator);
4421}
4422/* end of OpenMP 5.1 Memory Management routines */
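A hedged usage sketch for the allocation entry points above, using the predefined omp_default_mem_alloc allocator; omp_aligned_alloc needs an OpenMP 5.1 runtime such as this one, and the buffer size here is illustrative.

#include <omp.h>
#include <cstdio>

int main() {
  // 64-byte aligned buffer from the default memory space
  double *buf = (double *)omp_aligned_alloc(64, 1024 * sizeof(double),
                                            omp_default_mem_alloc);
  if (buf == nullptr)
    return 1;
  buf[0] = 42.0;
  std::printf("%g\n", buf[0]);
  omp_free(buf, omp_default_mem_alloc);
  return 0;
}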
4423
4424int __kmpc_get_target_offload(void) {
4425 if (!__kmp_init_serial) {
4426 __kmp_serial_initialize();
4427 }
4428 return __kmp_target_offload;
4429}
4430
4431int __kmpc_pause_resource(kmp_pause_status_t level) {
4432 if (!__kmp_init_serial) {
4433 return 1; // Can't pause if runtime is not initialized
4434 }
4435 return __kmp_pause_resource(level);
4436}
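A hedged sketch of the user-facing call that reaches __kmp_pause_resource() through this entry point: omp_pause_resource_all() with the soft pause kind (OpenMP 5.0). A zero return value means the pause request was honored.

#include <omp.h>
#include <cstdio>

int main() {
#pragma omp parallel
  { } // warm up the runtime so there is something to pause

  if (omp_pause_resource_all(omp_pause_soft) != 0)
    std::printf("pause request was not honored\n");

#pragma omp parallel
  { } // worker threads are re-created on demand after the soft pause
  return 0;
}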
4437
4438void __kmpc_error(ident_t *loc, int severity, const char *message) {
4439 if (!__kmp_init_serial)
4440 __kmp_serial_initialize();
4441
4442 KMP_ASSERT(severity == severity_warning || severity == severity_fatal);
4443
4444#if OMPT_SUPPORT
4445 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_error) {
4446 ompt_callbacks.ompt_callback(ompt_callback_error)(
4447 (ompt_severity_t)severity, message, KMP_STRLEN(message),
4448 OMPT_GET_RETURN_ADDRESS(0));
4449 }
4450#endif // OMPT_SUPPORT
4451
4452 char *src_loc;
4453 if (loc && loc->psource) {
4454 kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false);
4455 src_loc =
4456 __kmp_str_format("%s:%d:%d", str_loc.file, str_loc.line, str_loc.col);
4457 __kmp_str_loc_free(&str_loc);
4458 } else {
4459 src_loc = __kmp_str_format("unknown");
4460 }
4461
4462 if (severity == severity_warning)
4463 KMP_WARNING(UserDirectedWarning, src_loc, message);
4464 else
4465 KMP_FATAL(UserDirectedError, src_loc, message);
4466
4467 __kmp_str_free(&src_loc);
4468}
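__kmpc_error() backs the OpenMP 5.1 'error' directive when at(execution) is used. A hedged user-level sketch follows; it assumes a compiler with OpenMP 5.1 support, and the condition and message text are illustrative.

#include <cstdio>

int main() {
  int bad_input = 1;
  if (bad_input) {
    // Lowered to a runtime call that prints a user-directed warning; with
    // severity(fatal) the runtime would abort instead.
#pragma omp error at(execution) severity(warning) message("input looks bad")
  }
  std::puts("continuing after the runtime warning");
  return 0;
}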
4469
4470// Mark begin of scope directive.
4471void __kmpc_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
4472// reserved is for extension of scope directive and not used.
4473#if OMPT_SUPPORT && OMPT_OPTIONAL
4474 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
4475 kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
4476 int tid = __kmp_tid_from_gtid(gtid);
4477 ompt_callbacks.ompt_callback(ompt_callback_work)(
4478 ompt_work_scope, ompt_scope_begin,
4479 &(team->t.ompt_team_info.parallel_data),
4480 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
4481 OMPT_GET_RETURN_ADDRESS(0));
4482 }
4483#endif // OMPT_SUPPORT && OMPT_OPTIONAL
4484}
4485
4486// Mark end of scope directive
4487void __kmpc_end_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
4488// reserved is for extension of scope directive and not used.
4489#if OMPT_SUPPORT && OMPT_OPTIONAL
4490 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
4491 kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
4492 int tid = __kmp_tid_from_gtid(gtid);
4493 ompt_callbacks.ompt_callback(ompt_callback_work)(
4494 ompt_work_scope, ompt_scope_end,
4495 &(team->t.ompt_team_info.parallel_data),
4496 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
4497 OMPT_GET_RETURN_ADDRESS(0));
4498 }
4499#endif // OMPT_SUPPORT && OMPT_OPTIONAL
4500}
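The pair above brackets the OpenMP 5.1 'scope' construct. A hedged sketch of its typical use, a block-level reduction inside an existing parallel region; the variable names are illustrative and an OpenMP 5.1 compiler is assumed.

#include <cstdio>

int main() {
  int hits = 0;
#pragma omp parallel
  {
#pragma omp scope reduction(+ : hits)
    {
      hits += 1; // each thread contributes once
    }
    // implicit barrier at the end of the scope region (no nowait given)
  }
  std::printf("threads that entered the scope: %d\n", hits);
  return 0;
}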
4501
4502#ifdef KMP_USE_VERSION_SYMBOLS
4503// For GOMP compatibility there are two versions of each omp_* API.
4504// One is the plain C symbol and one is the Fortran symbol with an appended
4505// underscore. When we implement a specific ompc_* version of an omp_*
4506// function, we want the plain GOMP versioned symbol to alias the ompc_* version
4507// instead of the Fortran versions in kmp_ftn_entry.h
4508extern "C" {
4509// Have to undef these from omp.h so they aren't translated into
4510// their ompc counterparts in the KMP_VERSION_OMPC_SYMBOL macros below
4511#ifdef omp_set_affinity_format
4512#undef omp_set_affinity_format
4513#endif
4514#ifdef omp_get_affinity_format
4515#undef omp_get_affinity_format
4516#endif
4517#ifdef omp_display_affinity
4518#undef omp_display_affinity
4519#endif
4520#ifdef omp_capture_affinity
4521#undef omp_capture_affinity
4522#endif
4523KMP_VERSION_OMPC_SYMBOL(ompc_set_affinity_format, omp_set_affinity_format, 50,
4524 "OMP_5.0");
4525KMP_VERSION_OMPC_SYMBOL(ompc_get_affinity_format, omp_get_affinity_format, 50,
4526 "OMP_5.0");
4527KMP_VERSION_OMPC_SYMBOL(ompc_display_affinity, omp_display_affinity, 50,
4528 "OMP_5.0");
4529KMP_VERSION_OMPC_SYMBOL(ompc_capture_affinity, omp_capture_affinity, 50,
4530 "OMP_5.0");
4531} // extern "C"
4532#endif
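The versioned ompc_*/omp_* pairs above expose the OpenMP 5.0 affinity-format API. A hedged usage sketch follows; the format string is illustrative (field specifiers: %n thread number, %N team size, %H host name, %A affinity list), and passing NULL to omp_display_affinity means "use the current affinity format".

#include <omp.h>

int main() {
  omp_set_affinity_format("T#%n of %N on %H, affinity {%A}");
#pragma omp parallel
  {
    omp_display_affinity(nullptr); // print one line per thread, current format
  }
  return 0;
}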