Bug Summary

File: projects/openmp/runtime/src/kmp_csupport.cpp
Warning: line 1611, column 29
Dereference of undefined pointer value

Annotated Source Code


clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name kmp_csupport.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -mrelocation-model pic -pic-level 2 -mthread-model posix -fmath-errno -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -momit-leaf-frame-pointer -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-8/lib/clang/8.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -D omp_EXPORTS -I /build/llvm-toolchain-snapshot-8~svn345461/build-llvm/projects/openmp/runtime/src -I /build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src -I /build/llvm-toolchain-snapshot-8~svn345461/build-llvm/include -I /build/llvm-toolchain-snapshot-8~svn345461/include -I /build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/i18n -I /build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/include/50 -I /build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/thirdparty/ittnotify -U NDEBUG -D _GNU_SOURCE -D _REENTRANT -D _FORTIFY_SOURCE=2 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/include/clang/8.0.0/include/ -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-8/lib/clang/8.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -Wno-switch -Wno-missing-field-initializers -Wno-missing-braces -std=c++11 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-8~svn345461/build-llvm/projects/openmp/runtime/src -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -fno-rtti -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -o /tmp/scan-build-2018-10-27-211344-32123-1 -x c++ /build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_csupport.cpp -faddrsig
1/*
2 * kmp_csupport.cpp -- kfront linkage support for OpenMP.
3 */
4
5//===----------------------------------------------------------------------===//
6//
7// The LLVM Compiler Infrastructure
8//
9// This file is dual licensed under the MIT and the University of Illinois Open
10// Source Licenses. See LICENSE.txt for details.
11//
12//===----------------------------------------------------------------------===//
13
14#define __KMP_IMP
15#include "omp.h" /* extern "C" declarations of user-visible routines */
16#include "kmp.h"
17#include "kmp_error.h"
18#include "kmp_i18n.h"
19#include "kmp_itt.h"
20#include "kmp_lock.h"
21#include "kmp_stats.h"
22
23#if OMPT_SUPPORT
24#include "ompt-specific.h"
25#endif
26
27#define MAX_MESSAGE 512
28
29// flags will be used in future, e.g. to implement openmp_strict library
30// restrictions
31
32/*!
33 * @ingroup STARTUP_SHUTDOWN
34 * @param loc in source location information
35 * @param flags in for future use (currently ignored)
36 *
37 * Initialize the runtime library. This call is optional; if it is not made then
38 * it will be implicitly called by attempts to use other library functions.
39 */
40void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
41 // By default __kmpc_begin() is no-op.
42 char *env;
43 if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
44 __kmp_str_match_true(env)) {
45 __kmp_middle_initialize();
46 KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
47 } else if (__kmp_ignore_mppbeg() == FALSE) {
48 // By default __kmp_ignore_mppbeg() returns TRUE.
49 __kmp_internal_begin();
50 KC_TRACE(10, ("__kmpc_begin: called\n"));
51 }
52}
53
54/*!
55 * @ingroup STARTUP_SHUTDOWN
56 * @param loc source location information
57 *
58 * Shutdown the runtime library. This is also optional, and even if called will
59 * not do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to
60 * zero.
61 */
62void __kmpc_end(ident_t *loc) {
63 // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end()
64 // call no-op. However, this can be overridden with KMP_IGNORE_MPPEND
65 // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
66 // returns FALSE and __kmpc_end() will unregister this root (it can cause
67 // library shut down).
68 if (__kmp_ignore_mppend() == FALSE) {
69 KC_TRACE(10, ("__kmpc_end: called\n"));
70 KA_TRACE(30, ("__kmpc_end\n"));
71
72 __kmp_internal_end_thread(-1);
73 }
74#if KMP_OS_WINDOWS && OMPT_SUPPORT
75 // Normal exit process on Windows does not allow worker threads of the final
76 // parallel region to finish reporting their events, so shutting down the
77 // library here fixes the issue at least for the cases where __kmpc_end() is
78 // placed properly.
79 if (ompt_enabled.enabled)
80 __kmp_internal_end_library(__kmp_gtid_get_specific());
81#endif
82}
83
84/*!
85@ingroup THREAD_STATES
86@param loc Source location information.
87@return The global thread index of the active thread.
88
89This function can be called in any context.
90
91If the runtime has only been entered at the outermost level from a
92single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is
93that which would be returned by omp_get_thread_num() in the outermost
94active parallel construct. (Or zero if there is no active parallel
95construct, since the master thread is necessarily thread zero).
96
97If multiple non-OpenMP threads all enter an OpenMP construct then this
98will be a unique thread identifier among all the threads created by
99the OpenMP runtime (but the value cannot be defined in terms of
100OpenMP thread ids returned by omp_get_thread_num()).
101*/
102kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
103 kmp_int32 gtid = __kmp_entry_gtid();
104
105 KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));
106
107 return gtid;
108}
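/*
A minimal sketch of calling this entry point directly from user code; the
zero-initialized ident_t is an illustrative placeholder rather than anything
this file defines:
@code
static ident_t dummy_loc = {0}; // placeholder source location

kmp_int32 current_gtid(void) {
  // 0 for the initial thread outside any parallel region, otherwise a
  // runtime-wide unique id for the calling thread
  return __kmpc_global_thread_num(&dummy_loc);
}
@endcode
*/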
109
110/*!
111@ingroup THREAD_STATES
112@param loc Source location information.
113@return The number of threads under control of the OpenMP<sup>*</sup> runtime
114
115This function can be called in any context.
116It returns the total number of threads under the control of the OpenMP runtime.
117That is not a number that can be determined by any OpenMP standard calls, since
118the library may be called from more than one non-OpenMP thread, and this
119reflects the total over all such calls. Similarly the runtime maintains
120underlying threads even when they are not active (since the cost of creating
121and destroying OS threads is high); this call counts all such threads even if
122they are not waiting for work.
123*/
124kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
125 KC_TRACE(10,
126 ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));
127
128 return TCR_4(__kmp_all_nth);
129}
130
131/*!
132@ingroup THREAD_STATES
133@param loc Source location information.
134@return The thread number of the calling thread in the innermost active parallel
135construct.
136*/
137kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
138 KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
139 return __kmp_tid_from_gtid(__kmp_entry_gtid());
140}
141
142/*!
143@ingroup THREAD_STATES
144@param loc Source location information.
145@return The number of threads in the innermost active parallel construct.
146*/
147kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
148 KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));
149
150 return __kmp_entry_thread()->th.th_team->t.t_nproc;
151}
152
153/*!
154 * @ingroup DEPRECATED
155 * @param loc location description
156 *
157 * This function need not be called. It always returns TRUE.
158 */
159kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
160#ifndef KMP_DEBUG
161
162 return TRUE;
163
164#else
165
166 const char *semi2;
167 const char *semi3;
168 int line_no;
169
170 if (__kmp_par_range == 0) {
171 return TRUE;
172 }
173 semi2 = loc->psource;
174 if (semi2 == NULL) {
175 return TRUE;
176 }
177 semi2 = strchr(semi2, ';');
178 if (semi2 == NULL) {
179 return TRUE;
180 }
181 semi2 = strchr(semi2 + 1, ';');
182 if (semi2 == NULL) {
183 return TRUE;
184 }
185 if (__kmp_par_range_filename[0]) {
186 const char *name = semi2 - 1;
187 while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
188 name--;
189 }
190 if ((*name == '/') || (*name == ';')) {
191 name++;
192 }
193 if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
194 return __kmp_par_range < 0;
195 }
196 }
197 semi3 = strchr(semi2 + 1, ';');
198 if (__kmp_par_range_routine[0]) {
199 if ((semi3 != NULL) && (semi3 > semi2) &&
200 (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
201 return __kmp_par_range < 0;
202 }
203 }
204 if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
205 if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
206 return __kmp_par_range > 0;
207 }
208 return __kmp_par_range < 0;
209 }
210 return TRUE;
211
212#endif /* KMP_DEBUG */
213}
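/*
The filtering above assumes ident_t::psource is a semicolon-separated string of
the rough form ";file;routine;line;...;;", from which it extracts the file
name, the routine name and the first line number. A purely hypothetical example
(not taken from this file):
@code
// would be matched by KMP_PAR_RANGE filtering on foo.c / compute / line 42
static ident_t example_loc = {0, KMP_IDENT_KMPC, 0, 0, ";/src/foo.c;compute;42;0;;"};
@endcode
*/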
214
215/*!
216@ingroup THREAD_STATES
217@param loc Source location information.
218@return 1 if this thread is executing inside an active parallel region, zero if
219not.
220*/
221kmp_int32 __kmpc_in_parallel(ident_t *loc) {
222 return __kmp_entry_thread()->th.th_root->r.r_active;
223}
224
225/*!
226@ingroup PARALLEL
227@param loc source location information
228@param global_tid global thread number
229@param num_threads number of threads requested for this parallel construct
230
231Set the number of threads to be used by the next fork spawned by this thread.
232This call is only required if the parallel construct has a `num_threads` clause.
233*/
234void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
235 kmp_int32 num_threads) {
236 KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
237 global_tid, num_threads));
238
239 __kmp_push_num_threads(loc, global_tid, num_threads);
240}
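/*
A rough sketch of the call sequence a compiler might emit for
"#pragma omp parallel num_threads(4)"; outlined_fn and the exact argument list
are illustrative assumptions, not the output of any particular compiler:
@code
static void outlined_fn(kmp_int32 *gtid, kmp_int32 *btid) {
  // body of the parallel region
}

void spawn_four(ident_t *loc) {
  kmp_int32 gtid = __kmpc_global_thread_num(loc);
  // emitted only because the construct carries a num_threads clause
  __kmpc_push_num_threads(loc, gtid, 4);
  __kmpc_fork_call(loc, 0, (kmpc_micro)outlined_fn);
}
@endcode
*/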
241
242void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
243 KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
244
245 /* the num_threads are automatically popped */
246}
247
248#if OMP_40_ENABLED
249
250void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
251 kmp_int32 proc_bind) {
252 KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
253 proc_bind));
254
255 __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
256}
257
258#endif /* OMP_40_ENABLED */
259
260/*!
261@ingroup PARALLEL
262@param loc source location information
263@param argc total number of arguments in the ellipsis
264@param microtask pointer to callback routine consisting of outlined parallel
265construct
266@param ... pointers to shared variables that aren't global
267
268Do the actual fork and call the microtask in the relevant number of threads.
269*/
270void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
271 int gtid = __kmp_entry_gtid();
272
273#if (KMP_STATS_ENABLED)
274 // If we were in a serial region, then stop the serial timer, record
275 // the event, and start parallel region timer
276 stats_state_e previous_state = KMP_GET_THREAD_STATE();
277 if (previous_state == stats_state_e::SERIAL_REGION) {
278 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
279 } else {
280 KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
281 }
282 int inParallel = __kmpc_in_parallel(loc);
283 if (inParallel) {
284 KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
285 } else {
286 KMP_COUNT_BLOCK(OMP_PARALLEL);
287 }
288#endif
289
290 // maybe to save thr_state is enough here
291 {
292 va_list ap;
293 va_start(ap, microtask);
294
295#if OMPT_SUPPORT
296 omp_frame_t *ompt_frame;
297 if (ompt_enabled.enabled) {
298 kmp_info_t *master_th = __kmp_threads[gtid];
299 kmp_team_t *parent_team = master_th->th.th_team;
300 ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
301 if (lwt)
302 ompt_frame = &(lwt->ompt_task_info.frame);
303 else {
304 int tid = __kmp_tid_from_gtid(gtid);
305 ompt_frame = &(
306 parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
307 }
308 ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
309 OMPT_STORE_RETURN_ADDRESS(gtid);
310 }
311#endif
312
313#if INCLUDE_SSC_MARKS
314 SSC_MARK_FORKING();
315#endif
316 __kmp_fork_call(loc, gtid, fork_context_intel, argc,
317 VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
318 VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
319/* TODO: revert workaround for Intel(R) 64 tracker #96 */
320#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
321 &ap
322#else
323 ap
324#endif
325 );
326#if INCLUDE_SSC_MARKS
327 SSC_MARK_JOINING();
328#endif
329 __kmp_join_call(loc, gtid
330#if OMPT_SUPPORT
331 ,
332 fork_context_intel
333#endif
334 );
335
336 va_end(ap);
337 }
338
339#if KMP_STATS_ENABLED
340 if (previous_state == stats_state_e::SERIAL_REGION) {
341 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
342 } else {
343 KMP_POP_PARTITIONED_TIMER();
344 }
345#endif // KMP_STATS_ENABLED
346}
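/*
A rough sketch of the lowering described above for a simple
"#pragma omp parallel shared(n)" region; the outlined function and its extra
pointer argument are illustrative assumptions rather than exact compiler
output:
@code
static void outlined_region(kmp_int32 *gtid, kmp_int32 *btid, int *n) {
  // runs once in every thread of the newly forked team
  do_chunk_of_work(*gtid, *n); // hypothetical helper
}

void run_parallel(ident_t *loc, int n) {
  // argc == 1: one shared variable is passed through the ellipsis
  __kmpc_fork_call(loc, 1, (kmpc_micro)outlined_region, &n);
}
@endcode
*/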
347
348#if OMP_40_ENABLED
349/*!
350@ingroup PARALLEL
351@param loc source location information
352@param global_tid global thread number
353@param num_teams number of teams requested for the teams construct
354@param num_threads number of threads per team requested for the teams construct
355
356Set the number of teams to be used by the teams construct.
357This call is only required if the teams construct has a `num_teams` clause
358or a `thread_limit` clause (or both).
359*/
360void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
361 kmp_int32 num_teams, kmp_int32 num_threads) {
362 KA_TRACE(20,
363 ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
364 global_tid, num_teams, num_threads));
365
366 __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
367}
368
369/*!
370@ingroup PARALLEL
371@param loc source location information
372@param argc total number of arguments in the ellipsis
373@param microtask pointer to callback routine consisting of outlined teams
374construct
375@param ... pointers to shared variables that aren't global
376
377Do the actual fork and call the microtask in the relevant number of threads.
378*/
379void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
380 ...) {
381 int gtid = __kmp_entry_gtid();
382 kmp_info_t *this_thr = __kmp_threads[gtid];
383 va_list ap;
384 va_start(ap, microtask);
385
386 KMP_COUNT_BLOCK(OMP_TEAMS);
387
388 // remember teams entry point and nesting level
389 this_thr->th.th_teams_microtask = microtask;
390 this_thr->th.th_teams_level =
391 this_thr->th.th_team->t.t_level; // AC: can be >0 on host
392
393#if OMPT_SUPPORT
394 kmp_team_t *parent_team = this_thr->th.th_team;
395 int tid = __kmp_tid_from_gtid(gtid);
396 if (ompt_enabled.enabled) {
397 parent_team->t.t_implicit_task_taskdata[tid]
398 .ompt_task_info.frame.enter_frame = OMPT_GET_FRAME_ADDRESS(1);
399 }
400 OMPT_STORE_RETURN_ADDRESS(gtid);
401#endif
402
403 // check if __kmpc_push_num_teams called, set default number of teams
404 // otherwise
405 if (this_thr->th.th_teams_size.nteams == 0) {
406 __kmp_push_num_teams(loc, gtid, 0, 0);
407 }
408 KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
409 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
410 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);
411
412 __kmp_fork_call(loc, gtid, fork_context_intel, argc,
413 VOLATILE_CAST(microtask_t)
414 __kmp_teams_master, // "wrapped" task
415 VOLATILE_CAST(launch_t) __kmp_invoke_teams_master,
416#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
417 &ap
418#else
419 ap
420#endif
421 );
422 __kmp_join_call(loc, gtid
423#if OMPT_SUPPORT
424 ,
425 fork_context_intel
426#endif
427 );
428
429 this_thr->th.th_teams_microtask = NULL;
430 this_thr->th.th_teams_level = 0;
431 *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
432 va_end(ap);
433}
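/*
A rough sketch of what a compiler might emit for
"#pragma omp teams num_teams(8) thread_limit(4)"; the names are illustrative
assumptions:
@code
static void outlined_teams(kmp_int32 *gtid, kmp_int32 *btid) {
  // executed by the master thread of each team
}

void run_teams(ident_t *loc) {
  kmp_int32 gtid = __kmpc_global_thread_num(loc);
  // needed only because num_teams/thread_limit clauses are present
  __kmpc_push_num_teams(loc, gtid, 8, 4);
  __kmpc_fork_teams(loc, 0, (kmpc_micro)outlined_teams);
}
@endcode
*/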
434#endif /* OMP_40_ENABLED */
435
436// I don't think this function should ever have been exported.
437// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
438// openmp code ever called it, but it's been exported from the RTL for so
439// long that I'm afraid to remove the definition.
440int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }
441
442/*!
443@ingroup PARALLEL
444@param loc source location information
445@param global_tid global thread number
446
447Enter a serialized parallel construct. This interface is used to handle a
448conditional parallel region, like this,
449@code
450#pragma omp parallel if (condition)
451@endcode
452when the condition is false.
453*/
454void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
455// The implementation is now in kmp_runtime.cpp so that it can share static
456// functions with kmp_fork_call since the tasks to be done are similar in
457// each case.
458#if OMPT_SUPPORT
459 OMPT_STORE_RETURN_ADDRESS(global_tid);
460#endif
461 __kmp_serialized_parallel(loc, global_tid);
462}
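/*
A rough sketch of the if-clause lowering described above: when the condition is
false the region runs serially on the encountering thread, bracketed by the two
serialized-parallel entry points (the outlined function is an illustrative
assumption):
@code
static void outlined_region(kmp_int32 *gtid, kmp_int32 *btid, int *n) {
  // parallel region body
}

void maybe_parallel(ident_t *loc, int cond, int n) {
  kmp_int32 gtid = __kmpc_global_thread_num(loc);
  if (cond) {
    __kmpc_fork_call(loc, 1, (kmpc_micro)outlined_region, &n);
  } else {
    __kmpc_serialized_parallel(loc, gtid);
    kmp_int32 zero = 0;
    outlined_region(&gtid, &zero, &n); // body runs on this thread only
    __kmpc_end_serialized_parallel(loc, gtid);
  }
}
@endcode
*/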
463
464/*!
465@ingroup PARALLEL
466@param loc source location information
467@param global_tid global thread number
468
469Leave a serialized parallel construct.
470*/
471void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
472 kmp_internal_control_t *top;
473 kmp_info_t *this_thr;
474 kmp_team_t *serial_team;
475
476 KC_TRACE(10,
477 ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));
478
479 /* skip all this code for autopar serialized loops since it results in
480 unacceptable overhead */
481 if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
482 return;
483
484 // Not autopar code
485 if (!TCR_4(__kmp_init_parallel))
486 __kmp_parallel_initialize();
487
488 this_thr = __kmp_threads[global_tid];
489 serial_team = this_thr->th.th_serial_team;
490
491#if OMP_45_ENABLED
492 kmp_task_team_t *task_team = this_thr->th.th_task_team;
493
494 // we need to wait for the proxy tasks before finishing the thread
495 if (task_team != NULL && task_team->tt.tt_found_proxy_tasks)
496 __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
497#endif
498
499 KMP_MB();
500 KMP_DEBUG_ASSERT(serial_team);
501 KMP_ASSERT(serial_team->t.t_serialized);
502 KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
503 KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
504 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
505 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
506
507#if OMPT_SUPPORT
508 if (ompt_enabled.enabled &&
509 this_thr->th.ompt_thread_info.state != omp_state_overhead) {
510 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = NULL;
511 if (ompt_enabled.ompt_callback_implicit_task) {
512 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
513 ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
514 OMPT_CUR_TASK_INFO(this_thr)->thread_num);
515 }
516
517 // reset clear the task id only after unlinking the task
518 ompt_data_t *parent_task_data;
519 __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);
520
521 if (ompt_enabled.ompt_callback_parallel_end) {
522 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
523 &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
524 ompt_parallel_invoker_program, OMPT_LOAD_RETURN_ADDRESS(global_tid));
525 }
526 __ompt_lw_taskteam_unlink(this_thr);
527 this_thr->th.ompt_thread_info.state = omp_state_overhead;
528 }
529#endif
530
531 /* If necessary, pop the internal control stack values and replace the team
532 * values */
533 top = serial_team->t.t_control_stack_top;
534 if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
535 copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
536 serial_team->t.t_control_stack_top = top->next;
537 __kmp_free(top);
538 }
539
540 // if( serial_team -> t.t_serialized > 1 )
541 serial_team->t.t_level--;
542
543 /* pop dispatch buffers stack */
544 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
545 {
546 dispatch_private_info_t *disp_buffer =
547 serial_team->t.t_dispatch->th_disp_buffer;
548 serial_team->t.t_dispatch->th_disp_buffer =
549 serial_team->t.t_dispatch->th_disp_buffer->next;
550 __kmp_free(disp_buffer);
551 }
552#if OMP_50_ENABLED
553 this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore
554#endif
555
556 --serial_team->t.t_serialized;
557 if (serial_team->t.t_serialized == 0) {
558
559/* return to the parallel section */
560
561#if KMP_ARCH_X86 || KMP_ARCH_X86_64
562 if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
563 __kmp_clear_x87_fpu_status_word();
564 __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
565 __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
566 }
567#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
568
569 this_thr->th.th_team = serial_team->t.t_parent;
570 this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;
571
572 /* restore values cached in the thread */
573 this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
574 this_thr->th.th_team_master =
575 serial_team->t.t_parent->t.t_threads[0]; /* JPH */
576 this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;
577
578 /* TODO the below shouldn't need to be adjusted for serialized teams */
579 this_thr->th.th_dispatch =
580 &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];
581
582 __kmp_pop_current_task_from_thread(this_thr);
583
584 KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
585 this_thr->th.th_current_task->td_flags.executing = 1;
586
587 if (__kmp_tasking_mode != tskm_immediate_exec) {
588 // Copy the task team from the new child / old parent team to the thread.
589 this_thr->th.th_task_team =
590 this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
591 KA_TRACE(20,
592 ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
593 "team %p\n",
594 global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
595 }
596 } else {
597 if (__kmp_tasking_mode != tskm_immediate_exec) {
598 KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
599 "depth of serial team %p to %d\n",
600 global_tid, serial_team, serial_team->t.t_serialized));
601 }
602 }
603
604 if (__kmp_env_consistency_check)
605 __kmp_pop_parallel(global_tid, NULL);
606#if OMPT_SUPPORT
607 if (ompt_enabled.enabled)
608 this_thr->th.ompt_thread_info.state =
609 ((this_thr->th.th_team_serialized) ? omp_state_work_serial
610 : omp_state_work_parallel);
611#endif
612}
613
614/*!
615@ingroup SYNCHRONIZATION
616@param loc source location information.
617
618Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though
619depending on the memory ordering convention obeyed by the compiler
620even that may not be necessary).
621*/
622void __kmpc_flush(ident_t *loc) {
623 KC_TRACE(10, ("__kmpc_flush: called\n"));
624
625 /* need explicit __mf() here since use volatile instead in library */
626 KMP_MB(); /* Flush all pending memory write invalidates. */
627
628#if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
629#if KMP_MIC
630// fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
631// We shouldn't need it, though, since the ABI rules require that
632// * If the compiler generates NGO stores it also generates the fence
633// * If users hand-code NGO stores they should insert the fence
634// therefore no incomplete unordered stores should be visible.
635#else
636 // C74404
637 // This is to address non-temporal store instructions (sfence needed).
638 // The clflush instruction is addressed either (mfence needed).
639 // Probably the non-temporal load monvtdqa instruction should also be
640 // addressed.
641 // mfence is a SSE2 instruction. Do not execute it if CPU is not SSE2.
642 if (!__kmp_cpuinfo.initialized) {
643 __kmp_query_cpuid(&__kmp_cpuinfo);
644 }
645 if (!__kmp_cpuinfo.sse2) {
646 // CPU cannot execute SSE2 instructions.
647 } else {
648#if KMP_COMPILER_ICC
649 _mm_mfence();
650#elif KMP_COMPILER_MSVC
651 MemoryBarrier();
652#else
653 __sync_synchronize();
654#endif // KMP_COMPILER_ICC
655 }
656#endif // KMP_MIC
657#elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64)
658// Nothing to see here move along
659#elif KMP_ARCH_PPC64
660// Nothing needed here (we have a real MB above).
661#if KMP_OS_CNK
662 // The flushing thread needs to yield here; this prevents a
663 // busy-waiting thread from saturating the pipeline. flush is
664 // often used in loops like this:
665 // while (!flag) {
666 // #pragma omp flush(flag)
667 // }
668 // and adding the yield here is good for at least a 10x speedup
669 // when running >2 threads per core (on the NAS LU benchmark).
670 __kmp_yield(TRUE);
671#endif
672#else
673#error Unknown or unsupported architecture
674#endif
675
676#if OMPT_SUPPORT && OMPT_OPTIONAL
677 if (ompt_enabled.ompt_callback_flush) {
678 ompt_callbacks.ompt_callback(ompt_callback_flush)(
679 __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
680 }
681#endif
682}
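/*
A minimal sketch of the lowering for a standalone flush directive; publish_flag
is an illustrative assumption:
@code
void publish_flag(ident_t *loc, volatile int *flag) {
  *flag = 1;
  // "#pragma omp flush" becomes a single runtime call (a full memory fence)
  __kmpc_flush(loc);
}
@endcode
*/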
683
684/* -------------------------------------------------------------------------- */
685/*!
686@ingroup SYNCHRONIZATION
687@param loc source location information
688@param global_tid thread id.
689
690Execute a barrier.
691*/
692void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
693 KMP_COUNT_BLOCK(OMP_BARRIER);
694 KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
695
696 if (!TCR_4(__kmp_init_parallel))
697 __kmp_parallel_initialize();
698
699 if (__kmp_env_consistency_check) {
700 if (loc == 0) {
701 KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
702 }
703
704 __kmp_check_barrier(global_tid, ct_barrier, loc);
705 }
706
707#if OMPT_SUPPORT
708 omp_frame_t *ompt_frame;
709 if (ompt_enabled.enabled) {
710 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
711 if (ompt_frame->enter_frame == NULL)
712 ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
713 OMPT_STORE_RETURN_ADDRESS(global_tid);
714 }
715#endif
716 __kmp_threads[global_tid]->th.th_ident = loc;
717 // TODO: explicit barrier_wait_id:
718 // this function is called when 'barrier' directive is present or
719 // implicit barrier at the end of a worksharing construct.
720 // 1) better to add a per-thread barrier counter to a thread data structure
721 // 2) set to 0 when a new team is created
722 // 4) no sync is required
723
724 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
725#if OMPT_SUPPORT && OMPT_OPTIONAL
726 if (ompt_enabled.enabled) {
727 ompt_frame->enter_frame = NULL;
728 }
729#endif
730}
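/*
A minimal sketch of the lowering for an explicit "#pragma omp barrier":
@code
void phase_boundary(ident_t *loc) {
  kmp_int32 gtid = __kmpc_global_thread_num(loc);
  __kmpc_barrier(loc, gtid); // every thread of the current team waits here
}
@endcode
*/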
731
732/* The BARRIER for a MASTER section is always explicit */
733/*!
734@ingroup WORK_SHARING
735@param loc source location information.
736@param global_tid global thread number .
737@return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise.
738*/
739kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
740 int status = 0;
741
742 KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
743
744 if (!TCR_4(__kmp_init_parallel))
745 __kmp_parallel_initialize();
746
747 if (KMP_MASTER_GTID(global_tid)) {
748 KMP_COUNT_BLOCK(OMP_MASTER);
749 KMP_PUSH_PARTITIONED_TIMER(OMP_master);
750 status = 1;
751 }
752
753#if OMPT_SUPPORT && OMPT_OPTIONAL
754 if (status) {
755 if (ompt_enabled.ompt_callback_master) {
756 kmp_info_t *this_thr = __kmp_threads[global_tid];
757 kmp_team_t *team = this_thr->th.th_team;
758
759 int tid = __kmp_tid_from_gtid(global_tid);
760 ompt_callbacks.ompt_callback(ompt_callback_master)(
761 ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
762 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
763 OMPT_GET_RETURN_ADDRESS(0));
764 }
765 }
766#endif
767
768 if (__kmp_env_consistency_check) {
769#if KMP_USE_DYNAMIC_LOCK
770 if (status)
771 __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
772 else
773 __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
774#else
775 if (status)
776 __kmp_push_sync(global_tid, ct_master, loc, NULL);
777 else
778 __kmp_check_sync(global_tid, ct_master, loc, NULL);
779#endif
780 }
781
782 return status;
783}
784
785/*!
786@ingroup WORK_SHARING
787@param loc source location information.
788@param global_tid global thread number .
789
790Mark the end of a <tt>master</tt> region. This should only be called by the
791thread that executes the <tt>master</tt> region.
792*/
793void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
794 KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
795
796 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
797 KMP_POP_PARTITIONED_TIMER();
798
799#if OMPT_SUPPORT && OMPT_OPTIONAL
800 kmp_info_t *this_thr = __kmp_threads[global_tid];
801 kmp_team_t *team = this_thr->th.th_team;
802 if (ompt_enabled.ompt_callback_master) {
803 int tid = __kmp_tid_from_gtid(global_tid);
804 ompt_callbacks.ompt_callback(ompt_callback_master)(
805 ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
806 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
807 OMPT_GET_RETURN_ADDRESS(0));
808 }
809#endif
810
811 if (__kmp_env_consistency_check) {
812 if (global_tid < 0)
813 KMP_WARNING(ThreadIdentInvalid);
814
815 if (KMP_MASTER_GTID(global_tid))
816 __kmp_pop_sync(global_tid, ct_master, loc);
817 }
818}
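/*
A rough sketch of how a "#pragma omp master" block uses the two entry points
above; only the thread for which __kmpc_master() returns 1 runs the body and
calls __kmpc_end_master() (update_shared_state is an illustrative assumption):
@code
void master_only_work(ident_t *loc, kmp_int32 gtid) {
  if (__kmpc_master(loc, gtid)) {
    update_shared_state(); // master thread only; note there is no implied barrier
    __kmpc_end_master(loc, gtid);
  }
}
@endcode
*/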
819
820/*!
821@ingroup WORK_SHARING
822@param loc source location information.
823@param gtid global thread number.
824
825Start execution of an <tt>ordered</tt> construct.
826*/
827void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
828 int cid = 0;
829 kmp_info_t *th;
830 KMP_DEBUG_ASSERT(__kmp_init_serial);
831
832 KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
833
834 if (!TCR_4(__kmp_init_parallel))
835 __kmp_parallel_initialize();
836
837#if USE_ITT_BUILD
838 __kmp_itt_ordered_prep(gtid);
839// TODO: ordered_wait_id
840#endif /* USE_ITT_BUILD */
841
842 th = __kmp_threads[gtid];
843
844#if OMPT_SUPPORT && OMPT_OPTIONAL
845 kmp_team_t *team;
846 omp_wait_id_t lck;
847 void *codeptr_ra;
848 if (ompt_enabled.enabled) {
849 OMPT_STORE_RETURN_ADDRESS(gtid);
850 team = __kmp_team_from_gtid(gtid);
851 lck = (omp_wait_id_t)&team->t.t_ordered.dt.t_value;
852 /* OMPT state update */
853 th->th.ompt_thread_info.wait_id = lck;
854 th->th.ompt_thread_info.state = omp_state_wait_ordered;
855
856 /* OMPT event callback */
857 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
858 if (ompt_enabled.ompt_callback_mutex_acquire) {
859 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
860 ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin,
861 (omp_wait_id_t)lck, codeptr_ra);
862 }
863 }
864#endif
865
866 if (th->th.th_dispatch->th_deo_fcn != 0)
867 (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
868 else
869 __kmp_parallel_deo(&gtid, &cid, loc);
870
871#if OMPT_SUPPORT && OMPT_OPTIONAL
872 if (ompt_enabled.enabled) {
873 /* OMPT state update */
874 th->th.ompt_thread_info.state = omp_state_work_parallel;
875 th->th.ompt_thread_info.wait_id = 0;
876
877 /* OMPT event callback */
878 if (ompt_enabled.ompt_callback_mutex_acquired) {
879 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
880 ompt_mutex_ordered, (omp_wait_id_t)lck, codeptr_ra);
881 }
882 }
883#endif
884
885#if USE_ITT_BUILD
886 __kmp_itt_ordered_start(gtid);
887#endif /* USE_ITT_BUILD */
888}
889
890/*!
891@ingroup WORK_SHARING
892@param loc source location information.
893@param gtid global thread number.
894
895End execution of an <tt>ordered</tt> construct.
896*/
897void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
898 int cid = 0;
899 kmp_info_t *th;
900
901 KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
902
903#if USE_ITT_BUILD
904 __kmp_itt_ordered_end(gtid);
905// TODO: ordered_wait_id
906#endif /* USE_ITT_BUILD */
907
908 th = __kmp_threads[gtid];
909
910 if (th->th.th_dispatch->th_dxo_fcn != 0)
911 (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
912 else
913 __kmp_parallel_dxo(&gtid, &cid, loc);
914
915#if OMPT_SUPPORT && OMPT_OPTIONAL
916 OMPT_STORE_RETURN_ADDRESS(gtid);
917 if (ompt_enabled.ompt_callback_mutex_released) {
918 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
919 ompt_mutex_ordered,
920 (omp_wait_id_t)&__kmp_team_from_gtid(gtid)->t.t_ordered.dt.t_value,
921 OMPT_LOAD_RETURN_ADDRESS(gtid));
922 }
923#endif
924}
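/*
A rough sketch of an ordered region inside a loop chunk, bracketed by the two
entry points above (emit_in_order is an illustrative assumption):
@code
void ordered_iteration(ident_t *loc, kmp_int32 gtid, int i) {
  __kmpc_ordered(loc, gtid);    // wait until it is this iteration's turn
  emit_in_order(i);             // serialized part of the loop body
  __kmpc_end_ordered(loc, gtid);
}
@endcode
*/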
925
926#if KMP_USE_DYNAMIC_LOCK
927
928static __forceinline void
929__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
930 kmp_int32 gtid, kmp_indirect_locktag_t tag) {
931 // Pointer to the allocated indirect lock is written to crit, while indexing
932 // is ignored.
933 void *idx;
934 kmp_indirect_lock_t **lck;
935 lck = (kmp_indirect_lock_t **)crit;
936 kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
937 KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
938 KMP_SET_I_LOCK_LOCATION(ilk, loc);
939 KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
940 KA_TRACE(20,
941 ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
942#if USE_ITT_BUILD
943 __kmp_itt_critical_creating(ilk->lock, loc);
944#endif
945 int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
946 if (status == 0) {
947#if USE_ITT_BUILD
948 __kmp_itt_critical_destroyed(ilk->lock);
949#endif
950 // We don't really need to destroy the unclaimed lock here since it will be
951 // cleaned up at program exit.
952 // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
953 }
954 KMP_DEBUG_ASSERT(*lck != NULL);
955}
956
957// Fast-path acquire tas lock
958#define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \
959 { \
960 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
961 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
962 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
963 if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
964 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
965 kmp_uint32 spins; \
966 KMP_FSYNC_PREPARE(l); \
967 KMP_INIT_YIELD(spins); \
968 if (TCR_4(__kmp_nth) > \
969 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
970 KMP_YIELD(TRUE); \
971 } else { \
972 KMP_YIELD_SPIN(spins); \
973 } \
974 kmp_backoff_t backoff = __kmp_spin_backoff_params; \
975 while ( \
976 KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
977 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
978 __kmp_spin_backoff(&backoff); \
979 if (TCR_4(__kmp_nth) > \
980 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
981 KMP_YIELD(TRUE); \
982 } else { \
983 KMP_YIELD_SPIN(spins); \
984 } \
985 } \
986 } \
987 KMP_FSYNC_ACQUIRED(l); \
988 }
989
990// Fast-path test tas lock
991#define KMP_TEST_TAS_LOCK(lock, gtid, rc) \
992 { \
993 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
994 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
995 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
996 rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \
997 __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \
998 }
999
1000// Fast-path release tas lock
1001#define KMP_RELEASE_TAS_LOCK(lock, gtid) \
1002 { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }
1003
1004#if KMP_USE_FUTEX
1005
1006#include <sys/syscall.h>
1007#include <unistd.h>
1008#ifndef FUTEX_WAIT
1009#define FUTEX_WAIT 0
1010#endif
1011#ifndef FUTEX_WAKE
1012#define FUTEX_WAKE 1
1013#endif
1014
1015// Fast-path acquire futex lock
1016#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
1017 { \
1018 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1019 kmp_int32 gtid_code = (gtid + 1) << 1; \
1020 KMP_MB(); \
1021 KMP_FSYNC_PREPARE(ftx); \
1022 kmp_int32 poll_val; \
1023 while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
1024 &(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1025 KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
1026 kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
1027 if (!cond) { \
1028 if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
1029 poll_val | \
1030 KMP_LOCK_BUSY(1, futex))) { \
1031 continue; \
1032 } \
1033 poll_val |= KMP_LOCK_BUSY(1, futex); \
1034 } \
1035 kmp_int32 rc; \
1036 if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
1037 NULL, NULL, 0)) != 0) { \
1038 continue; \
1039 } \
1040 gtid_code |= 1; \
1041 } \
1042 KMP_FSYNC_ACQUIRED(ftx); \
1043 }
1044
1045// Fast-path test futex lock
1046#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
1047 { \
1048 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1049 if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1050 KMP_LOCK_BUSY(gtid + 1 << 1, futex))) { \
1051 KMP_FSYNC_ACQUIRED(ftx); \
1052 rc = TRUE; \
1053 } else { \
1054 rc = FALSE; \
1055 } \
1056 }
1057
1058// Fast-path release futex lock
1059#define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
1060 { \
1061 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1062 KMP_MB(); \
1063 KMP_FSYNC_RELEASING(ftx); \
1064 kmp_int32 poll_val = \
1065 KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
1066 if (KMP_LOCK_STRIP(poll_val) & 1) { \
1067 syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
1068 KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
1069 } \
1070 KMP_MB(); \
1071 KMP_YIELD(TCR_4(__kmp_nth) > \
1072 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); \
1073 }
1074
1075#endif // KMP_USE_FUTEX
1076
1077#else // KMP_USE_DYNAMIC_LOCK
1078
1079static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
1080 ident_t const *loc,
1081 kmp_int32 gtid) {
1082 kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
1083
1084 // Because of the double-check, the following load doesn't need to be volatile
1085 kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1086
1087 if (lck == NULL) {
1088 void *idx;
1089
1090 // Allocate & initialize the lock.
1091 // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
1092 lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
1093 __kmp_init_user_lock_with_checks(lck);
1094 __kmp_set_user_lock_location(lck, loc);
1095#if USE_ITT_BUILD
1096 __kmp_itt_critical_creating(lck);
1097// __kmp_itt_critical_creating() should be called *before* the first usage
1098// of underlying lock. It is the only place where we can guarantee it. There
1099// are chances the lock will destroyed with no usage, but it is not a
1100// problem, because this is not real event seen by user but rather setting
1101// name for object (lock). See more details in kmp_itt.h.
1102#endif /* USE_ITT_BUILD */
1103
1104 // Use a cmpxchg instruction to slam the start of the critical section with
1105 // the lock pointer. If another thread beat us to it, deallocate the lock,
1106 // and use the lock that the other thread allocated.
1107 int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck)__sync_bool_compare_and_swap((void *volatile *)(lck_pp), (void
*)(0), (void *)(lck))
;
1108
1109 if (status == 0) {
1110// Deallocate the lock and reload the value.
1111#if USE_ITT_BUILD1
1112 __kmp_itt_critical_destroyed(lck);
1113// Let ITT know the lock is destroyed and the same memory location may be reused
1114// for another purpose.
1115#endif /* USE_ITT_BUILD */
1116 __kmp_destroy_user_lock_with_checks(lck);
1117 __kmp_user_lock_free(&idx, gtid, lck);
1118 lck = (kmp_user_lock_p)TCR_PTR(*lck_pp)((void *)(*lck_pp));
1119 KMP_DEBUG_ASSERT(lck != NULL)if (!(lck != __null)) { __kmp_debug_assert("lck != __null", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_csupport.cpp"
, 1119); }
;
1120 }
1121 }
1122 return lck;
1123}
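// The routine above is an instance of race-tolerant lazy initialization:
// build the object optimistically, publish it with one compare-and-swap, and
// if the CAS loses, destroy the local copy and adopt the winner's object. A
// minimal restatement of that pattern using the same GCC builtin follows; the
// helper and its parameters are hypothetical, not part of the runtime:
static inline void *__example_lazy_publish(void *volatile *slot, void *fresh,
                                           void (*discard)(void *)) {
  // Attempt to install our freshly built object into the shared slot.
  if (__sync_bool_compare_and_swap(slot, (void *)0, fresh))
    return fresh; // we won the race: every other thread now sees our object
  discard(fresh); // we lost: release our copy ...
  return *slot;   // ... and adopt whatever the winning thread published
}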
1124
1125#endif // KMP_USE_DYNAMIC_LOCK
1126
1127/*!
1128@ingroup WORK_SHARING
1129@param loc source location information.
1130@param global_tid global thread number.
1131@param crit identity of the critical section. This could be a pointer to a lock
1132associated with the critical section, or some other suitably unique value.
1133
1134Enter code protected by a `critical` construct.
1135This function blocks until the executing thread can enter the critical section.
1136*/
1137void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1138 kmp_critical_name *crit) {
1139#if KMP_USE_DYNAMIC_LOCK1
1140#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
1141 OMPT_STORE_RETURN_ADDRESS(global_tid)if (ompt_enabled.enabled && global_tid >= 0 &&
__kmp_threads[global_tid] && !__kmp_threads[global_tid
]->th.ompt_thread_info.return_address) __kmp_threads[global_tid
]->th.ompt_thread_info.return_address = __builtin_return_address
(0)
;
1142#endif // OMPT_SUPPORT
1143 __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
1144#else
1145 KMP_COUNT_BLOCK(OMP_CRITICAL)((void)0);
1146#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
1147 omp_state_t prev_state = omp_state_undefined;
1148 ompt_thread_info_t ti;
1149#endif
1150 kmp_user_lock_p lck;
1151
1152 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmpc_critical: called T#%d\n"
, global_tid); }
;
1153
1154 // TODO: add THR_OVHD_STATE
1155
1156 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait)((void)0);
1157 KMP_CHECK_USER_LOCK_INIT();
1158
1159 if ((__kmp_user_lock_kind == lk_tas) &&
1160 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZEsizeof(void *))) {
1161 lck = (kmp_user_lock_p)crit;
1162 }
1163#if KMP_USE_FUTEX(1 && !0 && (0 || 1 || KMP_ARCH_ARM || 0))
1164 else if ((__kmp_user_lock_kind == lk_futex) &&
1165 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZEsizeof(void *))) {
1166 lck = (kmp_user_lock_p)crit;
1167 }
1168#endif
1169 else { // ticket, queuing or drdpa
1170 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
1171 }
1172
1173 if (__kmp_env_consistency_check)
1174 __kmp_push_sync(global_tid, ct_critical, loc, lck);
1175
1176// Since the critical directive binds to all threads, not just the current
1177// team, we have to check this even if we are in a serialized team.
1178// Also, even if we are the uber thread, we still have to acquire the lock,
1179// as we have to contend with sibling threads.
1180
1181#if USE_ITT_BUILD1
1182 __kmp_itt_critical_acquiring(lck);
1183#endif /* USE_ITT_BUILD */
1184#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
1185 OMPT_STORE_RETURN_ADDRESS(gtid)if (ompt_enabled.enabled && gtid >= 0 && __kmp_threads
[gtid] && !__kmp_threads[gtid]->th.ompt_thread_info
.return_address) __kmp_threads[gtid]->th.ompt_thread_info.
return_address = __builtin_return_address(0)
;
1186 void *codeptr_ra = NULL__null;
1187 if (ompt_enabled.enabled) {
1188 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1189 /* OMPT state update */
1190 prev_state = ti.state;
1191 ti.wait_id = (omp_wait_id_t)lck;
1192 ti.state = omp_state_wait_critical;
1193
1194 /* OMPT event callback */
1195 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
1196 if (ompt_enabled.ompt_callback_mutex_acquire) {
1197 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)ompt_callback_mutex_acquire_callback(
1198 ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
1199 (omp_wait_id_t)crit, codeptr_ra);
1200 }
1201 }
1202#endif
1203 // The value of 'crit' should be usable as a critical_id for the critical
1204 // section directive.
1205 __kmp_acquire_user_lock_with_checks(lck, global_tid);
1206
1207#if USE_ITT_BUILD1
1208 __kmp_itt_critical_acquired(lck);
1209#endif /* USE_ITT_BUILD */
1210#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
1211 if (ompt_enabled.enabled) {
1212 /* OMPT state update */
1213 ti.state = prev_state;
1214 ti.wait_id = 0;
1215
1216 /* OMPT event callback */
1217 if (ompt_enabled.ompt_callback_mutex_acquired) {
1218 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)ompt_callback_mutex_acquired_callback(
1219 ompt_mutex_critical, (omp_wait_id_t)crit, codeptr_ra);
1220 }
1221 }
1222#endif
1223 KMP_POP_PARTITIONED_TIMER()((void)0);
1224
1225 KMP_PUSH_PARTITIONED_TIMER(OMP_critical)((void)0);
1226 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid))if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmpc_critical: done T#%d\n"
, global_tid); }
;
1227#endif // KMP_USE_DYNAMIC_LOCK
1228}
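// Illustrative sketch of the code a compiler typically emits for a plain
// '#pragma omp critical' so that it lands on the entry point above; the
// variable names and the exact spelling of the shared kmp_critical_name
// object are hypothetical, only the __kmpc_* calls are the real interface:
//
//   static kmp_critical_name crit_var;     // zero-filled, shared by all uses
//                                          // of the same critical name
//   kmp_int32 gtid = __kmpc_global_thread_num(&loc);
//   __kmpc_critical(&loc, gtid, &crit_var);
//   /* ... user code protected by the critical construct ... */
//   __kmpc_end_critical(&loc, gtid, &crit_var);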
1229
1230#if KMP_USE_DYNAMIC_LOCK1
1231
1232// Converts the given hint to an internal lock implementation
1233static __forceinline__inline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
1234#if KMP_USE_TSX(0 || 1) && !0
1235#define KMP_TSX_LOCK(seq)lockseq_seq lockseq_##seq
1236#else
1237#define KMP_TSX_LOCK(seq)lockseq_seq __kmp_user_lock_seq
1238#endif
1239
1240#if KMP_ARCH_X860 || KMP_ARCH_X86_641
1241#define KMP_CPUINFO_RTM(__kmp_cpuinfo.rtm) (__kmp_cpuinfo.rtm)
1242#else
1243#define KMP_CPUINFO_RTM(__kmp_cpuinfo.rtm) 0
1244#endif
1245
1246 // Hints that do not require further logic
1247 if (hint & kmp_lock_hint_hle)
1248 return KMP_TSX_LOCK(hle)lockseq_hle;
1249 if (hint & kmp_lock_hint_rtm)
1250 return KMP_CPUINFO_RTM(__kmp_cpuinfo.rtm) ? KMP_TSX_LOCK(rtm)lockseq_rtm : __kmp_user_lock_seq;
1251 if (hint & kmp_lock_hint_adaptive)
1252 return KMP_CPUINFO_RTM(__kmp_cpuinfo.rtm) ? KMP_TSX_LOCK(adaptive)lockseq_adaptive : __kmp_user_lock_seq;
1253
1254 // Rule out conflicting hints first by returning the default lock
1255 if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
1256 return __kmp_user_lock_seq;
1257 if ((hint & omp_lock_hint_speculative) &&
1258 (hint & omp_lock_hint_nonspeculative))
1259 return __kmp_user_lock_seq;
1260
1261 // Do not even consider speculation when it appears to be contended
1262 if (hint & omp_lock_hint_contended)
1263 return lockseq_queuing;
1264
1265 // Uncontended lock without speculation
1266 if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
1267 return lockseq_tas;
1268
1269 // HLE lock for speculation
1270 if (hint & omp_lock_hint_speculative)
1271 return KMP_TSX_LOCK(hle)lockseq_hle;
1272
1273 return __kmp_user_lock_seq;
1274}
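// Example of how the mapping above surfaces through the standard hint API
// (a sketch: the lock sequences shown are the ones chosen by this routine on
// a TSX-capable x86_64 build; on other targets the TSX cases fall back to
// __kmp_user_lock_seq):
//
//   omp_lock_t l;
//   omp_init_lock_with_hint(&l, omp_lock_hint_speculative);  // -> lockseq_hle
//   omp_init_lock_with_hint(&l, omp_lock_hint_contended);    // -> lockseq_queuing
//   omp_init_lock_with_hint(&l, omp_lock_hint_uncontended);  // -> lockseq_tas
//   omp_init_lock_with_hint(&l, omp_lock_hint_contended |
//                                omp_lock_hint_uncontended); // conflicting hints
//                                                            // -> default lock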
1275
1276#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
1277#if KMP_USE_DYNAMIC_LOCK1
1278static kmp_mutex_impl_t
1279__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
1280 if (user_lock) {
1281 switch (KMP_EXTRACT_D_TAG(user_lock)(*((kmp_dyna_lock_t *)(user_lock)) & ((1 << 8) - 1)
& -(*((kmp_dyna_lock_t *)(user_lock)) & 1))
) {
1282 case 0:
1283 break;
1284#if KMP_USE_FUTEX(1 && !0 && (0 || 1 || KMP_ARCH_ARM || 0))
1285 case locktag_futex:
1286 return kmp_mutex_impl_queuing;
1287#endif
1288 case locktag_tas:
1289 return kmp_mutex_impl_spin;
1290#if KMP_USE_TSX(0 || 1) && !0
1291 case locktag_hle:
1292 return kmp_mutex_impl_speculative;
1293#endif
1294 default:
1295 return ompt_mutex_impl_unknown;
1296 }
1297 ilock = KMP_LOOKUP_I_LOCK(user_lock)((sizeof(int) < sizeof(void *)) ? (*(__kmp_i_lock_table.table
+ ((*(kmp_lock_index_t *)(user_lock) >> 1)) / 1024) + (
(*(kmp_lock_index_t *)(user_lock) >> 1)) % 1024) : *((kmp_indirect_lock_t
**)(user_lock)))
;
1298 }
1299 KMP_ASSERT(ilock)if (!(ilock)) { __kmp_debug_assert("ilock", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_csupport.cpp"
, 1299); }
;
1300 switch (ilock->type) {
1301#if KMP_USE_TSX(0 || 1) && !0
1302 case locktag_adaptive:
1303 case locktag_rtm:
1304 return kmp_mutex_impl_speculative;
1305#endif
1306 case locktag_nested_tas:
1307 return kmp_mutex_impl_spin;
1308#if KMP_USE_FUTEX(1 && !0 && (0 || 1 || KMP_ARCH_ARM || 0))
1309 case locktag_nested_futex:
1310#endif
1311 case locktag_ticket:
1312 case locktag_queuing:
1313 case locktag_drdpa:
1314 case locktag_nested_ticket:
1315 case locktag_nested_queuing:
1316 case locktag_nested_drdpa:
1317 return kmp_mutex_impl_queuing;
1318 default:
1319 return ompt_mutex_impl_unknown;
1320 }
1321}
1322#else
1323// For locks without dynamic binding
1324static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
1325 switch (__kmp_user_lock_kind) {
1326 case lk_tas:
1327 return kmp_mutex_impl_spin;
1328#if KMP_USE_FUTEX(1 && !0 && (0 || 1 || KMP_ARCH_ARM || 0))
1329 case lk_futex:
1330#endif
1331 case lk_ticket:
1332 case lk_queuing:
1333 case lk_drdpa:
1334 return kmp_mutex_impl_queuing;
1335#if KMP_USE_TSX(0 || 1) && !0
1336 case lk_hle:
1337 case lk_rtm:
1338 case lk_adaptive:
1339 return kmp_mutex_impl_speculative;
1340#endif
1341 default:
1342 return ompt_mutex_impl_unknown;
1343 }
1344}
1345#endif // KMP_USE_DYNAMIC_LOCK
1346#endif // OMPT_SUPPORT && OMPT_OPTIONAL
1347
1348/*!
1349@ingroup WORK_SHARING
1350@param loc source location information.
1351@param global_tid global thread number.
1352@param crit identity of the critical section. This could be a pointer to a lock
1353associated with the critical section, or some other suitably unique value.
1354@param hint the lock hint.
1355
1356Enter code protected by a `critical` construct with a hint. The hint value is
1357used to suggest a lock implementation. This function blocks until the executing
1358thread can enter the critical section unless the hint suggests use of
1359speculative execution and the hardware supports it.
1360*/
1361void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1362 kmp_critical_name *crit, uint32_t hint) {
1363 KMP_COUNT_BLOCK(OMP_CRITICAL)((void)0);
1364 kmp_user_lock_p lck;
1365#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
1366 omp_state_t prev_state = omp_state_undefined;
1367 ompt_thread_info_t ti;
1368 // This is the case, if called from __kmpc_critical:
1369 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid)__ompt_load_return_address(global_tid);
1370 if (!codeptr)
1371 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
1372#endif
1373
1374 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmpc_critical: called T#%d\n"
, global_tid); }
;
1375
1376 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
1377 // Check if it is initialized.
1378 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait)((void)0);
1379 if (*lk == 0) {
1380 kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint);
1381 if (KMP_IS_D_LOCK(lckseq)((lckseq) >= lockseq_tas && (lckseq) <= lockseq_hle
)
) {
1382 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,__sync_bool_compare_and_swap((volatile kmp_uint32 *)((volatile
kmp_int32 *)crit), (kmp_uint32)(0), (kmp_uint32)(((lckseq) <<
1 | 1)))
1383 KMP_GET_D_TAG(lckseq))__sync_bool_compare_and_swap((volatile kmp_uint32 *)((volatile
kmp_int32 *)crit), (kmp_uint32)(0), (kmp_uint32)(((lckseq) <<
1 | 1)))
;
1384 } else {
1385 __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq)(kmp_indirect_locktag_t)((lckseq)-lockseq_ticket));
1386 }
1387 }
1388 // Branch for accessing the actual lock object and set operation. This
1389 // branching is inevitable since this lock initialization does not follow the
1390 // normal dispatch path (lock table is not used).
1391 if (KMP_EXTRACT_D_TAG(lk)(*((kmp_dyna_lock_t *)(lk)) & ((1 << 8) - 1) & -
(*((kmp_dyna_lock_t *)(lk)) & 1))
!= 0) {
1392 lck = (kmp_user_lock_p)lk;
1393 if (__kmp_env_consistency_check) {
1394 __kmp_push_sync(global_tid, ct_critical, loc, lck,
1395 __kmp_map_hint_to_lock(hint));
1396 }
1397#if USE_ITT_BUILD1
1398 __kmp_itt_critical_acquiring(lck);
1399#endif
1400#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
1401 if (ompt_enabled.enabled) {
1402 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1403 /* OMPT state update */
1404 prev_state = ti.state;
1405 ti.wait_id = (omp_wait_id_t)lck;
1406 ti.state = omp_state_wait_critical;
1407
1408 /* OMPT event callback */
1409 if (ompt_enabled.ompt_callback_mutex_acquire) {
1410 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)ompt_callback_mutex_acquire_callback(
1411 ompt_mutex_critical, (unsigned int)hint,
1412 __ompt_get_mutex_impl_type(crit), (omp_wait_id_t)crit, codeptr);
1413 }
1414 }
1415#endif
1416#if KMP_USE_INLINED_TAS(1 && (0 || 1 || KMP_ARCH_ARM)) && 1
1417 if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1418 KMP_ACQUIRE_TAS_LOCK(lck, global_tid){ kmp_tas_lock_t *l = (kmp_tas_lock_t *)lck; kmp_int32 tas_free
= (locktag_tas); kmp_int32 tas_busy = ((global_tid + 1) <<
8 | locktag_tas); if ((&l->lk.poll)->load(std::memory_order_relaxed
) != tas_free || !__kmp_atomic_compare_store_acq(&l->lk
.poll, tas_free, tas_busy)) { kmp_uint32 spins; (!__kmp_itt_fsync_prepare_ptr__3_0
) ? (void)0 : __kmp_itt_fsync_prepare_ptr__3_0((void *)(l)); {
(spins) = __kmp_yield_init; }; if ((__kmp_nth) > (__kmp_avail_proc
? __kmp_avail_proc : __kmp_xproc)) { { __kmp_x86_pause(); __kmp_yield
(((!0))); }; } else { { __kmp_x86_pause(); (spins) -= 2; if (
!(spins)) { __kmp_yield(1); (spins) = __kmp_yield_next; } }; }
kmp_backoff_t backoff = __kmp_spin_backoff_params; while ( (
&l->lk.poll)->load(std::memory_order_relaxed) != tas_free
|| !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free
, tas_busy)) { __kmp_spin_backoff(&backoff); if ((__kmp_nth
) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
{ __kmp_x86_pause(); __kmp_yield(((!0))); }; } else { { __kmp_x86_pause
(); (spins) -= 2; if (!(spins)) { __kmp_yield(1); (spins) = __kmp_yield_next
; } }; } } } (!__kmp_itt_fsync_acquired_ptr__3_0) ? (void)0 :
__kmp_itt_fsync_acquired_ptr__3_0((void *)(l)); }
;
1419 } else
1420#elif KMP_USE_INLINED_FUTEX(1 && !0 && (0 || 1 || KMP_ARCH_ARM || 0)) &&
0
1421 if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1422 KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid){ kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lck; kmp_int32 gtid_code
= (global_tid + 1) << 1; ; (!__kmp_itt_fsync_prepare_ptr__3_0
) ? (void)0 : __kmp_itt_fsync_prepare_ptr__3_0((void *)(ftx))
; kmp_int32 poll_val; while ((poll_val = __sync_val_compare_and_swap
((volatile kmp_uint32 *)(&(ftx->lk.poll)), (kmp_uint32
)((locktag_futex)), (kmp_uint32)(((gtid_code) << 8 | locktag_futex
)))) != (locktag_futex)) { kmp_int32 cond = ((poll_val) >>
8) & 1; if (!cond) { if (!__sync_val_compare_and_swap((volatile
kmp_uint32 *)(&(ftx->lk.poll)), (kmp_uint32)(poll_val
), (kmp_uint32)(poll_val | ((1) << 8 | locktag_futex)))
) { continue; } poll_val |= ((1) << 8 | locktag_futex);
} kmp_int32 rc; if ((rc = syscall(202, &(ftx->lk.poll
), 0, poll_val, __null, __null, 0)) != 0) { continue; } gtid_code
|= 1; } (!__kmp_itt_fsync_acquired_ptr__3_0) ? (void)0 : __kmp_itt_fsync_acquired_ptr__3_0
((void *)(ftx)); }
;
1423 } else
1424#endif
1425 {
1426 KMP_D_LOCK_FUNC(lk, set)__kmp_direct_set[(*((kmp_dyna_lock_t *)(lk)) & ((1 <<
8) - 1) & -(*((kmp_dyna_lock_t *)(lk)) & 1))]
(lk, global_tid);
1427 }
1428 } else {
1429 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
1430 lck = ilk->lock;
1431 if (__kmp_env_consistency_check) {
1432 __kmp_push_sync(global_tid, ct_critical, loc, lck,
1433 __kmp_map_hint_to_lock(hint));
1434 }
1435#if USE_ITT_BUILD1
1436 __kmp_itt_critical_acquiring(lck);
1437#endif
1438#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
1439 if (ompt_enabled.enabled) {
1440 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1441 /* OMPT state update */
1442 prev_state = ti.state;
1443 ti.wait_id = (omp_wait_id_t)lck;
1444 ti.state = omp_state_wait_critical;
1445
1446 /* OMPT event callback */
1447 if (ompt_enabled.ompt_callback_mutex_acquire) {
1448 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)ompt_callback_mutex_acquire_callback(
1449 ompt_mutex_critical, (unsigned int)hint,
1450 __ompt_get_mutex_impl_type(0, ilk), (omp_wait_id_t)crit, codeptr);
1451 }
1452 }
1453#endif
1454 KMP_I_LOCK_FUNC(ilk, set)__kmp_indirect_set[((kmp_indirect_lock_t *)(ilk))->type](lck, global_tid);
1455 }
1456 KMP_POP_PARTITIONED_TIMER()((void)0);
1457
1458#if USE_ITT_BUILD1
1459 __kmp_itt_critical_acquired(lck);
1460#endif /* USE_ITT_BUILD */
1461#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
1462 if (ompt_enabled.enabled) {
1463 /* OMPT state update */
1464 ti.state = prev_state;
1465 ti.wait_id = 0;
1466
1467 /* OMPT event callback */
1468 if (ompt_enabled.ompt_callback_mutex_acquired) {
1469 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)ompt_callback_mutex_acquired_callback(
1470 ompt_mutex_critical, (omp_wait_id_t)crit, codeptr);
1471 }
1472 }
1473#endif
1474
1475 KMP_PUSH_PARTITIONED_TIMER(OMP_critical)((void)0);
1476 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid))if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmpc_critical: done T#%d\n"
, global_tid); }
;
1477} // __kmpc_critical_with_hint
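// Illustrative sketch: a critical construct that carries a hint clause, e.g.
// '#pragma omp critical (name) hint(omp_lock_hint_speculative)', is lowered to
// the same call pair as a plain critical but enters through this routine so
// the hint can select the lock implementation (names hypothetical):
//
//   __kmpc_critical_with_hint(&loc, gtid, &crit_name_var,
//                             omp_lock_hint_speculative);
//   /* ... protected region ... */
//   __kmpc_end_critical(&loc, gtid, &crit_name_var);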
1478
1479#endif // KMP_USE_DYNAMIC_LOCK
1480
1481/*!
1482@ingroup WORK_SHARING
1483@param loc source location information.
1484@param global_tid global thread number.
1485@param crit identity of the critical section. This could be a pointer to a lock
1486associated with the critical section, or some other suitably unique value.
1487
1488Leave a critical section, releasing any lock that was held during its execution.
1489*/
1490void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1491 kmp_critical_name *crit) {
1492 kmp_user_lock_p lck;
1493
1494 KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmpc_end_critical: called T#%d\n"
, global_tid); }
;
1495
1496#if KMP_USE_DYNAMIC_LOCK1
1497 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)((__kmp_user_lock_seq) >= lockseq_tas && (__kmp_user_lock_seq
) <= lockseq_hle)
) {
1498 lck = (kmp_user_lock_p)crit;
1499 KMP_ASSERT(lck != NULL)if (!(lck != __null)) { __kmp_debug_assert("lck != NULL", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_csupport.cpp"
, 1499); }
;
1500 if (__kmp_env_consistency_check) {
1501 __kmp_pop_sync(global_tid, ct_critical, loc);
1502 }
1503#if USE_ITT_BUILD1
1504 __kmp_itt_critical_releasing(lck);
1505#endif
1506#if KMP_USE_INLINED_TAS(1 && (0 || 1 || KMP_ARCH_ARM)) && 1
1507 if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1508 KMP_RELEASE_TAS_LOCK(lck, global_tid){ (&((kmp_tas_lock_t *)lck)->lk.poll)->store((locktag_tas
), std::memory_order_release); }
;
1509 } else
1510#elif KMP_USE_INLINED_FUTEX(1 && !0 && (0 || 1 || KMP_ARCH_ARM || 0)) &&
0
1511 if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1512 KMP_RELEASE_FUTEX_LOCK(lck, global_tid){ kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lck; ; (!__kmp_itt_fsync_releasing_ptr__3_0
) ? (void)0 : __kmp_itt_fsync_releasing_ptr__3_0((void *)(ftx
)); kmp_int32 poll_val = __sync_lock_test_and_set((volatile kmp_uint32
*)(&(ftx->lk.poll)), (kmp_uint32)((locktag_futex))); if
(((poll_val) >> 8) & 1) { syscall(202, &(ftx->
lk.poll), 1, ((1) << 8 | locktag_futex), __null, __null
, 0); } ; { __kmp_x86_pause(); __kmp_yield(((__kmp_nth) > (
__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc))); }; }
;
1513 } else
1514#endif
1515 {
1516 KMP_D_LOCK_FUNC(lck, unset)__kmp_direct_unset[(*((kmp_dyna_lock_t *)(lck)) & ((1 <<
8) - 1) & -(*((kmp_dyna_lock_t *)(lck)) & 1))]
((kmp_dyna_lock_t *)lck, global_tid);
1517 }
1518 } else {
1519 kmp_indirect_lock_t *ilk =
1520 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit))((void *)(*((kmp_indirect_lock_t **)crit)));
1521 KMP_ASSERT(ilk != NULL)if (!(ilk != __null)) { __kmp_debug_assert("ilk != NULL", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_csupport.cpp"
, 1521); }
;
1522 lck = ilk->lock;
1523 if (__kmp_env_consistency_check) {
1524 __kmp_pop_sync(global_tid, ct_critical, loc);
1525 }
1526#if USE_ITT_BUILD1
1527 __kmp_itt_critical_releasing(lck);
1528#endif
1529 KMP_I_LOCK_FUNC(ilk, unset)__kmp_indirect_unset[((kmp_indirect_lock_t *)(ilk))->type](lck, global_tid);
1530 }
1531
1532#else // KMP_USE_DYNAMIC_LOCK
1533
1534 if ((__kmp_user_lock_kind == lk_tas) &&
1535 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZEsizeof(void *))) {
1536 lck = (kmp_user_lock_p)crit;
1537 }
1538#if KMP_USE_FUTEX(1 && !0 && (0 || 1 || KMP_ARCH_ARM || 0))
1539 else if ((__kmp_user_lock_kind == lk_futex) &&
1540 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZEsizeof(void *))) {
1541 lck = (kmp_user_lock_p)crit;
1542 }
1543#endif
1544 else { // ticket, queuing or drdpa
1545 lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit))((void *)(*((kmp_user_lock_p *)crit)));
1546 }
1547
1548 KMP_ASSERT(lck != NULL)if (!(lck != __null)) { __kmp_debug_assert("lck != NULL", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_csupport.cpp"
, 1548); }
;
1549
1550 if (__kmp_env_consistency_check)
1551 __kmp_pop_sync(global_tid, ct_critical, loc);
1552
1553#if USE_ITT_BUILD1
1554 __kmp_itt_critical_releasing(lck);
1555#endif /* USE_ITT_BUILD */
1556 // The value of 'crit' should be usable as a critical_id for the critical
1557 // section directive.
1558 __kmp_release_user_lock_with_checks(lck, global_tid);
1559
1560#endif // KMP_USE_DYNAMIC_LOCK
1561
1562#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
1563 /* OMPT release event triggers after lock is released; place here to trigger
1564 * for all #if branches */
1565 OMPT_STORE_RETURN_ADDRESS(global_tid)if (ompt_enabled.enabled && global_tid >= 0 &&
__kmp_threads[global_tid] && !__kmp_threads[global_tid
]->th.ompt_thread_info.return_address) __kmp_threads[global_tid
]->th.ompt_thread_info.return_address = __builtin_return_address
(0)
;
1566 if (ompt_enabled.ompt_callback_mutex_released) {
1567 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)ompt_callback_mutex_released_callback(
1568 ompt_mutex_critical, (omp_wait_id_t)crit, OMPT_LOAD_RETURN_ADDRESS(0)__ompt_load_return_address(0));
1569 }
1570#endif
1571
1572 KMP_POP_PARTITIONED_TIMER()((void)0);
1573 KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid))if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmpc_end_critical: done T#%d\n"
, global_tid); }
;
1574}
1575
1576/*!
1577@ingroup SYNCHRONIZATION
1578@param loc source location information
1579@param global_tid thread id.
1580@return one if the thread should execute the master block, zero otherwise
1581
1582Start execution of a combined barrier and master. The barrier is executed inside
1583this function.
1584*/
1585kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1586 int status;
1587
1588 KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmpc_barrier_master: called T#%d\n"
, global_tid); }
;
1589
1590 if (!TCR_4(__kmp_init_parallel)(__kmp_init_parallel))
1
Assuming '__kmp_init_parallel' is not equal to 0
2
Taking false branch
1591 __kmp_parallel_initialize();
1592
1593 if (__kmp_env_consistency_check)
3
Assuming '__kmp_env_consistency_check' is 0
4
Taking false branch
1594 __kmp_check_barrier(global_tid, ct_barrier, loc);
1595
1596#if OMPT_SUPPORT1
1597 omp_frame_t *ompt_frame;
5
'ompt_frame' declared without an initial value
1598 if (ompt_enabled.enabled) {
6
Assuming the condition is false
7
Taking false branch
1599 __ompt_get_task_info_internal(0, NULL__null, NULL__null, &ompt_frame, NULL__null, NULL__null);
1600 if (ompt_frame->enter_frame == NULL__null)
1601 ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1)__builtin_frame_address(1);
1602 OMPT_STORE_RETURN_ADDRESS(global_tid)if (ompt_enabled.enabled && global_tid >= 0 &&
__kmp_threads[global_tid] && !__kmp_threads[global_tid
]->th.ompt_thread_info.return_address) __kmp_threads[global_tid
]->th.ompt_thread_info.return_address = __builtin_return_address
(0)
;
1603 }
1604#endif
1605#if USE_ITT_NOTIFY1
1606 __kmp_threads[global_tid]->th.th_ident = loc;
1607#endif
1608 status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE(!0), 0, NULL__null, NULL__null);
1609#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
1610 if (ompt_enabled.enabled) {
8
Assuming the condition is true
9
Taking true branch
1611 ompt_frame->enter_frame = NULL__null;
10
Dereference of undefined pointer value
1612 }
1613#endif
1614
1615 return (status != 0) ? 0 : 1;
1616}
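// Note on the report above: 'ompt_frame' is assigned only inside the first
// 'if (ompt_enabled.enabled)' block (lines 1598-1599), yet line 1611
// dereferences it under a second, independent read of the same flag. The
// analyzer therefore finds a path (steps 5-10) on which the first test is
// false and the second is true, leaving 'ompt_frame' uninitialized at the
// dereference. A defensive shape that closes that path, shown only as a
// sketch and not as the upstream fix:
//
//   omp_frame_t *ompt_frame = NULL;          // never leave it indeterminate
//   int ompt_active = ompt_enabled.enabled;  // read the flag exactly once
//   if (ompt_active) {
//     __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
//     ...
//   }
//   status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
//   if (ompt_active && ompt_frame != NULL)
//     ompt_frame->enter_frame = NULL;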
1617
1618/*!
1619@ingroup SYNCHRONIZATION
1620@param loc source location information
1621@param global_tid thread id.
1622
1623Complete the execution of a combined barrier and master. This function should
1624only be called at the completion of the <tt>master</tt> code. Other threads will
1625still be waiting at the barrier and this call releases them.
1626*/
1627void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1628 KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmpc_end_barrier_master: called T#%d\n"
, global_tid); }
;
1629
1630 __kmp_end_split_barrier(bs_plain_barrier, global_tid);
1631}
1632
1633/*!
1634@ingroup SYNCHRONIZATION
1635@param loc source location information
1636@param global_tid thread id.
1637@return one if the thread should execute the master block, zero otherwise
1638
1639Start execution of a combined barrier and master(nowait) construct.
1640The barrier is executed inside this function.
1641There is no equivalent "end" function, since the barrier here is not a split barrier: it completes inside this call, so there is nothing left for an "end" routine to release.
1642*/
1643kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
1644 kmp_int32 ret;
1645
1646 KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmpc_barrier_master_nowait: called T#%d\n"
, global_tid); }
;
1647
1648 if (!TCR_4(__kmp_init_parallel)(__kmp_init_parallel))
1649 __kmp_parallel_initialize();
1650
1651 if (__kmp_env_consistency_check) {
1652 if (loc == 0) {
1653 KMP_WARNING(ConstructIdentInvalid)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_ConstructIdentInvalid
), __kmp_msg_null)
; // ??? What does it mean for the user?
1654 }
1655 __kmp_check_barrier(global_tid, ct_barrier, loc);
1656 }
1657
1658#if OMPT_SUPPORT1
1659 omp_frame_t *ompt_frame;
1660 if (ompt_enabled.enabled) {
1661 __ompt_get_task_info_internal(0, NULL__null, NULL__null, &ompt_frame, NULL__null, NULL__null);
1662 if (ompt_frame->enter_frame == NULL__null)
1663 ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1)__builtin_frame_address(1);
1664 OMPT_STORE_RETURN_ADDRESS(global_tid)if (ompt_enabled.enabled && global_tid >= 0 &&
__kmp_threads[global_tid] && !__kmp_threads[global_tid
]->th.ompt_thread_info.return_address) __kmp_threads[global_tid
]->th.ompt_thread_info.return_address = __builtin_return_address
(0)
;
1665 }
1666#endif
1667#if USE_ITT_NOTIFY1
1668 __kmp_threads[global_tid]->th.th_ident = loc;
1669#endif
1670 __kmp_barrier(bs_plain_barrier, global_tid, FALSE0, 0, NULL__null, NULL__null);
1671#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
1672 if (ompt_enabled.enabled) {
1673 ompt_frame->enter_frame = NULL__null;
1674 }
1675#endif
1676
1677 ret = __kmpc_master(loc, global_tid);
1678
1679 if (__kmp_env_consistency_check) {
1680 /* there's no __kmpc_end_master called; so the (stats) */
1681 /* actions of __kmpc_end_master are done here */
1682
1683 if (global_tid < 0) {
1684 KMP_WARNING(ThreadIdentInvalid)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_ThreadIdentInvalid
), __kmp_msg_null)
;
1685 }
1686 if (ret) {
1687 /* only one thread should do the pop since only */
1688 /* one did the push (see __kmpc_master()) */
1689
1690 __kmp_pop_sync(global_tid, ct_master, loc);
1691 }
1692 }
1693
1694 return (ret);
1695}
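// The same two-test pattern on ompt_enabled.enabled that the analyzer flags in
// __kmpc_barrier_master appears here as well ('ompt_frame' declared at line
// 1659, dereferenced at line 1673); the defensive sketch shown after that
// function applies equally to this one.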
1696
1697/* The BARRIER for a SINGLE process section is always explicit */
1698/*!
1699@ingroup WORK_SHARING
1700@param loc source location information
1701@param global_tid global thread number
1702@return One if this thread should execute the single construct, zero otherwise.
1703
1704Test whether to execute a <tt>single</tt> construct.
1705There are no implicit barriers in the two "single" calls; rather, the compiler
1706should introduce an explicit barrier if it is required.
1707*/
1708
1709kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
1710 kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE(!0));
1711
1712 if (rc) {
1713 // We are going to execute the single statement, so we should count it.
1714 KMP_COUNT_BLOCK(OMP_SINGLE)((void)0);
1715 KMP_PUSH_PARTITIONED_TIMER(OMP_single)((void)0);
1716 }
1717
1718#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
1719 kmp_info_t *this_thr = __kmp_threads[global_tid];
1720 kmp_team_t *team = this_thr->th.th_team;
1721 int tid = __kmp_tid_from_gtid(global_tid);
1722
1723 if (ompt_enabled.enabled) {
1724 if (rc) {
1725 if (ompt_enabled.ompt_callback_work) {
1726 ompt_callbacks.ompt_callback(ompt_callback_work)ompt_callback_work_callback(
1727 ompt_work_single_executor, ompt_scope_begin,
1728 &(team->t.ompt_team_info.parallel_data),
1729 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1730 1, OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0));
1731 }
1732 } else {
1733 if (ompt_enabled.ompt_callback_work) {
1734 ompt_callbacks.ompt_callback(ompt_callback_work)ompt_callback_work_callback(
1735 ompt_work_single_other, ompt_scope_begin,
1736 &(team->t.ompt_team_info.parallel_data),
1737 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1738 1, OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0));
1739 ompt_callbacks.ompt_callback(ompt_callback_work)ompt_callback_work_callback(
1740 ompt_work_single_other, ompt_scope_end,
1741 &(team->t.ompt_team_info.parallel_data),
1742 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1743 1, OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0));
1744 }
1745 }
1746 }
1747#endif
1748
1749 return rc;
1750}
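// Illustrative sketch of the expected code generation for
// '#pragma omp single { body(); }', matching the comment above: the construct
// itself emits no barrier, so the compiler appends one unless 'nowait' is
// present (names hypothetical):
//
//   if (__kmpc_single(&loc, gtid)) {
//     body();                          // executed by exactly one thread
//     __kmpc_end_single(&loc, gtid);
//   }
//   __kmpc_barrier(&loc, gtid);        // omitted when the single has 'nowait'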
1751
1752/*!
1753@ingroup WORK_SHARING
1754@param loc source location information
1755@param global_tid global thread number
1756
1757Mark the end of a <tt>single</tt> construct. This function should
1758only be called by the thread that executed the block of code protected
1759by the `single` construct.
1760*/
1761void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
1762 __kmp_exit_single(global_tid);
1763 KMP_POP_PARTITIONED_TIMER()((void)0);
1764
1765#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
1766 kmp_info_t *this_thr = __kmp_threads[global_tid];
1767 kmp_team_t *team = this_thr->th.th_team;
1768 int tid = __kmp_tid_from_gtid(global_tid);
1769
1770 if (ompt_enabled.ompt_callback_work) {
1771 ompt_callbacks.ompt_callback(ompt_callback_work)ompt_callback_work_callback(
1772 ompt_work_single_executor, ompt_scope_end,
1773 &(team->t.ompt_team_info.parallel_data),
1774 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
1775 OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0));
1776 }
1777#endif
1778}
1779
1780/*!
1781@ingroup WORK_SHARING
1782@param loc Source location
1783@param global_tid Global thread id
1784
1785Mark the end of a statically scheduled loop.
1786*/
1787void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
1788 KMP_POP_PARTITIONED_TIMER()((void)0);
1789 KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid))if (kmp_e_debug >= 10) { __kmp_debug_printf ("__kmpc_for_static_fini called T#%d\n"
, global_tid); }
;
1790
1791#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
1792 if (ompt_enabled.ompt_callback_work) {
1793 ompt_work_t ompt_work_type = ompt_work_loop;
1794 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL__null);
1795 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
1796 // Determine workshare type
1797 if (loc != NULL__null) {
1798 if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
1799 ompt_work_type = ompt_work_loop;
1800 } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
1801 ompt_work_type = ompt_work_sections;
1802 } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
1803 ompt_work_type = ompt_work_distribute;
1804 } else {
1805 // Use the default set above.
1806 // A warning about this case is provided in __kmpc_for_static_init.
1807 }
1808 KMP_DEBUG_ASSERT(ompt_work_type)if (!(ompt_work_type)) { __kmp_debug_assert("ompt_work_type",
"/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_csupport.cpp"
, 1808); }
;
1809 }
1810 ompt_callbacks.ompt_callback(ompt_callback_work)ompt_callback_work_callback(
1811 ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
1812 &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0));
1813 }
1814#endif
1815 if (__kmp_env_consistency_check)
1816 __kmp_pop_workshare(global_tid, ct_pdo, loc);
1817}
1818
1819// User routines which take C-style arguments (call by value), as
1820// distinct from the equivalent Fortran routines
1821
1822void ompc_set_num_threads(int arg) {
1823 // !!!!! TODO: check the per-task binding
1824 __kmp_set_num_threads(arg, __kmp_entry_gtid()__kmp_get_global_thread_id_reg());
1825}
1826
1827void ompc_set_dynamic(int flag) {
1828 kmp_info_t *thread;
1829
1830 /* For the thread-private implementation of the internal controls */
1831 thread = __kmp_entry_thread();
1832
1833 __kmp_save_internal_controls(thread);
1834
1835 set__dynamic(thread, flag ? TRUE : FALSE)(((thread)->th.th_current_task->td_icvs.dynamic) = (flag
? (!0) : 0))
;
1836}
1837
1838void ompc_set_nested(int flag) {
1839 kmp_info_t *thread;
1840
1841 /* For the thread-private internal controls implementation */
1842 thread = __kmp_entry_thread();
1843
1844 __kmp_save_internal_controls(thread);
1845
1846 set__nested(thread, flag ? TRUE : FALSE)(((thread)->th.th_current_task->td_icvs.nested) = (flag
? (!0) : 0))
;
1847}
1848
1849void ompc_set_max_active_levels(int max_active_levels) {
1850 /* TO DO */
1851 /* we want per-task implementation of this internal control */
1852
1853 /* For the per-thread internal controls implementation */
1854 __kmp_set_max_active_levels(__kmp_entry_gtid()__kmp_get_global_thread_id_reg(), max_active_levels);
1855}
1856
1857void ompc_set_schedule(omp_sched_t kind, int modifier) {
1858 // !!!!! TODO: check the per-task binding
1859 __kmp_set_schedule(__kmp_entry_gtid()__kmp_get_global_thread_id_reg(), (kmp_sched_t)kind, modifier);
1860}
1861
1862int ompc_get_ancestor_thread_num(int level) {
1863 return __kmp_get_ancestor_thread_num(__kmp_entry_gtid()__kmp_get_global_thread_id_reg(), level);
1864}
1865
1866int ompc_get_team_size(int level) {
1867 return __kmp_get_team_size(__kmp_entry_gtid()__kmp_get_global_thread_id_reg(), level);
1868}
1869
1870void kmpc_set_stacksize(int arg) {
1871 // __kmp_aux_set_stacksize initializes the library if needed
1872 __kmp_aux_set_stacksize(arg);
1873}
1874
1875void kmpc_set_stacksize_s(size_t arg) {
1876 // __kmp_aux_set_stacksize initializes the library if needed
1877 __kmp_aux_set_stacksize(arg);
1878}
1879
1880void kmpc_set_blocktime(int arg) {
1881 int gtid, tid;
1882 kmp_info_t *thread;
1883
1884 gtid = __kmp_entry_gtid()__kmp_get_global_thread_id_reg();
1885 tid = __kmp_tid_from_gtid(gtid);
1886 thread = __kmp_thread_from_gtid(gtid);
1887
1888 __kmp_aux_set_blocktime(arg, thread, tid);
1889}
1890
1891void kmpc_set_library(int arg) {
1892 // __kmp_user_set_library initializes the library if needed
1893 __kmp_user_set_library((enum library_type)arg);
1894}
1895
1896void kmpc_set_defaults(char const *str) {
1897 // __kmp_aux_set_defaults initializes the library if needed
1898 __kmp_aux_set_defaults(str, KMP_STRLENstrlen(str));
1899}
1900
1901void kmpc_set_disp_num_buffers(int arg) {
1902 // ignore after initialization because some teams have already
1903 // allocated dispatch buffers
1904 if (__kmp_init_serial == 0 && arg > 0)
1905 __kmp_dispatch_num_buffers = arg;
1906}
1907
1908int kmpc_set_affinity_mask_proc(int proc, void **mask) {
1909#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED1
1910 return -1;
1911#else
1912 if (!TCR_4(__kmp_init_middle)(__kmp_init_middle)) {
1913 __kmp_middle_initialize();
1914 }
1915 return __kmp_aux_set_affinity_mask_proc(proc, mask);
1916#endif
1917}
1918
1919int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
1920#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED1
1921 return -1;
1922#else
1923 if (!TCR_4(__kmp_init_middle)(__kmp_init_middle)) {
1924 __kmp_middle_initialize();
1925 }
1926 return __kmp_aux_unset_affinity_mask_proc(proc, mask);
1927#endif
1928}
1929
1930int kmpc_get_affinity_mask_proc(int proc, void **mask) {
1931#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED1
1932 return -1;
1933#else
1934 if (!TCR_4(__kmp_init_middle)(__kmp_init_middle)) {
1935 __kmp_middle_initialize();
1936 }
1937 return __kmp_aux_get_affinity_mask_proc(proc, mask);
1938#endif
1939}
1940
1941/* -------------------------------------------------------------------------- */
1942/*!
1943@ingroup THREADPRIVATE
1944@param loc source location information
1945@param gtid global thread number
1946@param cpy_size size of the cpy_data buffer
1947@param cpy_data pointer to data to be copied
1948@param cpy_func helper function to call for copying data
1949@param didit flag variable: 1=single thread; 0=not single thread
1950
1951__kmpc_copyprivate implements the interface for the private data broadcast
1952needed for the copyprivate clause associated with a single region in an
1953OpenMP<sup>*</sup> program (both C and Fortran).
1954All threads participating in the parallel region call this routine.
1955One of the threads (called the single thread) should have the <tt>didit</tt>
1956variable set to 1 and all other threads should have that variable set to 0.
1957All threads pass a pointer to a data buffer (cpy_data) that they have built.
1958
1959The OpenMP specification forbids the use of nowait on the single region when a
1960copyprivate clause is present. However, @ref __kmpc_copyprivate implements a
1961barrier internally to avoid race conditions, so the code generation for the
1962single region should avoid generating a barrier after the call to @ref
1963__kmpc_copyprivate.
1964
1965The <tt>gtid</tt> parameter is the global thread id for the current thread.
1966The <tt>loc</tt> parameter is a pointer to source location information.
1967
1968Internal implementation: The single thread will first copy its descriptor
1969address (cpy_data) to a team-private location, then the other threads will each
1970call the function pointed to by the parameter cpy_func, which carries out the
1971copy by copying the data using the cpy_data buffer.
1972
1973The cpy_func routine used for the copy and the contents of the data area defined
1974by cpy_data and cpy_size may be built in any fashion that will allow the copy
1975to be done. For instance, the cpy_data buffer can hold the actual data to be
1976copied or it may hold a list of pointers to the data. The cpy_func routine must
1977interpret the cpy_data buffer appropriately.
1978
1979The interface to cpy_func is as follows:
1980@code
1981void cpy_func( void *destination, void *source )
1982@endcode
1983where void *destination is the cpy_data pointer for the thread being copied to
1984and void *source is the cpy_data pointer for the thread being copied from.
1985*/
1986void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
1987 void *cpy_data, void (*cpy_func)(void *, void *),
1988 kmp_int32 didit) {
1989 void **data_ptr;
1990
1991 KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmpc_copyprivate: called T#%d\n"
, gtid); }
;
1992
1993 KMP_MB();
1994
1995 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
1996
1997 if (__kmp_env_consistency_check) {
1998 if (loc == 0) {
1999 KMP_WARNING(ConstructIdentInvalid)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_ConstructIdentInvalid
), __kmp_msg_null)
;
2000 }
2001 }
2002
2003 // ToDo: Optimize the following two barriers into some kind of split barrier
2004
2005 if (didit)
2006 *data_ptr = cpy_data;
2007
2008#if OMPT_SUPPORT1
2009 omp_frame_t *ompt_frame;
2010 if (ompt_enabled.enabled) {
2011 __ompt_get_task_info_internal(0, NULL__null, NULL__null, &ompt_frame, NULL__null, NULL__null);
2012 if (ompt_frame->enter_frame == NULL__null)
2013 ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1)__builtin_frame_address(1);
2014 OMPT_STORE_RETURN_ADDRESS(gtid)if (ompt_enabled.enabled && gtid >= 0 && __kmp_threads
[gtid] && !__kmp_threads[gtid]->th.ompt_thread_info
.return_address) __kmp_threads[gtid]->th.ompt_thread_info.
return_address = __builtin_return_address(0)
;
2015 }
2016#endif
2017/* This barrier is not a barrier region boundary */
2018#if USE_ITT_NOTIFY1
2019 __kmp_threads[gtid]->th.th_ident = loc;
2020#endif
2021 __kmp_barrier(bs_plain_barrier, gtid, FALSE0, 0, NULL__null, NULL__null);
2022
2023 if (!didit)
2024 (*cpy_func)(cpy_data, *data_ptr);
2025
2026// Consider next barrier a user-visible barrier for barrier region boundaries
2027// Nesting checks are already handled by the single construct checks
2028
2029#if OMPT_SUPPORT1
2030 if (ompt_enabled.enabled) {
2031 OMPT_STORE_RETURN_ADDRESS(gtid)if (ompt_enabled.enabled && gtid >= 0 && __kmp_threads
[gtid] && !__kmp_threads[gtid]->th.ompt_thread_info
.return_address) __kmp_threads[gtid]->th.ompt_thread_info.
return_address = __builtin_return_address(0)
;
2032 }
2033#endif
2034#if USE_ITT_NOTIFY1
2035 __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g.
2036// tasks can overwrite the location)
2037#endif
2038 __kmp_barrier(bs_plain_barrier, gtid, FALSE0, 0, NULL__null, NULL__null);
2039#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2040 if (ompt_enabled.enabled) {
2041 ompt_frame->enter_frame = NULL__null;
2042 }
2043#endif
2044}
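// Illustrative sketch of how a compiler can drive __kmpc_copyprivate for
// '#pragma omp single copyprivate(x)' with a double x; every name below other
// than the __kmpc_* entry points is hypothetical, and the pointer-list layout
// is just one of the fashions permitted by the comment block above:
//
//   static void copy_x(void *dst, void *src) {   // the cpy_func helper
//     *(double *)(((void **)dst)[0]) = *(double *)(((void **)src)[0]);
//   }
//   ...
//   kmp_int32 didit = __kmpc_single(&loc, gtid);
//   if (didit) { x = compute_x(); __kmpc_end_single(&loc, gtid); }
//   void *cpy_data[1] = {&x};                     // list of pointers to copy
//   __kmpc_copyprivate(&loc, gtid, sizeof(cpy_data), cpy_data, copy_x, didit);
//
// Note that the two-test pattern on ompt_enabled.enabled (declaration of
// 'ompt_frame' at line 2009, dereference at line 2041) occurs in this function
// as well.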
2045
2046/* -------------------------------------------------------------------------- */
2047
2048#define INIT_LOCK__kmp_init_user_lock_with_checks __kmp_init_user_lock_with_checks
2049#define INIT_NESTED_LOCK__kmp_init_nested_user_lock_with_checks __kmp_init_nested_user_lock_with_checks
2050#define ACQUIRE_LOCK__kmp_acquire_user_lock_with_checks __kmp_acquire_user_lock_with_checks
2051#define ACQUIRE_LOCK_TIMED__kmp_acquire_user_lock_with_checks_timed __kmp_acquire_user_lock_with_checks_timed
2052#define ACQUIRE_NESTED_LOCK__kmp_acquire_nested_user_lock_with_checks __kmp_acquire_nested_user_lock_with_checks
2053#define ACQUIRE_NESTED_LOCK_TIMED__kmp_acquire_nested_user_lock_with_checks_timed \
2054 __kmp_acquire_nested_user_lock_with_checks_timed
2055#define RELEASE_LOCK__kmp_release_user_lock_with_checks __kmp_release_user_lock_with_checks
2056#define RELEASE_NESTED_LOCK__kmp_release_nested_user_lock_with_checks __kmp_release_nested_user_lock_with_checks
2057#define TEST_LOCK__kmp_test_user_lock_with_checks __kmp_test_user_lock_with_checks
2058#define TEST_NESTED_LOCK__kmp_test_nested_user_lock_with_checks __kmp_test_nested_user_lock_with_checks
2059#define DESTROY_LOCK__kmp_destroy_user_lock_with_checks __kmp_destroy_user_lock_with_checks
2060#define DESTROY_NESTED_LOCK__kmp_destroy_nested_user_lock_with_checks __kmp_destroy_nested_user_lock_with_checks
2061
2062// TODO: Make check abort messages use location info & pass it into
2063// with_checks routines
2064
2065#if KMP_USE_DYNAMIC_LOCK1
2066
2067// internal lock initializer
2068static __forceinline__inline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
2069 kmp_dyna_lockseq_t seq) {
2070 if (KMP_IS_D_LOCK(seq)((seq) >= lockseq_tas && (seq) <= lockseq_hle)) {
2071 KMP_INIT_D_LOCK(lock, seq)__kmp_direct_init[((seq) << 1 | 1)]((kmp_dyna_lock_t *)
lock, seq)
;
2072#if USE_ITT_BUILD1
2073 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL__null);
2074#endif
2075 } else {
2076 KMP_INIT_I_LOCK(lock, seq)__kmp_direct_init[0]((kmp_dyna_lock_t *)(lock), seq);
2077#if USE_ITT_BUILD1
2078 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock)((sizeof(int) < sizeof(void *)) ? (*(__kmp_i_lock_table.table
+ ((*(kmp_lock_index_t *)(lock) >> 1)) / 1024) + ((*(kmp_lock_index_t
*)(lock) >> 1)) % 1024) : *((kmp_indirect_lock_t **)(lock
)))
;
2079 __kmp_itt_lock_creating(ilk->lock, loc);
2080#endif
2081 }
2082}
2083
2084// internal nest lock initializer
2085static __forceinline__inline void
2086__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
2087 kmp_dyna_lockseq_t seq) {
2088#if KMP_USE_TSX(0 || 1) && !0
2089 // Don't have nested lock implementation for speculative locks
2090 if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive)
2091 seq = __kmp_user_lock_seq;
2092#endif
2093 switch (seq) {
2094 case lockseq_tas:
2095 seq = lockseq_nested_tas;
2096 break;
2097#if KMP_USE_FUTEX(1 && !0 && (0 || 1 || KMP_ARCH_ARM || 0))
2098 case lockseq_futex:
2099 seq = lockseq_nested_futex;
2100 break;
2101#endif
2102 case lockseq_ticket:
2103 seq = lockseq_nested_ticket;
2104 break;
2105 case lockseq_queuing:
2106 seq = lockseq_nested_queuing;
2107 break;
2108 case lockseq_drdpa:
2109 seq = lockseq_nested_drdpa;
2110 break;
2111 default:
2112 seq = lockseq_nested_queuing;
2113 }
2114 KMP_INIT_I_LOCK(lock, seq)__kmp_direct_init[0]((kmp_dyna_lock_t *)(lock), seq);
2115#if USE_ITT_BUILD1
2116 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock)((sizeof(int) < sizeof(void *)) ? (*(__kmp_i_lock_table.table
+ ((*(kmp_lock_index_t *)(lock) >> 1)) / 1024) + ((*(kmp_lock_index_t
*)(lock) >> 1)) % 1024) : *((kmp_indirect_lock_t **)(lock
)))
;
2117 __kmp_itt_lock_creating(ilk->lock, loc);
2118#endif
2119}
2120
2121/* initialize the lock with a hint */
2122void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
2123 uintptr_t hint) {
2124 KMP_DEBUG_ASSERT(__kmp_init_serial)if (!(__kmp_init_serial)) { __kmp_debug_assert("__kmp_init_serial"
, "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_csupport.cpp"
, 2124); }
;
2125 if (__kmp_env_consistency_check && user_lock == NULL__null) {
2126 KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint")__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_LockIsUninitialized
, "omp_init_lock_with_hint"), __kmp_msg_null)
;
2127 }
2128
2129 __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2130
2131#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2132 // This is the case, if called from omp_init_lock_with_hint:
2133 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
2134 if (!codeptr)
2135 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
2136 if (ompt_enabled.ompt_callback_lock_init) {
2137 ompt_callbacks.ompt_callback(ompt_callback_lock_init)ompt_callback_lock_init_callback(
2138 ompt_mutex_lock, (omp_lock_hint_t)hint,
2139 __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
2140 codeptr);
2141 }
2142#endif
2143}
2144
2145/* initialize the lock with a hint */
2146void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
2147 void **user_lock, uintptr_t hint) {
2148 KMP_DEBUG_ASSERT(__kmp_init_serial)if (!(__kmp_init_serial)) { __kmp_debug_assert("__kmp_init_serial"
, "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_csupport.cpp"
, 2148); }
;
2149 if (__kmp_env_consistency_check && user_lock == NULL__null) {
2150 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint")__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_LockIsUninitialized
, "omp_init_nest_lock_with_hint"), __kmp_msg_null)
;
2151 }
2152
2153 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2154
2155#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2156 // This is the case, if called from omp_init_lock_with_hint:
2157 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
2158 if (!codeptr)
2159 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
2160 if (ompt_enabled.ompt_callback_lock_init) {
2161 ompt_callbacks.ompt_callback(ompt_callback_lock_init)ompt_callback_lock_init_callback(
2162 ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
2163 __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
2164 codeptr);
2165 }
2166#endif
2167}
2168
2169#endif // KMP_USE_DYNAMIC_LOCK
2170
2171/* initialize the lock */
2172void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2173#if KMP_USE_DYNAMIC_LOCK1
2174
2175 KMP_DEBUG_ASSERT(__kmp_init_serial)if (!(__kmp_init_serial)) { __kmp_debug_assert("__kmp_init_serial"
, "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_csupport.cpp"
, 2175); }
;
2176 if (__kmp_env_consistency_check && user_lock == NULL__null) {
2177 KMP_FATAL(LockIsUninitialized, "omp_init_lock")__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_LockIsUninitialized
, "omp_init_lock"), __kmp_msg_null)
;
2178 }
2179 __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2180
2181#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2182 // This is the case, if called from omp_init_lock_with_hint:
2183 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
2184 if (!codeptr)
2185 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
2186 if (ompt_enabled.ompt_callback_lock_init) {
2187 ompt_callbacks.ompt_callback(ompt_callback_lock_init)ompt_callback_lock_init_callback(
2188 ompt_mutex_lock, omp_lock_hint_none,
2189 __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
2190 codeptr);
2191 }
2192#endif
2193
2194#else // KMP_USE_DYNAMIC_LOCK
2195
2196 static char const *const func = "omp_init_lock";
2197 kmp_user_lock_p lck;
2198 KMP_DEBUG_ASSERT(__kmp_init_serial)if (!(__kmp_init_serial)) { __kmp_debug_assert("__kmp_init_serial"
, "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_csupport.cpp"
, 2198); }
;
2199
2200 if (__kmp_env_consistency_check) {
2201 if (user_lock == NULL__null) {
2202 KMP_FATAL(LockIsUninitialized, func)__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_LockIsUninitialized
, func), __kmp_msg_null)
;
2203 }
2204 }
2205
2206 KMP_CHECK_USER_LOCK_INIT();
2207
2208 if ((__kmp_user_lock_kind == lk_tas) &&
2209 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZEsizeof(int))) {
2210 lck = (kmp_user_lock_p)user_lock;
2211 }
2212#if KMP_USE_FUTEX(1 && !0 && (0 || 1 || KMP_ARCH_ARM || 0))
2213 else if ((__kmp_user_lock_kind == lk_futex) &&
2214 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZEsizeof(int))) {
2215 lck = (kmp_user_lock_p)user_lock;
2216 }
2217#endif
2218 else {
2219 lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2220 }
2221 INIT_LOCK__kmp_init_user_lock_with_checks(lck);
2222 __kmp_set_user_lock_location(lck, loc);
2223
2224#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2225 // This is the case, if called from omp_init_lock_with_hint:
2226 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
2227 if (!codeptr)
2228 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
2229 if (ompt_enabled.ompt_callback_lock_init) {
2230 ompt_callbacks.ompt_callback(ompt_callback_lock_init)ompt_callback_lock_init_callback(
2231 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2232 (omp_wait_id_t)user_lock, codeptr);
2233 }
2234#endif
2235
2236#if USE_ITT_BUILD1
2237 __kmp_itt_lock_creating(lck);
2238#endif /* USE_ITT_BUILD */
2239
2240#endif // KMP_USE_DYNAMIC_LOCK
2241} // __kmpc_init_lock
2242
2243/* initialize the lock */
2244void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2245#if KMP_USE_DYNAMIC_LOCK1
2246
2247 KMP_DEBUG_ASSERT(__kmp_init_serial)if (!(__kmp_init_serial)) { __kmp_debug_assert("__kmp_init_serial"
, "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_csupport.cpp"
, 2247); }
;
2248 if (__kmp_env_consistency_check && user_lock == NULL__null) {
2249 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock")__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_LockIsUninitialized
, "omp_init_nest_lock"), __kmp_msg_null)
;
2250 }
2251 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2252
2253#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2254 // This is the case, if called from omp_init_lock_with_hint:
2255 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
2256 if (!codeptr)
2257 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
2258 if (ompt_enabled.ompt_callback_lock_init) {
2259 ompt_callbacks.ompt_callback(ompt_callback_lock_init)ompt_callback_lock_init_callback(
2260 ompt_mutex_nest_lock, omp_lock_hint_none,
2261 __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
2262 codeptr);
2263 }
2264#endif
2265
2266#else // KMP_USE_DYNAMIC_LOCK
2267
2268 static char const *const func = "omp_init_nest_lock";
2269 kmp_user_lock_p lck;
2270 KMP_DEBUG_ASSERT(__kmp_init_serial)if (!(__kmp_init_serial)) { __kmp_debug_assert("__kmp_init_serial"
, "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_csupport.cpp"
, 2270); }
;
2271
2272 if (__kmp_env_consistency_check) {
2273 if (user_lock == NULL__null) {
2274 KMP_FATAL(LockIsUninitialized, func)__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_LockIsUninitialized
, func), __kmp_msg_null)
;
2275 }
2276 }
2277
2278 KMP_CHECK_USER_LOCK_INIT();
2279
2280 if ((__kmp_user_lock_kind == lk_tas) &&
2281 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2282 OMP_NEST_LOCK_T_SIZEsizeof(void *))) {
2283 lck = (kmp_user_lock_p)user_lock;
2284 }
2285#if KMP_USE_FUTEX(1 && !0 && (0 || 1 || KMP_ARCH_ARM || 0))
2286 else if ((__kmp_user_lock_kind == lk_futex) &&
2287 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2288 OMP_NEST_LOCK_T_SIZEsizeof(void *))) {
2289 lck = (kmp_user_lock_p)user_lock;
2290 }
2291#endif
2292 else {
2293 lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2294 }
2295
2296 INIT_NESTED_LOCK__kmp_init_nested_user_lock_with_checks(lck);
2297 __kmp_set_user_lock_location(lck, loc);
2298
2299#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2300 // This is the case, if called from omp_init_lock_with_hint:
2301 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
2302 if (!codeptr)
2303 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
2304 if (ompt_enabled.ompt_callback_lock_init) {
2305 ompt_callbacks.ompt_callback(ompt_callback_lock_init)ompt_callback_lock_init_callback(
2306 ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2307 (omp_wait_id_t)user_lock, codeptr);
2308 }
2309#endif
2310
2311#if USE_ITT_BUILD1
2312 __kmp_itt_lock_creating(lck);
2313#endif /* USE_ITT_BUILD */
2314
2315#endif // KMP_USE_DYNAMIC_LOCK
2316} // __kmpc_init_nest_lock
2317
2318void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2319#if KMP_USE_DYNAMIC_LOCK1
2320
2321#if USE_ITT_BUILD1
2322 kmp_user_lock_p lck;
2323 if (KMP_EXTRACT_D_TAG(user_lock)(*((kmp_dyna_lock_t *)(user_lock)) & ((1 << 8) - 1)
& -(*((kmp_dyna_lock_t *)(user_lock)) & 1))
== 0) {
2324 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock)((sizeof(int) < sizeof(void *)) ? (*(__kmp_i_lock_table.table
+ ((*(kmp_lock_index_t *)(user_lock) >> 1)) / 1024) + (
(*(kmp_lock_index_t *)(user_lock) >> 1)) % 1024) : *((kmp_indirect_lock_t
**)(user_lock)))
)->lock;
2325 } else {
2326 lck = (kmp_user_lock_p)user_lock;
2327 }
2328 __kmp_itt_lock_destroyed(lck);
2329#endif
2330#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2331 // This is the case, if called from omp_init_lock_with_hint:
2332 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
2333 if (!codeptr)
2334 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
2335 if (ompt_enabled.ompt_callback_lock_destroy) {
2336 kmp_user_lock_p lck;
2337 if (KMP_EXTRACT_D_TAG(user_lock)(*((kmp_dyna_lock_t *)(user_lock)) & ((1 << 8) - 1)
& -(*((kmp_dyna_lock_t *)(user_lock)) & 1))
== 0) {
2338 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock)((sizeof(int) < sizeof(void *)) ? (*(__kmp_i_lock_table.table
+ ((*(kmp_lock_index_t *)(user_lock) >> 1)) / 1024) + (
(*(kmp_lock_index_t *)(user_lock) >> 1)) % 1024) : *((kmp_indirect_lock_t
**)(user_lock)))
)->lock;
2339 } else {
2340 lck = (kmp_user_lock_p)user_lock;
2341 }
2342 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)ompt_callback_lock_destroy_callback(
2343 ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr);
2344 }
2345#endif
2346 KMP_D_LOCK_FUNC(user_lock, destroy)__kmp_direct_destroy[(*((kmp_dyna_lock_t *)(user_lock)) &
((1 << 8) - 1) & -(*((kmp_dyna_lock_t *)(user_lock
)) & 1))]
((kmp_dyna_lock_t *)user_lock);
2347#else
2348 kmp_user_lock_p lck;
2349
2350 if ((__kmp_user_lock_kind == lk_tas) &&
2351 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2352 lck = (kmp_user_lock_p)user_lock;
2353 }
2354#if KMP_USE_FUTEX
2355 else if ((__kmp_user_lock_kind == lk_futex) &&
2356 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2357 lck = (kmp_user_lock_p)user_lock;
2358 }
2359#endif
2360 else {
2361 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
2362 }
2363
2364#if OMPT_SUPPORT && OMPT_OPTIONAL
2365 // This is the case, if called from omp_init_lock_with_hint:
2366 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2367 if (!codeptr)
2368 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2369 if (ompt_enabled.ompt_callback_lock_destroy) {
2370 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2371 ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr);
2372 }
2373#endif
2374
2375#if USE_ITT_BUILD
2376 __kmp_itt_lock_destroyed(lck);
2377#endif /* USE_ITT_BUILD */
2378 DESTROY_LOCK(lck);
2379
2380 if ((__kmp_user_lock_kind == lk_tas) &&
2381 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2382 ;
2383 }
2384#if KMP_USE_FUTEX
2385 else if ((__kmp_user_lock_kind == lk_futex) &&
2386 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2387 ;
2388 }
2389#endif
2390 else {
2391 __kmp_user_lock_free(user_lock, gtid, lck);
2392 }
2393#endif // KMP_USE_DYNAMIC_LOCK
2394} // __kmpc_destroy_lock
2395
2396/* destroy the lock */
2397void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2398#if KMP_USE_DYNAMIC_LOCK
2399
2400#if USE_ITT_BUILD
2401 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
2402 __kmp_itt_lock_destroyed(ilk->lock);
2403#endif
2404#if OMPT_SUPPORT && OMPT_OPTIONAL
2405 // This is the case, if called from omp_init_lock_with_hint:
2406 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2407 if (!codeptr)
2408 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2409 if (ompt_enabled.ompt_callback_lock_destroy) {
2410 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2411 ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr);
2412 }
2413#endif
2414 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2415
2416#else // KMP_USE_DYNAMIC_LOCK
2417
2418 kmp_user_lock_p lck;
2419
2420 if ((__kmp_user_lock_kind == lk_tas) &&
2421 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2422 OMP_NEST_LOCK_T_SIZE)) {
2423 lck = (kmp_user_lock_p)user_lock;
2424 }
2425#if KMP_USE_FUTEX
2426 else if ((__kmp_user_lock_kind == lk_futex) &&
2427 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2428 OMP_NEST_LOCK_T_SIZE)) {
2429 lck = (kmp_user_lock_p)user_lock;
2430 }
2431#endif
2432 else {
2433 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
2434 }
2435
2436#if OMPT_SUPPORT && OMPT_OPTIONAL
2437 // This is the case, if called from omp_init_lock_with_hint:
2438 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2439 if (!codeptr)
2440 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2441 if (ompt_enabled.ompt_callback_lock_destroy) {
2442 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2443 ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr);
2444 }
2445#endif
2446
2447#if USE_ITT_BUILD
2448 __kmp_itt_lock_destroyed(lck);
2449#endif /* USE_ITT_BUILD */
2450
2451 DESTROY_NESTED_LOCK(lck);
2452
2453 if ((__kmp_user_lock_kind == lk_tas) &&
2454 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2455 OMP_NEST_LOCK_T_SIZE)) {
2456 ;
2457 }
2458#if KMP_USE_FUTEX
2459 else if ((__kmp_user_lock_kind == lk_futex) &&
2460 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2461 OMP_NEST_LOCK_T_SIZE)) {
2462 ;
2463 }
2464#endif
2465 else {
2466 __kmp_user_lock_free(user_lock, gtid, lck);
2467 }
2468#endif // KMP_USE_DYNAMIC_LOCK
2469} // __kmpc_destroy_nest_lock
2470
2471void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2472 KMP_COUNT_BLOCK(OMP_set_lock);
2473#if KMP_USE_DYNAMIC_LOCK
2474 int tag = KMP_EXTRACT_D_TAG(user_lock);
2475#if USE_ITT_BUILD
2476 __kmp_itt_lock_acquiring(
2477 (kmp_user_lock_p)
2478 user_lock); // itt function will get to the right lock object.
2479#endif
2480#if OMPT_SUPPORT && OMPT_OPTIONAL
2481 // This is the case, if called from omp_init_lock_with_hint:
2482 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2483 if (!codeptr)
2484 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2485 if (ompt_enabled.ompt_callback_mutex_acquire) {
2486 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2487 ompt_mutex_lock, omp_lock_hint_none,
2488 __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
2489 codeptr);
2490 }
2491#endif
2492#if KMP_USE_INLINED_TAS
2493 if (tag == locktag_tas && !__kmp_env_consistency_check) {
2494 KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
2495 } else
2496#elif KMP_USE_INLINED_FUTEX
2497 if (tag == locktag_futex && !__kmp_env_consistency_check) {
2498 KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
2499 } else
2500#endif
2501 {
2502 __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2503 }
2504#if USE_ITT_BUILD
2505 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2506#endif
2507#if OMPT_SUPPORT && OMPT_OPTIONAL
2508 if (ompt_enabled.ompt_callback_mutex_acquired) {
2509 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2510 ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr);
2511 }
2512#endif
2513
2514#else // KMP_USE_DYNAMIC_LOCK
2515
2516 kmp_user_lock_p lck;
2517
2518 if ((__kmp_user_lock_kind == lk_tas) &&
2519 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2520 lck = (kmp_user_lock_p)user_lock;
2521 }
2522#if KMP_USE_FUTEX
2523 else if ((__kmp_user_lock_kind == lk_futex) &&
2524 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2525 lck = (kmp_user_lock_p)user_lock;
2526 }
2527#endif
2528 else {
2529 lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
2530 }
2531
2532#if USE_ITT_BUILD
2533 __kmp_itt_lock_acquiring(lck);
2534#endif /* USE_ITT_BUILD */
2535#if OMPT_SUPPORT && OMPT_OPTIONAL
2536 // This is the case, if called from omp_init_lock_with_hint:
2537 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2538 if (!codeptr)
2539 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2540 if (ompt_enabled.ompt_callback_mutex_acquire) {
2541 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2542 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2543 (omp_wait_id_t)lck, codeptr);
2544 }
2545#endif
2546
2547 ACQUIRE_LOCK(lck, gtid);
2548
2549#if USE_ITT_BUILD
2550 __kmp_itt_lock_acquired(lck);
2551#endif /* USE_ITT_BUILD */
2552
2553#if OMPT_SUPPORT && OMPT_OPTIONAL
2554 if (ompt_enabled.ompt_callback_mutex_acquired) {
2555 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2556 ompt_mutex_lock, (omp_wait_id_t)lck, codeptr);
2557 }
2558#endif
2559
2560#endif // KMP_USE_DYNAMIC_LOCK
2561}
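
For reference, a minimal user-level sketch (illustrative only, not part of kmp_csupport.cpp) of the omp_lock_t API whose acquire/release semantics the __kmpc_set_lock / __kmpc_unset_lock entry points above implement; the exact shim from omp_set_lock down into these entry points is runtime-internal and not shown here.

#include <omp.h>
#include <stdio.h>

int main(void) {
  omp_lock_t lock; // user lock storage; small lock kinds (TAS/futex) may live here directly
  omp_init_lock(&lock);
  int sum = 0;
#pragma omp parallel num_threads(4)
  {
    omp_set_lock(&lock);   // acquire: mutual exclusion around the shared update
    sum += omp_get_thread_num();
    omp_unset_lock(&lock); // release
  }
  omp_destroy_lock(&lock);
  printf("sum = %d\n", sum);
  return 0;
}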
2562
2563void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2564#if KMP_USE_DYNAMIC_LOCK1
2565
2566#if USE_ITT_BUILD1
2567 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2568#endif
2569#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2570 // This is the case, if called from omp_init_lock_with_hint:
2571 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
2572 if (!codeptr)
2573 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
2574 if (ompt_enabled.enabled) {
2575 if (ompt_enabled.ompt_callback_mutex_acquire) {
2576 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)ompt_callback_mutex_acquire_callback(
2577 ompt_mutex_nest_lock, omp_lock_hint_none,
2578 __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
2579 codeptr);
2580 }
2581 }
2582#endif
2583 int acquire_status =
2584 KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
2585 (void) acquire_status;
2586#if USE_ITT_BUILD1
2587 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2588#endif
2589
2590#if OMPT_SUPPORT && OMPT_OPTIONAL
2591 if (ompt_enabled.enabled) {
2592 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2593 if (ompt_enabled.ompt_callback_mutex_acquired) {
2594 // lock_first
2595 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2596 ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr);
2597 }
2598 } else {
2599 if (ompt_enabled.ompt_callback_nest_lock) {
2600 // lock_next
2601 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2602 ompt_scope_begin, (omp_wait_id_t)user_lock, codeptr);
2603 }
2604 }
2605 }
2606#endif
2607
2608#else // KMP_USE_DYNAMIC_LOCK
2609 int acquire_status;
2610 kmp_user_lock_p lck;
2611
2612 if ((__kmp_user_lock_kind == lk_tas) &&
2613 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2614 OMP_NEST_LOCK_T_SIZEsizeof(void *))) {
2615 lck = (kmp_user_lock_p)user_lock;
2616 }
2617#if KMP_USE_FUTEX(1 && !0 && (0 || 1 || KMP_ARCH_ARM || 0))
2618 else if ((__kmp_user_lock_kind == lk_futex) &&
2619 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2620 OMP_NEST_LOCK_T_SIZEsizeof(void *))) {
2621 lck = (kmp_user_lock_p)user_lock;
2622 }
2623#endif
2624 else {
2625 lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
2626 }
2627
2628#if USE_ITT_BUILD1
2629 __kmp_itt_lock_acquiring(lck);
2630#endif /* USE_ITT_BUILD */
2631#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2632 // This is the case, if called from omp_init_lock_with_hint:
2633 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
2634 if (!codeptr)
2635 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
2636 if (ompt_enabled.enabled) {
2637 if (ompt_enabled.ompt_callback_mutex_acquire) {
2638 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)ompt_callback_mutex_acquire_callback(
2639 ompt_mutex_nest_lock, omp_lock_hint_none,
2640 __ompt_get_mutex_impl_type(), (omp_wait_id_t)lck, codeptr);
2641 }
2642 }
2643#endif
2644
2645 ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);
2646
2647#if USE_ITT_BUILD1
2648 __kmp_itt_lock_acquired(lck);
2649#endif /* USE_ITT_BUILD */
2650
2651#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2652 if (ompt_enabled.enabled) {
2653 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST1) {
2654 if (ompt_enabled.ompt_callback_mutex_acquired) {
2655 // lock_first
2656 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)ompt_callback_mutex_acquired_callback(
2657 ompt_mutex_nest_lock, (omp_wait_id_t)lck, codeptr);
2658 }
2659 } else {
2660 if (ompt_enabled.ompt_callback_nest_lock) {
2661 // lock_next
2662 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)ompt_callback_nest_lock_callback(
2663 ompt_scope_begin, (omp_wait_id_t)lck, codeptr);
2664 }
2665 }
2666 }
2667#endif
2668
2669#endif // KMP_USE_DYNAMIC_LOCK
2670}
2671
2672void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2673#if KMP_USE_DYNAMIC_LOCK1
2674
2675 int tag = KMP_EXTRACT_D_TAG(user_lock);
2676#if USE_ITT_BUILD
2677 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2678#endif
2679#if KMP_USE_INLINED_TAS
2680 if (tag == locktag_tas && !__kmp_env_consistency_check) {
2681 KMP_RELEASE_TAS_LOCK(user_lock, gtid);
2682 } else
2683#elif KMP_USE_INLINED_FUTEX
2684 if (tag == locktag_futex && !__kmp_env_consistency_check) {
2685 KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
2686 } else
2687#endif
2688 {
2689 __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2690 }
2691
2692#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2693 // This is the case, if called from omp_init_lock_with_hint:
2694 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
2695 if (!codeptr)
2696 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
2697 if (ompt_enabled.ompt_callback_mutex_released) {
2698 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)ompt_callback_mutex_released_callback(
2699 ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr);
2700 }
2701#endif
2702
2703#else // KMP_USE_DYNAMIC_LOCK
2704
2705 kmp_user_lock_p lck;
2706
2707 /* Can't use serial interval since not block structured */
2708 /* release the lock */
2709
2710 if ((__kmp_user_lock_kind == lk_tas) &&
2711 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZEsizeof(int))) {
2712#if KMP_OS_LINUX && \
2713 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2714// "fast" path implemented to fix customer performance issue
2715#if USE_ITT_BUILD
2716 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2717#endif /* USE_ITT_BUILD */
2718 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
2719 KMP_MB();
2720
2721#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2722 // This is the case, if called from omp_init_lock_with_hint:
2723 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
2724 if (!codeptr)
2725 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
2726 if (ompt_enabled.ompt_callback_mutex_released) {
2727 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)ompt_callback_mutex_released_callback(
2728 ompt_mutex_lock, (omp_wait_id_t)lck, codeptr);
2729 }
2730#endif
2731
2732 return;
2733#else
2734 lck = (kmp_user_lock_p)user_lock;
2735#endif
2736 }
2737#if KMP_USE_FUTEX(1 && !0 && (0 || 1 || KMP_ARCH_ARM || 0))
2738 else if ((__kmp_user_lock_kind == lk_futex) &&
2739 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZEsizeof(int))) {
2740 lck = (kmp_user_lock_p)user_lock;
2741 }
2742#endif
2743 else {
2744 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
2745 }
2746
2747#if USE_ITT_BUILD1
2748 __kmp_itt_lock_releasing(lck);
2749#endif /* USE_ITT_BUILD */
2750
2751 RELEASE_LOCK(lck, gtid);
2752
2753#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2754 // This is the case, if called from omp_init_lock_with_hint:
2755 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
2756 if (!codeptr)
2757 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
2758 if (ompt_enabled.ompt_callback_mutex_released) {
2759 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)ompt_callback_mutex_released_callback(
2760 ompt_mutex_lock, (omp_wait_id_t)lck, codeptr);
2761 }
2762#endif
2763
2764#endif // KMP_USE_DYNAMIC_LOCK
2765}
2766
2767/* release the lock */
2768void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2769#if KMP_USE_DYNAMIC_LOCK1
2770
2771#if USE_ITT_BUILD1
2772 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2773#endif
2774 int release_status =
2775 KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
2776 (void) release_status;
2777
2778#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2779 // This is the case, if called from omp_init_lock_with_hint:
2780 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
2781 if (!codeptr)
2782 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
2783 if (ompt_enabled.enabled) {
2784 if (release_status == KMP_LOCK_RELEASED1) {
2785 if (ompt_enabled.ompt_callback_mutex_released) {
2786 // release_lock_last
2787 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)ompt_callback_mutex_released_callback(
2788 ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr);
2789 }
2790 } else if (ompt_enabled.ompt_callback_nest_lock) {
2791 // release_lock_prev
2792 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)ompt_callback_nest_lock_callback(
2793 ompt_scope_end, (omp_wait_id_t)user_lock, codeptr);
2794 }
2795 }
2796#endif
2797
2798#else // KMP_USE_DYNAMIC_LOCK
2799
2800 kmp_user_lock_p lck;
2801
2802 /* Can't use serial interval since not block structured */
2803
2804 if ((__kmp_user_lock_kind == lk_tas) &&
2805 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2806 OMP_NEST_LOCK_T_SIZEsizeof(void *))) {
2807#if KMP_OS_LINUX && \
2808 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2809 // "fast" path implemented to fix customer performance issue
2810 kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
2811#if USE_ITT_BUILD
2812 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2813#endif /* USE_ITT_BUILD */
2814
2815#if OMPT_SUPPORT && OMPT_OPTIONAL
2816 int release_status = KMP_LOCK_STILL_HELD;
2817#endif
2818
2819 if (--(tl->lk.depth_locked) == 0) {
2820 TCW_4(tl->lk.poll, 0);
2821#if OMPT_SUPPORT && OMPT_OPTIONAL
2822 release_status = KMP_LOCK_RELEASED;
2823#endif
2824 }
2825 KMP_MB();
2826
2827#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2828 // This is the case, if called from omp_init_lock_with_hint:
2829 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
2830 if (!codeptr)
2831 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
2832 if (ompt_enabled.enabled) {
2833 if (release_status == KMP_LOCK_RELEASED1) {
2834 if (ompt_enabled.ompt_callback_mutex_released) {
2835 // release_lock_last
2836 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)ompt_callback_mutex_released_callback(
2837 ompt_mutex_nest_lock, (omp_wait_id_t)lck, codeptr);
2838 }
2839 } else if (ompt_enabled.ompt_callback_nest_lock) {
2840 // release_lock_previous
2841 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)ompt_callback_nest_lock_callback(
2842 ompt_mutex_scope_end, (omp_wait_id_t)lck, codeptr);
2843 }
2844 }
2845#endif
2846
2847 return;
2848#else
2849 lck = (kmp_user_lock_p)user_lock;
2850#endif
2851 }
2852#if KMP_USE_FUTEX(1 && !0 && (0 || 1 || KMP_ARCH_ARM || 0))
2853 else if ((__kmp_user_lock_kind == lk_futex) &&
2854 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2855 OMP_NEST_LOCK_T_SIZEsizeof(void *))) {
2856 lck = (kmp_user_lock_p)user_lock;
2857 }
2858#endif
2859 else {
2860 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
2861 }
2862
2863#if USE_ITT_BUILD1
2864 __kmp_itt_lock_releasing(lck);
2865#endif /* USE_ITT_BUILD */
2866
2867 int release_status;
2868 release_status = RELEASE_NESTED_LOCK(lck, gtid);
2869#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2870 // This is the case, if called from omp_init_lock_with_hint:
2871 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
2872 if (!codeptr)
2873 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
2874 if (ompt_enabled.enabled) {
2875 if (release_status == KMP_LOCK_RELEASED1) {
2876 if (ompt_enabled.ompt_callback_mutex_released) {
2877 // release_lock_last
2878 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)ompt_callback_mutex_released_callback(
2879 ompt_mutex_nest_lock, (omp_wait_id_t)lck, codeptr);
2880 }
2881 } else if (ompt_enabled.ompt_callback_nest_lock) {
2882 // release_lock_previous
2883 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)ompt_callback_nest_lock_callback(
2884 ompt_mutex_scope_end, (omp_wait_id_t)lck, codeptr);
2885 }
2886 }
2887#endif
2888
2889#endif // KMP_USE_DYNAMIC_LOCK
2890}
2891
2892/* try to acquire the lock */
2893int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2894 KMP_COUNT_BLOCK(OMP_test_lock);
2895
2896#if KMP_USE_DYNAMIC_LOCK1
2897 int rc;
2898 int tag = KMP_EXTRACT_D_TAG(user_lock);
2899#if USE_ITT_BUILD1
2900 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2901#endif
2902#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2903 // This is the case, if called from omp_init_lock_with_hint:
2904 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
2905 if (!codeptr)
2906 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
2907 if (ompt_enabled.ompt_callback_mutex_acquire) {
2908 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)ompt_callback_mutex_acquire_callback(
2909 ompt_mutex_lock, omp_lock_hint_none,
2910 __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
2911 codeptr);
2912 }
2913#endif
2914#if KMP_USE_INLINED_TAS
2915 if (tag == locktag_tas && !__kmp_env_consistency_check) {
2916 KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
2917 } else
2918#elif KMP_USE_INLINED_FUTEX
2919 if (tag == locktag_futex && !__kmp_env_consistency_check) {
2920 KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
2921 } else
2922#endif
2923 {
2924 rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2925 }
2926 if (rc) {
2927#if USE_ITT_BUILD1
2928 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2929#endif
2930#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2931 if (ompt_enabled.ompt_callback_mutex_acquired) {
2932 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)ompt_callback_mutex_acquired_callback(
2933 ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr);
2934 }
2935#endif
2936 return FTN_TRUE;
2937 } else {
2938#if USE_ITT_BUILD1
2939 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
2940#endif
2941 return FTN_FALSE;
2942 }
2943
2944#else // KMP_USE_DYNAMIC_LOCK
2945
2946 kmp_user_lock_p lck;
2947 int rc;
2948
2949 if ((__kmp_user_lock_kind == lk_tas) &&
2950 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZEsizeof(int))) {
2951 lck = (kmp_user_lock_p)user_lock;
2952 }
2953#if KMP_USE_FUTEX(1 && !0 && (0 || 1 || KMP_ARCH_ARM || 0))
2954 else if ((__kmp_user_lock_kind == lk_futex) &&
2955 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZEsizeof(int))) {
2956 lck = (kmp_user_lock_p)user_lock;
2957 }
2958#endif
2959 else {
2960 lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
2961 }
2962
2963#if USE_ITT_BUILD1
2964 __kmp_itt_lock_acquiring(lck);
2965#endif /* USE_ITT_BUILD */
2966#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2967 // This is the case, if called from omp_init_lock_with_hint:
2968 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
2969 if (!codeptr)
2970 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
2971 if (ompt_enabled.ompt_callback_mutex_acquire) {
2972 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)ompt_callback_mutex_acquire_callback(
2973 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2974 (omp_wait_id_t)lck, codeptr);
2975 }
2976#endif
2977
2978 rc = TEST_LOCK(lck, gtid);
2979#if USE_ITT_BUILD1
2980 if (rc) {
2981 __kmp_itt_lock_acquired(lck);
2982 } else {
2983 __kmp_itt_lock_cancelled(lck);
2984 }
2985#endif /* USE_ITT_BUILD */
2986#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2987 if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
2988 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)ompt_callback_mutex_acquired_callback(
2989 ompt_mutex_lock, (omp_wait_id_t)lck, codeptr);
2990 }
2991#endif
2992
2993 return (rc ? FTN_TRUE : FTN_FALSE);
2994
2995/* Can't use serial interval since not block structured */
2996
2997#endif // KMP_USE_DYNAMIC_LOCK
2998}
2999
3000/* try to acquire the lock */
3001int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3002#if KMP_USE_DYNAMIC_LOCK1
3003 int rc;
3004#if USE_ITT_BUILD1
3005 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3006#endif
3007#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
3008 // This is the case, if called from omp_init_lock_with_hint:
3009 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
3010 if (!codeptr)
3011 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
3012 if (ompt_enabled.ompt_callback_mutex_acquire) {
3013 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)ompt_callback_mutex_acquire_callback(
3014 ompt_mutex_nest_lock, omp_lock_hint_none,
3015 __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
3016 codeptr);
3017 }
3018#endif
3019 rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
3020#if USE_ITT_BUILD1
3021 if (rc) {
3022 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3023 } else {
3024 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3025 }
3026#endif
3027#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
3028 if (ompt_enabled.enabled && rc) {
3029 if (rc == 1) {
3030 if (ompt_enabled.ompt_callback_mutex_acquired) {
3031 // lock_first
3032 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)ompt_callback_mutex_acquired_callback(
3033 ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr);
3034 }
3035 } else {
3036 if (ompt_enabled.ompt_callback_nest_lock) {
3037 // lock_next
3038 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)ompt_callback_nest_lock_callback(
3039 ompt_scope_begin, (omp_wait_id_t)user_lock, codeptr);
3040 }
3041 }
3042 }
3043#endif
3044 return rc;
3045
3046#else // KMP_USE_DYNAMIC_LOCK
3047
3048 kmp_user_lock_p lck;
3049 int rc;
3050
3051 if ((__kmp_user_lock_kind == lk_tas) &&
3052 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3053 OMP_NEST_LOCK_T_SIZEsizeof(void *))) {
3054 lck = (kmp_user_lock_p)user_lock;
3055 }
3056#if KMP_USE_FUTEX(1 && !0 && (0 || 1 || KMP_ARCH_ARM || 0))
3057 else if ((__kmp_user_lock_kind == lk_futex) &&
3058 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3059 OMP_NEST_LOCK_T_SIZEsizeof(void *))) {
3060 lck = (kmp_user_lock_p)user_lock;
3061 }
3062#endif
3063 else {
3064 lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
3065 }
3066
3067#if USE_ITT_BUILD1
3068 __kmp_itt_lock_acquiring(lck);
3069#endif /* USE_ITT_BUILD */
3070
3071#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
3072 // This is the case, if called from omp_init_lock_with_hint:
3073 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
3074 if (!codeptr)
3075 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
3076 if (ompt_enabled.enabled &&
3077 ompt_enabled.ompt_callback_mutex_acquire) {
3078 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)ompt_callback_mutex_acquire_callback(
3079 ompt_mutex_nest_lock, omp_lock_hint_none,
3080 __ompt_get_mutex_impl_type(), (omp_wait_id_t)lck, codeptr);
3081 }
3082#endif
3083
3084 rc = TEST_NESTED_LOCK(lck, gtid);
3085#if USE_ITT_BUILD1
3086 if (rc) {
3087 __kmp_itt_lock_acquired(lck);
3088 } else {
3089 __kmp_itt_lock_cancelled(lck);
3090 }
3091#endif /* USE_ITT_BUILD */
3092#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
3093 if (ompt_enabled.enabled && rc) {
3094 if (rc == 1) {
3095 if (ompt_enabled.ompt_callback_mutex_acquired) {
3096 // lock_first
3097 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)ompt_callback_mutex_acquired_callback(
3098 ompt_mutex_nest_lock, (omp_wait_id_t)lck, codeptr);
3099 }
3100 } else {
3101 if (ompt_enabled.ompt_callback_nest_lock) {
3102 // lock_next
3103 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)ompt_callback_nest_lock_callback(
3104 ompt_mutex_scope_begin, (omp_wait_id_t)lck, codeptr);
3105 }
3106 }
3107 }
3108#endif
3109 return rc;
3110
3111/* Can't use serial interval since not block structured */
3112
3113#endif // KMP_USE_DYNAMIC_LOCK
3114}
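
For reference, a hedged user-level sketch (illustrative only, not part of this file) of the nestable-lock semantics that __kmpc_test_nest_lock implements: the OpenMP routine returns the new nesting depth on success and 0 when the lock is held by another thread.

#include <omp.h>
#include <assert.h>

int main(void) {
  omp_nest_lock_t nlock;
  omp_init_nest_lock(&nlock);
  int d1 = omp_test_nest_lock(&nlock); // first acquisition by this thread: depth 1
  int d2 = omp_test_nest_lock(&nlock); // same thread again: depth grows to 2
  assert(d1 == 1 && d2 == 2);
  omp_unset_nest_lock(&nlock); // depth 2 -> 1
  omp_unset_nest_lock(&nlock); // depth 1 -> 0, lock released
  omp_destroy_nest_lock(&nlock);
  return 0;
}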
3115
3116// Interface to fast scalable reduce methods routines
3117
3118// keep the selected method in a thread local structure for cross-function
3119// usage: will be used in __kmpc_end_reduce* functions;
3120// another solution: to re-determine the method one more time in
3121// __kmpc_end_reduce* functions (new prototype required then)
3122// AT: which solution is better?
3123#define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \
3124 ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))
3125
3126#define __KMP_GET_REDUCTION_METHOD(gtid) \
3127 (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
3128
3129// description of the packed_reduction_method variable: look at the macros in
3130// kmp.h
3131
3132// used in a critical section reduce block
3133 static __forceinline void
3134__kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3135 kmp_critical_name *crit) {
3136
3137 // this lock was visible to a customer and to the threading profile tool as a
3138 // serial overhead span (although it's used for an internal purpose only)
3139 // why was it visible in previous implementation?
3140 // should we keep it visible in new reduce block?
3141 kmp_user_lock_p lck;
3142
3143#if KMP_USE_DYNAMIC_LOCK
3144
3145 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
3146 // Check if it is initialized.
3147 if (*lk == 0) {
3148 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3149 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
3150 KMP_GET_D_TAG(__kmp_user_lock_seq));
3151 } else {
3152 __kmp_init_indirect_csptr(crit, loc, global_tid,
3153 KMP_GET_I_TAG(__kmp_user_lock_seq));
3154 }
3155 }
3156 // Branch for accessing the actual lock object and set operation. This
3157 // branching is inevitable since this lock initialization does not follow the
3158 // normal dispatch path (lock table is not used).
3159 if (KMP_EXTRACT_D_TAG(lk) != 0) {
3160 lck = (kmp_user_lock_p)lk;
3161 KMP_DEBUG_ASSERT(lck != NULL);
3162 if (__kmp_env_consistency_check) {
3163 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3164 }
3165 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
3166 } else {
3167 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
3168 lck = ilk->lock;
3169 KMP_DEBUG_ASSERT(lck != NULL);
3170 if (__kmp_env_consistency_check) {
3171 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3172 }
3173 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
3174 }
3175
3176#else // KMP_USE_DYNAMIC_LOCK
3177
3178 // We know that the fast reduction code is only emitted by Intel compilers
3179 // with 32 byte critical sections. If there isn't enough space, then we
3180 // have to use a pointer.
3181 if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
3182 lck = (kmp_user_lock_p)crit;
3183 } else {
3184 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
3185 }
3186 KMP_DEBUG_ASSERT(lck != NULL);
3187
3188 if (__kmp_env_consistency_check)
3189 __kmp_push_sync(global_tid, ct_critical, loc, lck);
3190
3191 __kmp_acquire_user_lock_with_checks(lck, global_tid);
3192
3193#endif // KMP_USE_DYNAMIC_LOCK
3194}
3195
3196// used in a critical section reduce block
3197 static __forceinline void
3198__kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3199 kmp_critical_name *crit) {
3200
3201 kmp_user_lock_p lck;
3202
3203#if KMP_USE_DYNAMIC_LOCK1
3204
3205 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3206 lck = (kmp_user_lock_p)crit;
3207 if (__kmp_env_consistency_check)
3208 __kmp_pop_sync(global_tid, ct_critical, loc);
3209 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
3210 } else {
3211 kmp_indirect_lock_t *ilk =
3212 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
3213 if (__kmp_env_consistency_check)
3214 __kmp_pop_sync(global_tid, ct_critical, loc);
3215 KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
3216 }
3217
3218#else // KMP_USE_DYNAMIC_LOCK
3219
3220 // We know that the fast reduction code is only emitted by Intel compilers
3221 // with 32 byte critical sections. If there isn't enough space, then we have
3222 // to use a pointer.
3223 if (__kmp_base_user_lock_size > 32) {
3224 lck = *((kmp_user_lock_p *)crit);
3225 KMP_ASSERT(lck != NULL);
3226 } else {
3227 lck = (kmp_user_lock_p)crit;
3228 }
3229
3230 if (__kmp_env_consistency_check)
3231 __kmp_pop_sync(global_tid, ct_critical, loc);
3232
3233 __kmp_release_user_lock_with_checks(lck, global_tid);
3234
3235#endif // KMP_USE_DYNAMIC_LOCK
3236} // __kmp_end_critical_section_reduce_block
3237
3238#if OMP_40_ENABLED
3239 static __forceinline int
3240__kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
3241 int *task_state) {
3242 kmp_team_t *team;
3243
3244 // Check if we are inside the teams construct?
3245 if (th->th.th_teams_microtask) {
3246 *team_p = team = th->th.th_team;
3247 if (team->t.t_level == th->th.th_teams_level) {
3248 // This is reduction at teams construct.
3249 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
3250 // Let's swap teams temporarily for the reduction.
3251 th->th.th_info.ds.ds_tid = team->t.t_master_tid;
3252 th->th.th_team = team->t.t_parent;
3253 th->th.th_team_nproc = th->th.th_team->t.t_nproc;
3254 th->th.th_task_team = th->th.th_team->t.t_task_team[0];
3255 *task_state = th->th.th_task_state;
3256 th->th.th_task_state = 0;
3257
3258 return 1;
3259 }
3260 }
3261 return 0;
3262}
3263
3264 static __forceinline void
3265__kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
3266 // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction.
3267 th->th.th_info.ds.ds_tid = 0;
3268 th->th.th_team = team;
3269 th->th.th_team_nproc = team->t.t_nproc;
3270 th->th.th_task_team = team->t.t_task_team[task_state];
3271 th->th.th_task_state = task_state;
3272}
3273#endif
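
The two helpers above exist for reductions attached directly to a teams construct, where the reducing thread temporarily has to act in the parent team. A hedged sketch of source code that reaches this path is below; it is illustrative only (names are made up), and without an offload target the region simply runs on the host.

#include <omp.h>

// Sum an array with a reduction on a target teams region; the runtime-side
// teams swap above is what lets the combine step see the parent team.
int teams_sum(const int *a, int n) {
  int sum = 0;
#pragma omp target teams distribute parallel for reduction(+ : sum) map(to: a[0:n])
  for (int i = 0; i < n; ++i)
    sum += a[i];
  return sum;
}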
3274
3275/* 2.a.i. Reduce Block without a terminating barrier */
3276/*!
3277@ingroup SYNCHRONIZATION
3278@param loc source location information
3279@param global_tid global thread number
3280@param num_vars number of items (variables) to be reduced
3281@param reduce_size size of data in bytes to be reduced
3282@param reduce_data pointer to data to be reduced
3283@param reduce_func callback function providing reduction operation on two
3284operands and returning result of reduction in lhs_data
3285@param lck pointer to the unique lock data structure
3286@result 1 for the master thread, 0 for all other team threads, 2 for all team
3287threads if atomic reduction needed
3288
3289The nowait version is used for a reduce clause with the nowait argument.
3290*/
3291kmp_int32
3292__kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3293 size_t reduce_size, void *reduce_data,
3294 void (*reduce_func)(void *lhs_data, void *rhs_data),
3295 kmp_critical_name *lck) {
3296
3297 KMP_COUNT_BLOCK(REDUCE_nowait);
3298 int retval = 0;
3299 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3300#if OMP_40_ENABLED(50 >= 40)
3301 kmp_info_t *th;
3302 kmp_team_t *team;
3303 int teams_swapped = 0, task_state;
3304#endif
3305 KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
3306
3307 // why do we need this initialization here at all?
3308 // Reduction clause can not be used as a stand-alone directive.
3309
3310 // do not call __kmp_serial_initialize(), it will be called by
3311 // __kmp_parallel_initialize() if needed
3312 // possible detection of false-positive race by the threadchecker ???
3313 if (!TCR_4(__kmp_init_parallel))
3314 __kmp_parallel_initialize();
3315
3316// check correctness of reduce block nesting
3317#if KMP_USE_DYNAMIC_LOCK
3318 if (__kmp_env_consistency_check)
3319 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3320#else
3321 if (__kmp_env_consistency_check)
3322 __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3323#endif
3324
3325#if OMP_40_ENABLED(50 >= 40)
3326 th = __kmp_thread_from_gtid(global_tid);
3327 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3328#endif // OMP_40_ENABLED
3329
3330 // packed_reduction_method value will be reused by __kmp_end_reduce* function,
3331 // the value should be kept in a variable
3332 // the variable should be either a construct-specific or thread-specific
3333 // property, not a team specific property
3334 // (a thread can reach the next reduce block on the next construct, reduce
3335 // method may differ on the next construct)
3336 // an ident_t "loc" parameter could be used as a construct-specific property
3337 // (what if loc == 0?)
3338 // (if both construct-specific and team-specific variables were shared,
3339 // then unness extra syncs should be needed)
3340 // a thread-specific variable is better regarding two issues above (next
3341 // construct and extra syncs)
3342 // a thread-specific "th_local.reduction_method" variable is used currently
3343 // each thread executes 'determine' and 'set' lines (no need to execute by one
3344 // thread, to avoid unness extra syncs)
3345
3346 packed_reduction_method = __kmp_determine_reduction_method(
3347 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3348 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3349
3350 if (packed_reduction_method == critical_reduce_block) {
3351
3352 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3353 retval = 1;
3354
3355 } else if (packed_reduction_method == empty_reduce_block) {
3356
3357 // usage: if team size == 1, no synchronization is required ( Intel
3358 // platforms only )
3359 retval = 1;
3360
3361 } else if (packed_reduction_method == atomic_reduce_block) {
3362
3363 retval = 2;
3364
3365 // all threads should do this pop here (because __kmpc_end_reduce_nowait()
3366 // won't be called by the code gen)
3367 // (it's not quite good, because the checking block has been closed by
3368 // this 'pop',
3369 // but atomic operation has not been executed yet, will be executed
3370 // slightly later, literally on next instruction)
3371 if (__kmp_env_consistency_check)
3372 __kmp_pop_sync(global_tid, ct_reduce, loc);
3373
3374 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3375 tree_reduce_block)) {
3376
3377// AT: performance issue: a real barrier here
3378// AT: (if master goes slow, other threads are blocked here waiting for the
3379// master to come and release them)
3380// AT: (it's not what a customer might expect specifying NOWAIT clause)
3381// AT: (specifying NOWAIT won't result in improvement of performance, it'll
3382// be confusing to a customer)
3383// AT: another implementation of *barrier_gather*nowait() (or some other design)
3384// might go faster and be more in line with sense of NOWAIT
3385// AT: TO DO: do epcc test and compare times
3386
3387// this barrier should be invisible to a customer and to the threading profile
3388// tool (it's neither a terminating barrier nor customer's code, it's
3389// used for an internal purpose)
3390#if OMPT_SUPPORT1
3391 // JP: can this barrier potentially leed to task scheduling?
3392 // JP: as long as there is a barrier in the implementation, OMPT should and
3393 // will provide the barrier events
3394 // so we set-up the necessary frame/return addresses.
3395 omp_frame_t *ompt_frame;
3396 if (ompt_enabled.enabled) {
3397 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3398 if (ompt_frame->enter_frame == NULL)
3399 ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
3400 OMPT_STORE_RETURN_ADDRESS(global_tid);
3401 }
3402#endif
3403#if USE_ITT_NOTIFY1
3404 __kmp_threads[global_tid]->th.th_ident = loc;
3405#endif
3406 retval =
3407 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3408 global_tid, FALSE, reduce_size, reduce_data, reduce_func);
3409 retval = (retval != 0) ? (0) : (1);
3410#if OMPT_SUPPORT && OMPT_OPTIONAL
3411 if (ompt_enabled.enabled) {
3412 ompt_frame->enter_frame = NULL;
3413 }
3414#endif
3415
3416 // all other workers except master should do this pop here
3417 // ( none of other workers will get to __kmpc_end_reduce_nowait() )
3418 if (__kmp_env_consistency_check) {
3419 if (retval == 0) {
3420 __kmp_pop_sync(global_tid, ct_reduce, loc);
3421 }
3422 }
3423
3424 } else {
3425
3426 // should never reach this block
3427 KMP_ASSERT(0); // "unexpected method"
3428 }
3429#if OMP_40_ENABLED(50 >= 40)
3430 if (teams_swapped) {
3431 __kmp_restore_swapped_teams(th, team, task_state);
3432 }
3433#endif
3434 KA_TRACE(
3435 10,
3436 ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
3437 global_tid, packed_reduction_method, retval));
3438
3439 return retval;
3440}
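
A hedged sketch of the calling convention documented above, in the shape a compiler typically lowers a nowait reduction into; the helper names and the critical-name variable are illustrative and do not come from this file.

static kmp_critical_name reduce_crit; // zero-initialized lock space (illustrative)

static void my_reduce_func(void *lhs, void *rhs) {
  *(int *)lhs += *(int *)rhs; // combine two partial sums into lhs
}

static void lowered_reduction(ident_t *loc, kmp_int32 gtid, int *shared_sum,
                              int private_sum) {
  switch (__kmpc_reduce_nowait(loc, gtid, /*num_vars=*/1, sizeof(int),
                               &private_sum, my_reduce_func, &reduce_crit)) {
  case 1: // this thread performs the combine, then must call the end function
    *shared_sum += private_sum;
    __kmpc_end_reduce_nowait(loc, gtid, &reduce_crit);
    break;
  case 2: // atomic method selected: every thread updates atomically, no end call
#pragma omp atomic
    *shared_sum += private_sum;
    break;
  default: // 0: nothing left for this thread to do
    break;
  }
}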
3441
3442/*!
3443@ingroup SYNCHRONIZATION
3444@param loc source location information
3445@param global_tid global thread id.
3446@param lck pointer to the unique lock data structure
3447
3448Finish the execution of a reduce nowait.
3449*/
3450void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
3451 kmp_critical_name *lck) {
3452
3453 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3454
3455 KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
3456
3457 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3458
3459 if (packed_reduction_method == critical_reduce_block) {
3460
3461 __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3462
3463 } else if (packed_reduction_method == empty_reduce_block) {
3464
3465 // usage: if team size == 1, no synchronization is required ( on Intel
3466 // platforms only )
3467
3468 } else if (packed_reduction_method == atomic_reduce_block) {
3469
3470 // neither master nor other workers should get here
3471 // (code gen does not generate this call in case 2: atomic reduce block)
3472 // actually it's better to remove this elseif at all;
3473 // after removal this value will checked by the 'else' and will assert
3474
3475 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3476 tree_reduce_block)) {
3477
3478 // only master gets here
3479
3480 } else {
3481
3482 // should never reach this block
3483 KMP_ASSERT(0); // "unexpected method"
3484 }
3485
3486 if (__kmp_env_consistency_check)
3487 __kmp_pop_sync(global_tid, ct_reduce, loc);
3488
3489 KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n"
, global_tid, packed_reduction_method); }
3490 global_tid, packed_reduction_method))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n"
, global_tid, packed_reduction_method); }
;
3491
3492 return;
3493}
3494
3495/* 2.a.ii. Reduce Block with a terminating barrier */
3496
3497/*!
3498@ingroup SYNCHRONIZATION
3499@param loc source location information
3500@param global_tid global thread number
3501@param num_vars number of items (variables) to be reduced
3502@param reduce_size size of data in bytes to be reduced
3503@param reduce_data pointer to data to be reduced
3504@param reduce_func callback function providing reduction operation on two
3505operands and returning result of reduction in lhs_data
3506@param lck pointer to the unique lock data structure
3507@result 1 for the master thread, 0 for all other team threads, 2 for all team
3508threads if atomic reduction needed
3509
3510A blocking reduce that includes an implicit barrier.
3511*/
3512kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3513 size_t reduce_size, void *reduce_data,
3514 void (*reduce_func)(void *lhs_data, void *rhs_data),
3515 kmp_critical_name *lck) {
3516 KMP_COUNT_BLOCK(REDUCE_wait);
3517 int retval = 0;
3518 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3519#if OMP_40_ENABLED(50 >= 40)
3520 kmp_info_t *th;
3521 kmp_team_t *team;
3522 int teams_swapped = 0, task_state;
3523#endif
3524
3525 KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
3526
3527 // why do we need this initialization here at all?
3528 // Reduction clause can not be a stand-alone directive.
3529
3530 // do not call __kmp_serial_initialize(), it will be called by
3531 // __kmp_parallel_initialize() if needed
3532 // possible detection of false-positive race by the threadchecker ???
3533 if (!TCR_4(__kmp_init_parallel))
3534 __kmp_parallel_initialize();
3535
3536// check correctness of reduce block nesting
3537#if KMP_USE_DYNAMIC_LOCK
3538 if (__kmp_env_consistency_check)
3539 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3540#else
3541 if (__kmp_env_consistency_check)
3542 __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3543#endif
3544
3545#if OMP_40_ENABLED(50 >= 40)
3546 th = __kmp_thread_from_gtid(global_tid);
3547 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3548#endif // OMP_40_ENABLED
3549
3550 packed_reduction_method = __kmp_determine_reduction_method(
3551 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3552 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3553
3554 if (packed_reduction_method == critical_reduce_block) {
3555
3556 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3557 retval = 1;
3558
3559 } else if (packed_reduction_method == empty_reduce_block) {
3560
3561 // usage: if team size == 1, no synchronization is required ( Intel
3562 // platforms only )
3563 retval = 1;
3564
3565 } else if (packed_reduction_method == atomic_reduce_block) {
3566
3567 retval = 2;
3568
3569 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3570 tree_reduce_block)) {
3571
3572// case tree_reduce_block:
3573// this barrier should be visible to a customer and to the threading profile
3574// tool (it's a terminating barrier on constructs if NOWAIT not specified)
3575#if OMPT_SUPPORT1
3576 omp_frame_t *ompt_frame;
3577 if (ompt_enabled.enabled) {
3578 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3579 if (ompt_frame->enter_frame == NULL)
3580 ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
3581 OMPT_STORE_RETURN_ADDRESS(global_tid);
3583#endif
3584#if USE_ITT_NOTIFY1
3585 __kmp_threads[global_tid]->th.th_ident =
3586 loc; // needed for correct notification of frames
3587#endif
3588 retval =
3589 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3590 global_tid, TRUE, reduce_size, reduce_data, reduce_func);
3591 retval = (retval != 0) ? (0) : (1);
3592#if OMPT_SUPPORT && OMPT_OPTIONAL
3593 if (ompt_enabled.enabled) {
3594 ompt_frame->enter_frame = NULL;
3595 }
3596#endif
3597
3598 // all other workers except master should do this pop here
3599 // ( none of other workers except master will enter __kmpc_end_reduce() )
3600 if (__kmp_env_consistency_check) {
3601 if (retval == 0) { // 0: all other workers; 1: master
3602 __kmp_pop_sync(global_tid, ct_reduce, loc);
3603 }
3604 }
3605
3606 } else {
3607
3608 // should never reach this block
3609 KMP_ASSERT(0); // "unexpected method"
3610 }
3611#if OMP_40_ENABLED(50 >= 40)
3612 if (teams_swapped) {
3613 __kmp_restore_swapped_teams(th, team, task_state);
3614 }
3615#endif
3616
3617 KA_TRACE(10,
3618 ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
3619 global_tid, packed_reduction_method, retval));
3620
3621 return retval;
3622}
3623
3624/*!
3625@ingroup SYNCHRONIZATION
3626@param loc source location information
3627@param global_tid global thread id.
3628@param lck pointer to the unique lock data structure
3629
3630Finish the execution of a blocking reduce.
3631The <tt>lck</tt> pointer must be the same as that used in the corresponding
3632start function.
3633*/
3634void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
3635 kmp_critical_name *lck) {
3636
3637 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3638#if OMP_40_ENABLED
3639 kmp_info_t *th;
3640 kmp_team_t *team;
3641 int teams_swapped = 0, task_state;
3642#endif
3643
3644 KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
3645
3646#if OMP_40_ENABLED
3647 th = __kmp_thread_from_gtid(global_tid);
3648 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3649#endif // OMP_40_ENABLED
3650
3651 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3652
3653 // this barrier should be visible to a customer and to the threading profile
3654 // tool (it's a terminating barrier on constructs if NOWAIT not specified)
3655
3656 if (packed_reduction_method == critical_reduce_block) {
3657
3658 __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3659
3660// TODO: implicit barrier: should be exposed
3661#if OMPT_SUPPORT
3662 omp_frame_t *ompt_frame;
3663 if (ompt_enabled.enabled) {
3664 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3665 if (ompt_frame->enter_frame == NULL)
3666 ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
3667 OMPT_STORE_RETURN_ADDRESS(global_tid);
3668 }
3669#endif
3670#if USE_ITT_NOTIFY
3671 __kmp_threads[global_tid]->th.th_ident = loc;
3672#endif
3673 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3674#if OMPT_SUPPORT && OMPT_OPTIONAL
3675 if (ompt_enabled.enabled) {
3676 ompt_frame->enter_frame = NULL;
3677 }
3678#endif
3679
3680 } else if (packed_reduction_method == empty_reduce_block) {
3681
3682// usage: if team size==1, no synchronization is required (Intel platforms only)
3683
3684// TODO: implicit barrier: should be exposed
3685#if OMPT_SUPPORT
3686 omp_frame_t *ompt_frame;
3687 if (ompt_enabled.enabled) {
3688 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3689 if (ompt_frame->enter_frame == NULL)
3690 ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
3691 OMPT_STORE_RETURN_ADDRESS(global_tid);
3692 }
3693#endif
3694#if USE_ITT_NOTIFY
3695 __kmp_threads[global_tid]->th.th_ident = loc;
3696#endif
3697 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3698#if OMPT_SUPPORT && OMPT_OPTIONAL
3699 if (ompt_enabled.enabled) {
3700 ompt_frame->enter_frame = NULL;
3701 }
3702#endif
3703
3704 } else if (packed_reduction_method == atomic_reduce_block) {
3705
3706#if OMPT_SUPPORT
3707 omp_frame_t *ompt_frame;
3708 if (ompt_enabled.enabled) {
3709 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3710 if (ompt_frame->enter_frame == NULL)
3711 ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
3712 OMPT_STORE_RETURN_ADDRESS(global_tid);
3713 }
3714#endif
3715// TODO: implicit barrier: should be exposed
3716#if USE_ITT_NOTIFY
3717 __kmp_threads[global_tid]->th.th_ident = loc;
3718#endif
3719 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3720#if OMPT_SUPPORT && OMPT_OPTIONAL
3721 if (ompt_enabled.enabled) {
3722 ompt_frame->enter_frame = NULL;
3723 }
3724#endif
3725
3726 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3727 tree_reduce_block)) {
3728
3729 // only master executes here (master releases all other workers)
3730 __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3731 global_tid);
3732
3733 } else {
3734
3735 // should never reach this block
3736 KMP_ASSERT(0); // "unexpected method"
3737 }
3738#if OMP_40_ENABLED
3739 if (teams_swapped) {
3740 __kmp_restore_swapped_teams(th, team, task_state);
3741 }
3742#endif
3743
3744 if (__kmp_env_consistency_check)
3745 __kmp_pop_sync(global_tid, ct_reduce, loc);
3746
3747 KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
3748 global_tid, packed_reduction_method));
3749
3750 return;
3751}
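The pairing contract between the start and end calls is easiest to see from the caller's side. The sketch below is illustrative only: reduction_epilogue, my_add_func, priv_sum and shared_sum are made-up names, the __kmpc_reduce entry point is the one defined earlier in this file, and real compiler output differs in detail. It follows the return-value convention visible above: 1 means this thread combines the partial results and must call __kmpc_end_reduce with the same lck, 2 means the atomic path is used, 0 means nothing further is required.

  // Hypothetical lowering of "#pragma omp parallel for reduction(+ : sum)".
  static void my_add_func(void *lhs, void *rhs) {
    *(int *)lhs += *(int *)rhs; // combine two partial results
  }

  static void reduction_epilogue(ident_t *loc, kmp_int32 gtid, int *shared_sum,
                                 int priv_sum, kmp_critical_name *lck) {
    kmp_int32 ret = __kmpc_reduce(loc, gtid, /*num_vars=*/1, sizeof(int),
                                  &priv_sum, my_add_func, lck);
    if (ret == 1) {        // this thread folds its value into the shared one
      *shared_sum += priv_sum;
      __kmpc_end_reduce(loc, gtid, lck); // must pass the same lck as the start
    } else if (ret == 2) { // atomic method: update atomically, then finish
      __sync_fetch_and_add(shared_sum, priv_sum);
      __kmpc_end_reduce(loc, gtid, lck); // matches the atomic branch above
    }                      // ret == 0: tree-method worker, skips __kmpc_end_reduce
  }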
3752
3753#undef __KMP_GET_REDUCTION_METHOD
3754#undef __KMP_SET_REDUCTION_METHOD
3755
3756/* end of interface to fast scalable reduce routines */
3757
3758kmp_uint64 __kmpc_get_taskid() {
3759
3760 kmp_int32 gtid;
3761 kmp_info_t *thread;
3762
3763 gtid = __kmp_get_gtid();
3764 if (gtid < 0) {
3765 return 0;
3766 }
3767 thread = __kmp_thread_from_gtid(gtid);
3768 return thread->th.th_current_task->td_task_id;
3769
3770} // __kmpc_get_taskid
3771
3772kmp_uint64 __kmpc_get_parent_taskid() {
3773
3774 kmp_int32 gtid;
3775 kmp_info_t *thread;
3776 kmp_taskdata_t *parent_task;
3777
3778 gtid = __kmp_get_gtid();
3779 if (gtid < 0) {
3780 return 0;
3781 }
3782 thread = __kmp_thread_from_gtid(gtid);
3783 parent_task = thread->th.th_current_task->td_parent;
3784 return (parent_task == NULL ? 0 : parent_task->td_task_id);
3785
3786} // __kmpc_get_parent_taskid
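Both task-id queries are safe to call even without an active runtime thread, since a negative gtid makes them return 0. A minimal, illustrative call site (log_current_task is a made-up name):

  static void log_current_task(void) {
    kmp_uint64 task_id = __kmpc_get_taskid();          // 0 outside the runtime
    kmp_uint64 parent_id = __kmpc_get_parent_taskid(); // 0 if there is no parent
    // e.g. hand both ids to a tracing or profiling tool here
    (void)task_id;
    (void)parent_id;
  }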
3787
3788#if OMP_45_ENABLED
3789/*!
3790@ingroup WORK_SHARING
3791@param loc source location information.
3792@param gtid global thread number.
3793@param num_dims number of associated doacross loops.
3794@param dims info on loops bounds.
3795
3796Initialize doacross loop information.
3797Expect the compiler to send us inclusive bounds,
3798e.g. for(i=2;i<9;i+=2) lo=2, up=8, st=2.
3799*/
3800void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
3801 const struct kmp_dim *dims) {
3802 int j, idx;
3803 kmp_int64 last, trace_count;
3804 kmp_info_t *th = __kmp_threads[gtid];
3805 kmp_team_t *team = th->th.th_team;
3806 kmp_uint32 *flags;
3807 kmp_disp_t *pr_buf = th->th.th_dispatch;
3808 dispatch_shared_info_t *sh_buf;
3809
3810 KA_TRACE(
3811 20,
3812 ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
3813 gtid, num_dims, !team->t.t_serialized));
3814 KMP_DEBUG_ASSERT(dims != NULL);
3815 KMP_DEBUG_ASSERT(num_dims > 0);
3816
3817 if (team->t.t_serialized) {
3818 KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
3819 return; // no dependencies if team is serialized
3820 }
3821 KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
3822 idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
3823 // the next loop
3824 sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
3825
3826 // Save bounds info into allocated private buffer
3827 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
3828 pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
3829 th, sizeof(kmp_int64) * (4 * num_dims + 1));
3830 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
3831 pr_buf->th_doacross_info[0] =
3832 (kmp_int64)num_dims; // first element is number of dimensions
3833 // Save also address of num_done in order to access it later without knowing
3834 // the buffer index
3835 pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
3836 pr_buf->th_doacross_info[2] = dims[0].lo;
3837 pr_buf->th_doacross_info[3] = dims[0].up;
3838 pr_buf->th_doacross_info[4] = dims[0].st;
3839 last = 5;
3840 for (j = 1; j < num_dims; ++j) {
3841 kmp_int64
3842 range_length; // To keep ranges of all dimensions but the first dims[0]
3843 if (dims[j].st == 1) { // most common case
3844 // AC: should we care of ranges bigger than LLONG_MAX? (not for now)
3845 range_length = dims[j].up - dims[j].lo + 1;
3846 } else {
3847 if (dims[j].st > 0) {
3848 KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
3849 range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
3850 } else { // negative increment
3851 KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
3852 range_length =
3853 (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
3854 }
3855 }
3856 pr_buf->th_doacross_info[last++] = range_length;
3857 pr_buf->th_doacross_info[last++] = dims[j].lo;
3858 pr_buf->th_doacross_info[last++] = dims[j].up;
3859 pr_buf->th_doacross_info[last++] = dims[j].st;
3860 }
3861
3862 // Compute total trip count.
3863 // Start with range of dims[0] which we don't need to keep in the buffer.
3864 if (dims[0].st == 1) { // most common case
3865 trace_count = dims[0].up - dims[0].lo + 1;
3866 } else if (dims[0].st > 0) {
3867 KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
3868 trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
3869 } else { // negative increment
3870 KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
3871 trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
3872 }
3873 for (j = 1; j < num_dims; ++j) {
3874 trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
3875 }
3876 KMP_DEBUG_ASSERT(trace_count > 0);
3877
3878 // Check if shared buffer is not occupied by other loop (idx -
3879 // __kmp_dispatch_num_buffers)
3880 if (idx != sh_buf->doacross_buf_idx) {
3881 // Shared buffer is occupied, wait for it to be free
3882 __kmp_wait_yield_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
3883 __kmp_eq_4, NULL);
3884 }
3885#if KMP_32_BIT_ARCH
3886 // Check if we are the first thread. After the CAS the first thread gets 0,
3887 // others get 1 if initialization is in progress, allocated pointer otherwise.
3888 // Treat pointer as volatile integer (value 0 or 1) until memory is allocated.
3889 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
3890 (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
3891#else
3892 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
3893 (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
3894#endif
3895 if (flags == NULL) {
3896 // we are the first thread, allocate the array of flags
3897 size_t size = trace_count / 8 + 8; // in bytes, use single bit per iteration
3898 flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
3899 KMP_MB();
3900 sh_buf->doacross_flags = flags;
3901 } else if (flags == (kmp_uint32 *)1) {
3902#if KMP_32_BIT_ARCH
3903 // initialization is still in progress, need to wait
3904 while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
3905#else
3906 while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
3907#endif
3908 KMP_YIELD(TRUE);
3909 KMP_MB();
3910 } else {
3911 KMP_MB();
3912 }
3913 KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value
3914 pr_buf->th_doacross_flags =
3915 sh_buf->doacross_flags; // save private copy in order to not
3916 // touch shared buffer on each iteration
3917 KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
3918}
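As a concrete illustration of the inclusive-bounds convention described in the function comment, this is roughly the call a compiler might emit for the loop for(i=2;i<9;i+=2). The identifiers loc and gtid stand for the usual location and thread arguments, and the kmp_dim fields are the lo/up/st values this function reads; the snippet is a sketch, not verbatim compiler output.

  // Hypothetical call emitted for:  for (i = 2; i < 9; i += 2)
  struct kmp_dim dims[1];
  dims[0].lo = 2; // first iteration value
  dims[0].up = 8; // last iteration value, inclusive (not 9)
  dims[0].st = 2; // stride
  __kmpc_doacross_init(loc, gtid, /*num_dims=*/1, dims);
  // trip count as computed above: (8 - 2) / 2 + 1 = 4 iterations,
  // so the shared flag array needs 4 bits (rounded up to whole bytes).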
3919
3920void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
3921 kmp_int32 shft, num_dims, i;
3922 kmp_uint32 flag;
3923 kmp_int64 iter_number; // iteration number of "collapsed" loop nest
3924 kmp_info_t *th = __kmp_threads[gtid];
3925 kmp_team_t *team = th->th.th_team;
3926 kmp_disp_t *pr_buf;
3927 kmp_int64 lo, up, st;
3928
3929 KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
3930 if (team->t.t_serialized) {
3931 KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
3932 return; // no dependencies if team is serialized
3933 }
3934
3935 // calculate sequential iteration number and check out-of-bounds condition
3936 pr_buf = th->th.th_dispatch;
3937 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
3938 num_dims = pr_buf->th_doacross_info[0];
3939 lo = pr_buf->th_doacross_info[2];
3940 up = pr_buf->th_doacross_info[3];
3941 st = pr_buf->th_doacross_info[4];
3942 if (st == 1) { // most common case
3943 if (vec[0] < lo || vec[0] > up) {
3944 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
3945 "bounds [%lld,%lld]\n",
3946 gtid, vec[0], lo, up));
3947 return;
3948 }
3949 iter_number = vec[0] - lo;
3950 } else if (st > 0) {
3951 if (vec[0] < lo || vec[0] > up) {
3952 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
3953 "bounds [%lld,%lld]\n",
3954 gtid, vec[0], lo, up));
3955 return;
3956 }
3957 iter_number = (kmp_uint64)(vec[0] - lo) / st;
3958 } else { // negative increment
3959 if (vec[0] > lo || vec[0] < up) {
3960 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
3961 "bounds [%lld,%lld]\n",
3962 gtid, vec[0], lo, up));
3963 return;
3964 }
3965 iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
3966 }
3967 for (i = 1; i < num_dims; ++i) {
3968 kmp_int64 iter, ln;
3969 kmp_int32 j = i * 4;
3970 ln = pr_buf->th_doacross_info[j + 1];
3971 lo = pr_buf->th_doacross_info[j + 2];
3972 up = pr_buf->th_doacross_info[j + 3];
3973 st = pr_buf->th_doacross_info[j + 4];
3974 if (st == 1) {
3975 if (vec[i] < lo || vec[i] > up) {
3976 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
3977 "bounds [%lld,%lld]\n",
3978 gtid, vec[i], lo, up));
3979 return;
3980 }
3981 iter = vec[i] - lo;
3982 } else if (st > 0) {
3983 if (vec[i] < lo || vec[i] > up) {
3984 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
3985 "bounds [%lld,%lld]\n",
3986 gtid, vec[i], lo, up));
3987 return;
3988 }
3989 iter = (kmp_uint64)(vec[i] - lo) / st;
3990 } else { // st < 0
3991 if (vec[i] > lo || vec[i] < up) {
3992 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
3993 "bounds [%lld,%lld]\n",
3994 gtid, vec[i], lo, up));
3995 return;
3996 }
3997 iter = (kmp_uint64)(lo - vec[i]) / (-st);
3998 }
3999 iter_number = iter + ln * iter_number;
4000 }
4001 shft = iter_number % 32; // use 32-bit granularity
4002 iter_number >>= 5; // divided by 32
4003 flag = 1 << shft;
4004 while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
4005 KMP_YIELD(TRUE);
4006 }
4007 KMP_MB();
4008 KA_TRACE(20,
4009 ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
4010 gtid, (iter_number << 5) + shft));
4011}
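The flag addressing above packs one bit per collapsed iteration into 32-bit words. A worked example with made-up numbers:

  // Suppose the collapsed iteration number is 37:
  //   shft        = 37 % 32 = 5    -> bit position inside one 32-bit word
  //   iter_number = 37 >> 5  = 1   -> index of the word in th_doacross_flags
  //   flag        = 1 << 5   = 0x20
  // The waiter therefore spins until bit 5 of th_doacross_flags[1] is set,
  // which happens once some thread posts iteration 37 (see __kmpc_doacross_post).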
4012
4013void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
4014 kmp_int32 shft, num_dims, i;
4015 kmp_uint32 flag;
4016 kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4017 kmp_info_t *th = __kmp_threads[gtid];
4018 kmp_team_t *team = th->th.th_team;
4019 kmp_disp_t *pr_buf;
4020 kmp_int64 lo, st;
4021
4022 KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
4023 if (team->t.t_serialized) {
4024 KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
4025 return; // no dependencies if team is serialized
4026 }
4027
4028 // calculate sequential iteration number (same as in "wait" but no
4029 // out-of-bounds checks)
4030 pr_buf = th->th.th_dispatch;
4031 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4032 num_dims = pr_buf->th_doacross_info[0];
4033 lo = pr_buf->th_doacross_info[2];
4034 st = pr_buf->th_doacross_info[4];
4035 if (st == 1) { // most common case
4036 iter_number = vec[0] - lo;
4037 } else if (st > 0) {
4038 iter_number = (kmp_uint64)(vec[0] - lo) / st;
4039 } else { // negative increment
4040 iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4041 }
4042 for (i = 1; i < num_dims; ++i) {
4043 kmp_int64 iter, ln;
4044 kmp_int32 j = i * 4;
4045 ln = pr_buf->th_doacross_info[j + 1];
4046 lo = pr_buf->th_doacross_info[j + 2];
4047 st = pr_buf->th_doacross_info[j + 4];
4048 if (st == 1) {
4049 iter = vec[i] - lo;
4050 } else if (st > 0) {
4051 iter = (kmp_uint64)(vec[i] - lo) / st;
4052 } else { // st < 0
4053 iter = (kmp_uint64)(lo - vec[i]) / (-st);
4054 }
4055 iter_number = iter + ln * iter_number;
4056 }
4057 shft = iter_number % 32; // use 32-bit granularity
4058 iter_number >>= 5; // divided by 32
4059 flag = 1 << shft;
4060 KMP_MB();
4061 if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
4062 KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
4063 KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
4064 (iter_number << 5) + shft));
4065}
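Wait and post must agree on the collapsed iteration number, which both functions build by Horner's rule (iter_number = iter + ln * iter_number) over the per-dimension ranges saved in th_doacross_info. A worked two-dimensional example with illustrative values:

  // dims[0]: lo=0, up=3, st=1             -> outer index  vec[0] = 2
  // dims[1]: lo=0, up=9, st=1  (ln = 10)  -> inner index  vec[1] = 7
  //   iter_number = vec[0] - lo0                 = 2
  //   iter_number = (vec[1] - lo1) + ln * 2      = 7 + 20 = 27
  // Both __kmpc_doacross_wait and __kmpc_doacross_post therefore address
  // bit 27 of word 0 of the shared flag array for this iteration.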
4066
4067void __kmpc_doacross_fini(ident_t *loc, int gtid) {
4068 kmp_int32 num_done;
4069 kmp_info_t *th = __kmp_threads[gtid];
4070 kmp_team_t *team = th->th.th_team;
4071 kmp_disp_t *pr_buf = th->th.th_dispatch;
4072
4073 KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
4074 if (team->t.t_serialized) {
4075 KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
4076 return; // nothing to do
4077 }
4078 num_done = KMP_TEST_THEN_INC32((kmp_int32 *)pr_buf->th_doacross_info[1]) + 1;
4079 if (num_done == th->th.th_team_nproc) {
4080 // we are the last thread, need to free shared resources
4081 int idx = pr_buf->th_doacross_buf_idx - 1;
4082 dispatch_shared_info_t *sh_buf =
4083 &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4084 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
4085 (kmp_int64)&sh_buf->doacross_num_done);
4086 KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
4087 KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
4088 __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
4089 sh_buf->doacross_flags = NULL;
4090 sh_buf->doacross_num_done = 0;
4091 sh_buf->doacross_buf_idx +=
4092 __kmp_dispatch_num_buffers; // free buffer for future re-use
4093 }
4094 // free private resources (need to keep buffer index forever)
4095 pr_buf->th_doacross_flags = NULL;
4096 __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
4097 pr_buf->th_doacross_info = NULL;
4098 KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
4099}
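Taken together, the four doacross entry points are expected to be called in the following order by compiler-generated code. The sketch is illustrative only: n, lb, ub, loc and gtid are assumed to come from the enclosing worksharing code, and the exact sequence clang emits differs in detail.

  // Hypothetical lowering of:
  //   #pragma omp for ordered(1)
  //   for (i = 0; i < n; i++) {
  //     #pragma omp ordered depend(sink : i - 1)
  //     ...body...
  //     #pragma omp ordered depend(source)
  //   }
  struct kmp_dim dims[1] = {{0, n - 1, 1}};  // inclusive bounds, stride 1
  __kmpc_doacross_init(loc, gtid, 1, dims);
  for (kmp_int64 i = lb; i <= ub; ++i) {     // this thread's chunk
    kmp_int64 sink[1] = {i - 1};
    __kmpc_doacross_wait(loc, gtid, sink);   // returns early if i-1 is out of bounds
    // ...body...
    kmp_int64 src[1] = {i};
    __kmpc_doacross_post(loc, gtid, src);    // publish completion of iteration i
  }
  __kmpc_doacross_fini(loc, gtid);           // last thread frees the shared flags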
4100#endif
4101
4102#if OMP_50_ENABLED
4103int __kmpc_get_target_offload(void) {
4104 if (!__kmp_init_serial) {
4105 __kmp_serial_initialize();
4106 }
4107 return __kmp_target_offload;
4108}
4109#endif // OMP_50_ENABLED
4110
4111// end of file //