Bug Summary

File: /build/source/openmp/runtime/src/kmp_runtime.cpp
Warning: line 1952, column 9
1st function call argument is an uninitialized value
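
For context, this diagnostic is produced by the analyzer's core call-and-message checks (core.CallAndMessage reports "Nth function call argument is an uninitialized value") when a local that may still be uninitialized on some execution path is passed to a function. The sketch below is a minimal, hypothetical illustration of that warning class only; it is not the code at kmp_runtime.cpp:1952, which lies outside the lines reproduced in this excerpt, and the names consume, demo, and have_input are invented for the example.

// Hypothetical illustration of the warning class, not the kmp_runtime.cpp code.
#include <cstdio>

static void consume(int value) { std::printf("%d\n", value); }

void demo(bool have_input) {
  int value; // deliberately left uninitialized
  if (have_input)
    value = 42;
  // On the path where have_input is false, 'value' is still uninitialized here,
  // so the analyzer reports "1st function call argument is an uninitialized value".
  consume(value);
}
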

Annotated Source Code


clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name kmp_runtime.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -resource-dir /usr/lib/llvm-17/lib/clang/17 -D _DEBUG -D _GLIBCXX_ASSERTIONS -D _GNU_SOURCE -D _LIBCPP_ENABLE_ASSERTIONS -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -D omp_EXPORTS -I projects/openmp/runtime/src -I /build/source/openmp/runtime/src -I include -I /build/source/llvm/include -I /build/source/openmp/runtime/src/i18n -I /build/source/openmp/runtime/src/include -I /build/source/openmp/runtime/src/thirdparty/ittnotify -D _FORTIFY_SOURCE=2 -D NDEBUG -D _GNU_SOURCE -D _REENTRANT -D _FORTIFY_SOURCE=2 -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-17/lib/clang/17/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fmacro-prefix-map=/build/source/= -fcoverage-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fcoverage-prefix-map=/build/source/= -source-date-epoch 1683717183 -O2 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -Wno-misleading-indentation -Wno-extra -Wno-pedantic -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-frame-address -Wno-strict-aliasing -Wno-stringop-truncation -Wno-switch -Wno-uninitialized -Wno-cast-qual -std=c++17 -fdeprecated-macro -fdebug-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/= -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fno-rtti -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2023-05-10-133810-16478-1 -x c++ 
/build/source/openmp/runtime/src/kmp_runtime.cpp

1/*
2 * kmp_runtime.cpp -- KPTS runtime support library
3 */
4
5//===----------------------------------------------------------------------===//
6//
7// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8// See https://llvm.org/LICENSE.txt for license information.
9// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10//
11//===----------------------------------------------------------------------===//
12
13#include "kmp.h"
14#include "kmp_affinity.h"
15#include "kmp_atomic.h"
16#include "kmp_environment.h"
17#include "kmp_error.h"
18#include "kmp_i18n.h"
19#include "kmp_io.h"
20#include "kmp_itt.h"
21#include "kmp_settings.h"
22#include "kmp_stats.h"
23#include "kmp_str.h"
24#include "kmp_wait_release.h"
25#include "kmp_wrapper_getpid.h"
26#include "kmp_dispatch.h"
27#if KMP_USE_HIER_SCHED0
28#include "kmp_dispatch_hier.h"
29#endif
30
31#if OMPT_SUPPORT1
32#include "ompt-specific.h"
33#endif
34#if OMPD_SUPPORT1
35#include "ompd-specific.h"
36#endif
37
38#if OMP_PROFILING_SUPPORT0
39#include "llvm/Support/TimeProfiler.h"
40static char *ProfileTraceFile = nullptr;
41#endif
42
43/* these are temporary issues to be dealt with */
44#define KMP_USE_PRCTL0 0
45
46#if KMP_OS_WINDOWS0
47#include <process.h>
48#endif
49
50#if KMP_OS_WINDOWS0
51// windows does not need include files as it doesn't use shared memory
52#else
53#include <sys/mman.h>
54#include <sys/stat.h>
55#include <fcntl.h>
56#define SHM_SIZE1024 1024
57#endif
58
59#if defined(KMP_GOMP_COMPAT)
60char const __kmp_version_alt_comp[] =
61 KMP_VERSION_PREFIX"\x00@(#) " "LLVM OMP " "alternative compiler support: yes";
62#endif /* defined(KMP_GOMP_COMPAT) */
63
64char const __kmp_version_omp_api[] =
65 KMP_VERSION_PREFIX"\x00@(#) " "LLVM OMP " "API version: 5.0 (201611)";
66
67#ifdef KMP_DEBUG1
68char const __kmp_version_lock[] =
69 KMP_VERSION_PREFIX"\x00@(#) " "LLVM OMP " "lock type: run time selectable";
70#endif /* KMP_DEBUG */
71
72#define KMP_MIN(x, y)((x) < (y) ? (x) : (y)) ((x) < (y) ? (x) : (y))
73
74/* ------------------------------------------------------------------------ */
75
76#if KMP_USE_MONITOR
77kmp_info_t __kmp_monitor;
78#endif
79
80/* Forward declarations */
81
82void __kmp_cleanup(void);
83
84static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *, int tid,
85 int gtid);
86static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
87 kmp_internal_control_t *new_icvs,
88 ident_t *loc);
89#if KMP_AFFINITY_SUPPORTED1
90static void __kmp_partition_places(kmp_team_t *team,
91 int update_master_only = 0);
92#endif
93static void __kmp_do_serial_initialize(void);
94void __kmp_fork_barrier(int gtid, int tid);
95void __kmp_join_barrier(int gtid);
96void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
97 kmp_internal_control_t *new_icvs, ident_t *loc);
98
99#ifdef USE_LOAD_BALANCE1
100static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc);
101#endif
102
103static int __kmp_expand_threads(int nNeed);
104#if KMP_OS_WINDOWS0
105static int __kmp_unregister_root_other_thread(int gtid);
106#endif
107static void __kmp_reap_thread(kmp_info_t *thread, int is_root);
108kmp_info_t *__kmp_thread_pool_insert_pt = NULL__null;
109
110void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
111 int new_nthreads);
112void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads);
113
114/* Calculate the identifier of the current thread */
115/* fast (and somewhat portable) way to get unique identifier of executing
116 thread. Returns KMP_GTID_DNE if we haven't been assigned a gtid. */
117int __kmp_get_global_thread_id() {
118 int i;
119 kmp_info_t **other_threads;
120 size_t stack_data;
121 char *stack_addr;
122 size_t stack_size;
123 char *stack_base;
124
125 KA_TRACE(if (kmp_a_debug >= 1000) { __kmp_debug_printf ("*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n"
, __kmp_nth, __kmp_all_nth); }
126 1000,if (kmp_a_debug >= 1000) { __kmp_debug_printf ("*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n"
, __kmp_nth, __kmp_all_nth); }
127 ("*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",if (kmp_a_debug >= 1000) { __kmp_debug_printf ("*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n"
, __kmp_nth, __kmp_all_nth); }
128 __kmp_nth, __kmp_all_nth))if (kmp_a_debug >= 1000) { __kmp_debug_printf ("*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n"
, __kmp_nth, __kmp_all_nth); }
;
129
130 /* JPH - to handle the case where __kmpc_end(0) is called immediately prior to
131 a parallel region, made it return KMP_GTID_DNE to force serial_initialize
132 by caller. Had to handle KMP_GTID_DNE at all call-sites, or else guarantee
133 __kmp_init_gtid for this to work. */
134
135 if (!TCR_4(__kmp_init_gtid)(__kmp_init_gtid))
136 return KMP_GTID_DNE(-2);
137
138#ifdef KMP_TDATA_GTID1
139 if (TCR_4(__kmp_gtid_mode)(__kmp_gtid_mode) >= 3) {
140 KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using TDATA\n"))if (kmp_a_debug >= 1000) { __kmp_debug_printf ("*** __kmp_get_global_thread_id: using TDATA\n"
); }
;
141 return __kmp_gtid;
142 }
143#endif
144 if (TCR_4(__kmp_gtid_mode)(__kmp_gtid_mode) >= 2) {
145 KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using keyed TLS\n"))if (kmp_a_debug >= 1000) { __kmp_debug_printf ("*** __kmp_get_global_thread_id: using keyed TLS\n"
); }
;
146 return __kmp_gtid_get_specific();
147 }
148 KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using internal alg.\n"))if (kmp_a_debug >= 1000) { __kmp_debug_printf ("*** __kmp_get_global_thread_id: using internal alg.\n"
); }
;
149
150 stack_addr = (char *)&stack_data;
151 other_threads = __kmp_threads;
152
153 /* ATT: The code below is a source of potential bugs due to unsynchronized
154 access to __kmp_threads array. For example:
155 1. Current thread loads other_threads[i] to thr and checks it, it is
156 non-NULL.
157 2. Current thread is suspended by OS.
158 3. Another thread unregisters and finishes (debug versions of free()
159 may fill memory with something like 0xEF).
160 4. Current thread is resumed.
161 5. Current thread reads junk from *thr.
162 TODO: Fix it. --ln */
163
164 for (i = 0; i < __kmp_threads_capacity; i++) {
165
166 kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i])((void *)(other_threads[i]));
167 if (!thr)
168 continue;
169
170 stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize)((void *)(thr->th.th_info.ds.ds_stacksize));
171 stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase)((void *)(thr->th.th_info.ds.ds_stackbase));
172
173 /* stack grows down -- search through all of the active threads */
174
175 if (stack_addr <= stack_base) {
176 size_t stack_diff = stack_base - stack_addr;
177
178 if (stack_diff <= stack_size) {
179 /* The only way we can be closer than the allocated */
180 /* stack size is if we are running on this thread. */
181 KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == i)if (!(__kmp_gtid_get_specific() == i)) { __kmp_debug_assert("__kmp_gtid_get_specific() == i"
, "openmp/runtime/src/kmp_runtime.cpp", 181); }
;
182 return i;
183 }
184 }
185 }
186
187 /* get specific to try and determine our gtid */
188 KA_TRACE(1000,if (kmp_a_debug >= 1000) { __kmp_debug_printf ("*** __kmp_get_global_thread_id: internal alg. failed to find "
"thread, using TLS\n"); }
189 ("*** __kmp_get_global_thread_id: internal alg. failed to find "if (kmp_a_debug >= 1000) { __kmp_debug_printf ("*** __kmp_get_global_thread_id: internal alg. failed to find "
"thread, using TLS\n"); }
190 "thread, using TLS\n"))if (kmp_a_debug >= 1000) { __kmp_debug_printf ("*** __kmp_get_global_thread_id: internal alg. failed to find "
"thread, using TLS\n"); }
;
191 i = __kmp_gtid_get_specific();
192
193 /*fprintf( stderr, "=== %d\n", i ); */ /* GROO */
194
195 /* if we haven't been assigned a gtid, then return code */
196 if (i < 0)
197 return i;
198
199 /* dynamically updated stack window for uber threads to avoid get_specific
200 call */
201 if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)(other_threads[i]->th.th_info.ds.ds_stackgrow)) {
202 KMP_FATAL(StackOverflow, i)__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_StackOverflow, i), __kmp_msg_null
)
;
203 }
204
205 stack_base = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
206 if (stack_addr > stack_base) {
207 TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr)((other_threads[i]->th.th_info.ds.ds_stackbase)) = ((stack_addr
))
;
208 TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,((other_threads[i]->th.th_info.ds.ds_stacksize)) = ((other_threads
[i]->th.th_info.ds.ds_stacksize + stack_addr - stack_base)
)
209 other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr -((other_threads[i]->th.th_info.ds.ds_stacksize)) = ((other_threads
[i]->th.th_info.ds.ds_stacksize + stack_addr - stack_base)
)
210 stack_base)((other_threads[i]->th.th_info.ds.ds_stacksize)) = ((other_threads
[i]->th.th_info.ds.ds_stacksize + stack_addr - stack_base)
)
;
211 } else {
212 TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,((other_threads[i]->th.th_info.ds.ds_stacksize)) = ((stack_base
- stack_addr))
213 stack_base - stack_addr)((other_threads[i]->th.th_info.ds.ds_stacksize)) = ((stack_base
- stack_addr))
;
214 }
215
216 /* Reprint stack bounds for ubermaster since they have been refined */
217 if (__kmp_storage_map) {
218 char *stack_end = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
219 char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
220 __kmp_print_storage_map_gtid(i, stack_beg, stack_end,
221 other_threads[i]->th.th_info.ds.ds_stacksize,
222 "th_%d stack (refinement)", i);
223 }
224 return i;
225}
226
227int __kmp_get_global_thread_id_reg() {
228 int gtid;
229
230 if (!__kmp_init_serial) {
231 gtid = KMP_GTID_DNE(-2);
232 } else
233#ifdef KMP_TDATA_GTID1
234 if (TCR_4(__kmp_gtid_mode)(__kmp_gtid_mode) >= 3) {
235 KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using TDATA\n"))if (kmp_a_debug >= 1000) { __kmp_debug_printf ("*** __kmp_get_global_thread_id_reg: using TDATA\n"
); }
;
236 gtid = __kmp_gtid;
237 } else
238#endif
239 if (TCR_4(__kmp_gtid_mode)(__kmp_gtid_mode) >= 2) {
240 KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using keyed TLS\n"))if (kmp_a_debug >= 1000) { __kmp_debug_printf ("*** __kmp_get_global_thread_id_reg: using keyed TLS\n"
); }
;
241 gtid = __kmp_gtid_get_specific();
242 } else {
243 KA_TRACE(1000,if (kmp_a_debug >= 1000) { __kmp_debug_printf ("*** __kmp_get_global_thread_id_reg: using internal alg.\n"
); }
244 ("*** __kmp_get_global_thread_id_reg: using internal alg.\n"))if (kmp_a_debug >= 1000) { __kmp_debug_printf ("*** __kmp_get_global_thread_id_reg: using internal alg.\n"
); }
;
245 gtid = __kmp_get_global_thread_id();
246 }
247
248 /* we must be a new uber master sibling thread */
249 if (gtid == KMP_GTID_DNE(-2)) {
250 KA_TRACE(10,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_get_global_thread_id_reg: Encountered new root thread. "
"Registering a new gtid.\n"); }
251 ("__kmp_get_global_thread_id_reg: Encountered new root thread. "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_get_global_thread_id_reg: Encountered new root thread. "
"Registering a new gtid.\n"); }
252 "Registering a new gtid.\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_get_global_thread_id_reg: Encountered new root thread. "
"Registering a new gtid.\n"); }
;
253 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
254 if (!__kmp_init_serial) {
255 __kmp_do_serial_initialize();
256 gtid = __kmp_gtid_get_specific();
257 } else {
258 gtid = __kmp_register_root(FALSE0);
259 }
260 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
261 /*__kmp_printf( "+++ %d\n", gtid ); */ /* GROO */
262 }
263
264 KMP_DEBUG_ASSERT(gtid >= 0)if (!(gtid >= 0)) { __kmp_debug_assert("gtid >= 0", "openmp/runtime/src/kmp_runtime.cpp"
, 264); }
;
265
266 return gtid;
267}
268
269/* caller must hold forkjoin_lock */
270void __kmp_check_stack_overlap(kmp_info_t *th) {
271 int f;
272 char *stack_beg = NULL__null;
273 char *stack_end = NULL__null;
274 int gtid;
275
276 KA_TRACE(10, ("__kmp_check_stack_overlap: called\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_check_stack_overlap: called\n"
); }
;
277 if (__kmp_storage_map) {
278 stack_end = (char *)th->th.th_info.ds.ds_stackbase;
279 stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
280
281 gtid = __kmp_gtid_from_thread(th);
282
283 if (gtid == KMP_GTID_MONITOR(-4)) {
284 __kmp_print_storage_map_gtid(
285 gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
286 "th_%s stack (%s)", "mon",
287 (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
288 } else {
289 __kmp_print_storage_map_gtid(
290 gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
291 "th_%d stack (%s)", gtid,
292 (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
293 }
294 }
295
296 /* No point in checking ubermaster threads since they use refinement and
297 * cannot overlap */
298 gtid = __kmp_gtid_from_thread(th);
299 if (__kmp_env_checks == TRUE(!0) && !KMP_UBER_GTID(gtid)) {
300 KA_TRACE(10,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_check_stack_overlap: performing extensive checking\n"
); }
301 ("__kmp_check_stack_overlap: performing extensive checking\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_check_stack_overlap: performing extensive checking\n"
); }
;
302 if (stack_beg == NULL__null) {
303 stack_end = (char *)th->th.th_info.ds.ds_stackbase;
304 stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
305 }
306
307 for (f = 0; f < __kmp_threads_capacity; f++) {
308 kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f])((void *)(__kmp_threads[f]));
309
310 if (f_th && f_th != th) {
311 char *other_stack_end =
312 (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase)((void *)(f_th->th.th_info.ds.ds_stackbase));
313 char *other_stack_beg =
314 other_stack_end - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize)((void *)(f_th->th.th_info.ds.ds_stacksize));
315 if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
316 (stack_end > other_stack_beg && stack_end < other_stack_end)) {
317
318 /* Print the other stack values before the abort */
319 if (__kmp_storage_map)
320 __kmp_print_storage_map_gtid(
321 -1, other_stack_beg, other_stack_end,
322 (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize)((void *)(f_th->th.th_info.ds.ds_stacksize)),
323 "th_%d stack (overlapped)", __kmp_gtid_from_thread(f_th));
324
325 __kmp_fatal(KMP_MSG(StackOverlap)__kmp_msg_format(kmp_i18n_msg_StackOverlap), KMP_HNT(ChangeStackLimit)__kmp_msg_format(kmp_i18n_hnt_ChangeStackLimit),
326 __kmp_msg_null);
327 }
328 }
329 }
330 }
331 KA_TRACE(10, ("__kmp_check_stack_overlap: returning\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_check_stack_overlap: returning\n"
); }
;
332}
333
334/* ------------------------------------------------------------------------ */
335
336void __kmp_infinite_loop(void) {
337 static int done = FALSE0;
338
339 while (!done) {
340 KMP_YIELD(TRUE){ __kmp_x86_pause(); if (((!0)) && (((__kmp_use_yield
== 1) || (__kmp_use_yield == 2 && (((__kmp_nth) >
(__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc))))))) __kmp_yield
(); }
;
341 }
342}
343
344#define MAX_MESSAGE512 512
345
346void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, size_t size,
347 char const *format, ...) {
348 char buffer[MAX_MESSAGE512];
349 va_list ap;
350
351 va_start(ap, format)__builtin_va_start(ap, format);
352 KMP_SNPRINTFsnprintf(buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1,
353 p2, (unsigned long)size, format);
354 __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
355 __kmp_vprintf(kmp_err, buffer, ap);
356#if KMP_PRINT_DATA_PLACEMENT
357 int node;
358 if (gtid >= 0) {
359 if (p1 <= p2 && (char *)p2 - (char *)p1 == size) {
360 if (__kmp_storage_map_verbose) {
361 node = __kmp_get_host_node(p1);
362 if (node < 0) /* doesn't work, so don't try this next time */
363 __kmp_storage_map_verbose = FALSE0;
364 else {
365 char *last;
366 int lastNode;
367 int localProc = __kmp_get_cpu_from_gtid(gtid);
368
369 const int page_size = KMP_GET_PAGE_SIZE()getpagesize();
370
371 p1 = (void *)((size_t)p1 & ~((size_t)page_size - 1));
372 p2 = (void *)(((size_t)p2 - 1) & ~((size_t)page_size - 1));
373 if (localProc >= 0)
374 __kmp_printf_no_lock(" GTID %d localNode %d\n", gtid,
375 localProc >> 1);
376 else
377 __kmp_printf_no_lock(" GTID %d\n", gtid);
378#if KMP_USE_PRCTL0
379 /* The more elaborate format is disabled for now because of the prctl
380 * hanging bug. */
381 do {
382 last = p1;
383 lastNode = node;
384 /* This loop collates adjacent pages with the same host node. */
385 do {
386 (char *)p1 += page_size;
387 } while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
388 __kmp_printf_no_lock(" %p-%p memNode %d\n", last, (char *)p1 - 1,
389 lastNode);
390 } while (p1 <= p2);
391#else
392 __kmp_printf_no_lock(" %p-%p memNode %d\n", p1,
393 (char *)p1 + (page_size - 1),
394 __kmp_get_host_node(p1));
395 if (p1 < p2) {
396 __kmp_printf_no_lock(" %p-%p memNode %d\n", p2,
397 (char *)p2 + (page_size - 1),
398 __kmp_get_host_node(p2));
399 }
400#endif
401 }
402 }
403 } else
404 __kmp_printf_no_lock(" %s\n", KMP_I18N_STR(StorageMapWarning)__kmp_i18n_catgets(kmp_i18n_str_StorageMapWarning));
405 }
406#endif /* KMP_PRINT_DATA_PLACEMENT */
407 __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
408}
409
410void __kmp_warn(char const *format, ...) {
411 char buffer[MAX_MESSAGE512];
412 va_list ap;
413
414 if (__kmp_generate_warnings == kmp_warnings_off) {
415 return;
416 }
417
418 va_start(ap, format)__builtin_va_start(ap, format);
419
420 KMP_SNPRINTFsnprintf(buffer, sizeof(buffer), "OMP warning: %s\n", format);
421 __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
422 __kmp_vprintf(kmp_err, buffer, ap);
423 __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
424
425 va_end(ap)__builtin_va_end(ap);
426}
427
428void __kmp_abort_process() {
429 // Later threads may stall here, but that's ok because abort() will kill them.
430 __kmp_acquire_bootstrap_lock(&__kmp_exit_lock);
431
432 if (__kmp_debug_buf) {
433 __kmp_dump_debug_buffer();
434 }
435
436 if (KMP_OS_WINDOWS0) {
437 // Let other threads know of abnormal termination and prevent deadlock
438 // if abort happened during library initialization or shutdown
439 __kmp_global.g.g_abort = SIGABRT6;
440
441 /* On Windows* OS by default abort() causes pop-up error box, which stalls
442 nightly testing. Unfortunately, we cannot reliably suppress pop-up error
443 boxes. _set_abort_behavior() works well, but this function is not
444 available in VS7 (this is not problem for DLL, but it is a problem for
445 static OpenMP RTL). SetErrorMode (and so, timelimit utility) does not
446 help, at least in some versions of MS C RTL.
447
448 It seems following sequence is the only way to simulate abort() and
449 avoid pop-up error box. */
450 raise(SIGABRT6);
451 _exit(3); // Just in case, if signal ignored, exit anyway.
452 } else {
453 __kmp_unregister_library();
454 abort();
455 }
456
457 __kmp_infinite_loop();
458 __kmp_release_bootstrap_lock(&__kmp_exit_lock);
459
460} // __kmp_abort_process
461
462void __kmp_abort_thread(void) {
463 // TODO: Eliminate g_abort global variable and this function.
464 // In case of abort just call abort(), it will kill all the threads.
465 __kmp_infinite_loop();
466} // __kmp_abort_thread
467
468/* Print out the storage map for the major kmp_info_t thread data structures
469 that are allocated together. */
470
471static void __kmp_print_thread_storage_map(kmp_info_t *thr, int gtid) {
472 __kmp_print_storage_map_gtid(gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d",
473 gtid);
474
475 __kmp_print_storage_map_gtid(gtid, &thr->th.th_info, &thr->th.th_team,
476 sizeof(kmp_desc_t), "th_%d.th_info", gtid);
477
478 __kmp_print_storage_map_gtid(gtid, &thr->th.th_local, &thr->th.th_pri_head,
479 sizeof(kmp_local_t), "th_%d.th_local", gtid);
480
481 __kmp_print_storage_map_gtid(
482 gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
483 sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid);
484
485 __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_plain_barrier],
486 &thr->th.th_bar[bs_plain_barrier + 1],
487 sizeof(kmp_balign_t), "th_%d.th_bar[plain]",
488 gtid);
489
490 __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_forkjoin_barrier],
491 &thr->th.th_bar[bs_forkjoin_barrier + 1],
492 sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]",
493 gtid);
494
495#if KMP_FAST_REDUCTION_BARRIER1
496 __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_reduction_barrier],
497 &thr->th.th_bar[bs_reduction_barrier + 1],
498 sizeof(kmp_balign_t), "th_%d.th_bar[reduction]",
499 gtid);
500#endif // KMP_FAST_REDUCTION_BARRIER
501}
502
503/* Print out the storage map for the major kmp_team_t team data structures
504 that are allocated together. */
505
506static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team,
507 int team_id, int num_thr) {
508 int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
509 __kmp_print_storage_map_gtid(-1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
510 header, team_id);
511
512 __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0],
513 &team->t.t_bar[bs_last_barrier],
514 sizeof(kmp_balign_team_t) * bs_last_barrier,
515 "%s_%d.t_bar", header, team_id);
516
517 __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_plain_barrier],
518 &team->t.t_bar[bs_plain_barrier + 1],
519 sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]",
520 header, team_id);
521
522 __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_forkjoin_barrier],
523 &team->t.t_bar[bs_forkjoin_barrier + 1],
524 sizeof(kmp_balign_team_t),
525 "%s_%d.t_bar[forkjoin]", header, team_id);
526
527#if KMP_FAST_REDUCTION_BARRIER1
528 __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_reduction_barrier],
529 &team->t.t_bar[bs_reduction_barrier + 1],
530 sizeof(kmp_balign_team_t),
531 "%s_%d.t_bar[reduction]", header, team_id);
532#endif // KMP_FAST_REDUCTION_BARRIER
533
534 __kmp_print_storage_map_gtid(
535 -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
536 sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id);
537
538 __kmp_print_storage_map_gtid(
539 -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
540 sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id);
541
542 __kmp_print_storage_map_gtid(-1, &team->t.t_disp_buffer[0],
543 &team->t.t_disp_buffer[num_disp_buff],
544 sizeof(dispatch_shared_info_t) * num_disp_buff,
545 "%s_%d.t_disp_buffer", header, team_id);
546}
547
548static void __kmp_init_allocator() {
549 __kmp_init_memkind();
550 __kmp_init_target_mem();
551}
552static void __kmp_fini_allocator() { __kmp_fini_memkind(); }
553
554/* ------------------------------------------------------------------------ */
555
556#if ENABLE_LIBOMPTARGET1
557static void __kmp_init_omptarget() {
558 __kmp_init_target_task();
559}
560#endif
561
562/* ------------------------------------------------------------------------ */
563
564#if KMP_DYNAMIC_LIB1
565#if KMP_OS_WINDOWS0
566
567BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) {
568 //__kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
569
570 switch (fdwReason) {
571
572 case DLL_PROCESS_ATTACH:
573 KA_TRACE(10, ("DllMain: PROCESS_ATTACH\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("DllMain: PROCESS_ATTACH\n"
); }
;
574
575 return TRUE(!0);
576
577 case DLL_PROCESS_DETACH:
578 KA_TRACE(10, ("DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific()))if (kmp_a_debug >= 10) { __kmp_debug_printf ("DllMain: PROCESS_DETACH T#%d\n"
, __kmp_gtid_get_specific()); }
;
579
580 // According to Windows* documentation for DllMain entry point:
581 // for DLL_PROCESS_DETACH, lpReserved is used for telling the difference:
582 // lpReserved == NULL when FreeLibrary() is called,
583 // lpReserved != NULL when the process is terminated.
584 // When FreeLibrary() is called, worker threads remain alive. So the
585 // runtime's state is consistent and executing proper shutdown is OK.
586 // When the process is terminated, worker threads have exited or been
587 // forcefully terminated by the OS and only the shutdown thread remains.
588 // This can leave the runtime in an inconsistent state.
589 // Hence, only attempt proper cleanup when FreeLibrary() is called.
590 // Otherwise, rely on OS to reclaim resources.
591 if (lpReserved == NULL__null)
592 __kmp_internal_end_library(__kmp_gtid_get_specific());
593
594 return TRUE(!0);
595
596 case DLL_THREAD_ATTACH:
597 KA_TRACE(10, ("DllMain: THREAD_ATTACH\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("DllMain: THREAD_ATTACH\n"
); }
;
598
599 /* if we want to register new siblings all the time here call
600 * __kmp_get_gtid(); */
601 return TRUE(!0);
602
603 case DLL_THREAD_DETACH:
604 KA_TRACE(10, ("DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific()))if (kmp_a_debug >= 10) { __kmp_debug_printf ("DllMain: THREAD_DETACH T#%d\n"
, __kmp_gtid_get_specific()); }
;
605
606 __kmp_internal_end_thread(__kmp_gtid_get_specific());
607 return TRUE(!0);
608 }
609
610 return TRUE(!0);
611}
612
613#endif /* KMP_OS_WINDOWS */
614#endif /* KMP_DYNAMIC_LIB */
615
616/* __kmp_parallel_deo -- Wait until it's our turn. */
617void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
618 int gtid = *gtid_ref;
619#ifdef BUILD_PARALLEL_ORDERED1
620 kmp_team_t *team = __kmp_team_from_gtid(gtid);
621#endif /* BUILD_PARALLEL_ORDERED */
622
623 if (__kmp_env_consistency_check) {
624 if (__kmp_threads[gtid]->th.th_root->r.r_active)
625#if KMP_USE_DYNAMIC_LOCK1
626 __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL__null, 0);
627#else
628 __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL__null);
629#endif
630 }
631#ifdef BUILD_PARALLEL_ORDERED1
632 if (!team->t.t_serialized) {
633 KMP_MB();
634 KMP_WAIT__kmp_wait_4(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid), KMP_EQ__kmp_eq_4,
635 NULL__null);
636 KMP_MB();
637 }
638#endif /* BUILD_PARALLEL_ORDERED */
639}
640
641/* __kmp_parallel_dxo -- Signal the next task. */
642void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
643 int gtid = *gtid_ref;
644#ifdef BUILD_PARALLEL_ORDERED1
645 int tid = __kmp_tid_from_gtid(gtid);
646 kmp_team_t *team = __kmp_team_from_gtid(gtid);
647#endif /* BUILD_PARALLEL_ORDERED */
648
649 if (__kmp_env_consistency_check) {
650 if (__kmp_threads[gtid]->th.th_root->r.r_active)
651 __kmp_pop_sync(gtid, ct_ordered_in_parallel, loc_ref);
652 }
653#ifdef BUILD_PARALLEL_ORDERED1
654 if (!team->t.t_serialized) {
655 KMP_MB(); /* Flush all pending memory write invalidates. */
656
657 /* use the tid of the next thread in this team */
658 /* TODO replace with general release procedure */
659 team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc);
660
661 KMP_MB(); /* Flush all pending memory write invalidates. */
662 }
663#endif /* BUILD_PARALLEL_ORDERED */
664}
665
666/* ------------------------------------------------------------------------ */
667/* The BARRIER for a SINGLE process section is always explicit */
668
669int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws) {
670 int status;
671 kmp_info_t *th;
672 kmp_team_t *team;
673
674 if (!TCR_4(__kmp_init_parallel)(__kmp_init_parallel))
675 __kmp_parallel_initialize();
676 __kmp_resume_if_soft_paused();
677
678 th = __kmp_threads[gtid];
679 team = th->th.th_team;
680 status = 0;
681
682 th->th.th_ident = id_ref;
683
684 if (team->t.t_serialized) {
685 status = 1;
686 } else {
687 kmp_int32 old_this = th->th.th_local.this_construct;
688
689 ++th->th.th_local.this_construct;
690 /* try to set team count to thread count--success means thread got the
691 single block */
692 /* TODO: Should this be acquire or release? */
693 if (team->t.t_construct == old_this) {
694 status = __kmp_atomic_compare_store_acq(&team->t.t_construct, old_this,
695 th->th.th_local.this_construct);
696 }
697#if USE_ITT_BUILD1
698 if (__itt_metadata_add_ptr__kmp_itt_metadata_add_ptr__3_0 && __kmp_forkjoin_frames_mode == 3 &&
699 KMP_MASTER_GTID(gtid)(0 == __kmp_tid_from_gtid((gtid))) && th->th.th_teams_microtask == NULL__null &&
700 team->t.t_active_level == 1) {
701 // Only report metadata by primary thread of active team at level 1
702 __kmp_itt_metadata_single(id_ref);
703 }
704#endif /* USE_ITT_BUILD */
705 }
706
707 if (__kmp_env_consistency_check) {
708 if (status && push_ws) {
709 __kmp_push_workshare(gtid, ct_psingle, id_ref);
710 } else {
711 __kmp_check_workshare(gtid, ct_psingle, id_ref);
712 }
713 }
714#if USE_ITT_BUILD1
715 if (status) {
716 __kmp_itt_single_start(gtid);
717 }
718#endif /* USE_ITT_BUILD */
719 return status;
720}
721
722void __kmp_exit_single(int gtid) {
723#if USE_ITT_BUILD1
724 __kmp_itt_single_end(gtid);
725#endif /* USE_ITT_BUILD */
726 if (__kmp_env_consistency_check)
727 __kmp_pop_workshare(gtid, ct_psingle, NULL__null);
728}
729
730/* determine if we can go parallel or must use a serialized parallel region and
731 * how many threads we can use
732 * set_nproc is the number of threads requested for the team
733 * returns 0 if we should serialize or only use one thread,
734 * otherwise the number of threads to use
735 * The forkjoin lock is held by the caller. */
736static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
737 int master_tid, int set_nthreads,
738 int enter_teams) {
739 int capacity;
740 int new_nthreads;
741 KMP_DEBUG_ASSERT(__kmp_init_serial)if (!(__kmp_init_serial)) { __kmp_debug_assert("__kmp_init_serial"
, "openmp/runtime/src/kmp_runtime.cpp", 741); }
;
742 KMP_DEBUG_ASSERT(root && parent_team)if (!(root && parent_team)) { __kmp_debug_assert("root && parent_team"
, "openmp/runtime/src/kmp_runtime.cpp", 742); }
;
743 kmp_info_t *this_thr = parent_team->t.t_threads[master_tid];
744
745 // If dyn-var is set, dynamically adjust the number of desired threads,
746 // according to the method specified by dynamic_mode.
747 new_nthreads = set_nthreads;
748 if (!get__dynamic_2(parent_team, master_tid)((parent_team)->t.t_threads[(master_tid)]->th.th_current_task
->td_icvs.dynamic)
) {
749 ;
750 }
751#ifdef USE_LOAD_BALANCE1
752 else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) {
753 new_nthreads = __kmp_load_balance_nproc(root, set_nthreads);
754 if (new_nthreads == 1) {
755 KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d load balance reduced "
"reservation to 1 thread\n", master_tid); }
756 "reservation to 1 thread\n",if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d load balance reduced "
"reservation to 1 thread\n", master_tid); }
757 master_tid))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d load balance reduced "
"reservation to 1 thread\n", master_tid); }
;
758 return 1;
759 }
760 if (new_nthreads < set_nthreads) {
761 KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d load balance reduced "
"reservation to %d threads\n", master_tid, new_nthreads); }
762 "reservation to %d threads\n",if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d load balance reduced "
"reservation to %d threads\n", master_tid, new_nthreads); }
763 master_tid, new_nthreads))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d load balance reduced "
"reservation to %d threads\n", master_tid, new_nthreads); }
;
764 }
765 }
766#endif /* USE_LOAD_BALANCE */
767 else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) {
768 new_nthreads = __kmp_avail_proc - __kmp_nth +
769 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
770 if (new_nthreads <= 1) {
771 KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d thread limit reduced "
"reservation to 1 thread\n", master_tid); }
772 "reservation to 1 thread\n",if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d thread limit reduced "
"reservation to 1 thread\n", master_tid); }
773 master_tid))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d thread limit reduced "
"reservation to 1 thread\n", master_tid); }
;
774 return 1;
775 }
776 if (new_nthreads < set_nthreads) {
777 KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d thread limit reduced "
"reservation to %d threads\n", master_tid, new_nthreads); }
778 "reservation to %d threads\n",if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d thread limit reduced "
"reservation to %d threads\n", master_tid, new_nthreads); }
779 master_tid, new_nthreads))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d thread limit reduced "
"reservation to %d threads\n", master_tid, new_nthreads); }
;
780 } else {
781 new_nthreads = set_nthreads;
782 }
783 } else if (__kmp_global.g.g_dynamic_mode == dynamic_random) {
784 if (set_nthreads > 2) {
785 new_nthreads = __kmp_get_random(parent_team->t.t_threads[master_tid]);
786 new_nthreads = (new_nthreads % set_nthreads) + 1;
787 if (new_nthreads == 1) {
788 KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d dynamic random reduced "
"reservation to 1 thread\n", master_tid); }
789 "reservation to 1 thread\n",if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d dynamic random reduced "
"reservation to 1 thread\n", master_tid); }
790 master_tid))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d dynamic random reduced "
"reservation to 1 thread\n", master_tid); }
;
791 return 1;
792 }
793 if (new_nthreads < set_nthreads) {
794 KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d dynamic random reduced "
"reservation to %d threads\n", master_tid, new_nthreads); }
795 "reservation to %d threads\n",if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d dynamic random reduced "
"reservation to %d threads\n", master_tid, new_nthreads); }
796 master_tid, new_nthreads))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d dynamic random reduced "
"reservation to %d threads\n", master_tid, new_nthreads); }
;
797 }
798 }
799 } else {
800 KMP_ASSERT(0)if (!(0)) { __kmp_debug_assert("0", "openmp/runtime/src/kmp_runtime.cpp"
, 800); }
;
801 }
802
803 // Respect KMP_ALL_THREADS/KMP_DEVICE_THREAD_LIMIT.
804 if (__kmp_nth + new_nthreads -
805 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
806 __kmp_max_nth) {
807 int tl_nthreads = __kmp_max_nth - __kmp_nth +
808 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
809 if (tl_nthreads <= 0) {
810 tl_nthreads = 1;
811 }
812
813 // If dyn-var is false, emit a 1-time warning.
814 if (!get__dynamic_2(parent_team, master_tid)((parent_team)->t.t_threads[(master_tid)]->th.th_current_task
->td_icvs.dynamic)
&& (!__kmp_reserve_warn)) {
815 __kmp_reserve_warn = 1;
816 __kmp_msg(kmp_ms_warning,
817 KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads)__kmp_msg_format(kmp_i18n_msg_CantFormThrTeam, set_nthreads, tl_nthreads
)
,
818 KMP_HNT(Unset_ALL_THREADS)__kmp_msg_format(kmp_i18n_hnt_Unset_ALL_THREADS), __kmp_msg_null);
819 }
820 if (tl_nthreads == 1) {
821 KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "
"reduced reservation to 1 thread\n", master_tid); }
822 "reduced reservation to 1 thread\n",if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "
"reduced reservation to 1 thread\n", master_tid); }
823 master_tid))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "
"reduced reservation to 1 thread\n", master_tid); }
;
824 return 1;
825 }
826 KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "
"reservation to %d threads\n", master_tid, tl_nthreads); }
827 "reservation to %d threads\n",if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "
"reservation to %d threads\n", master_tid, tl_nthreads); }
828 master_tid, tl_nthreads))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "
"reservation to %d threads\n", master_tid, tl_nthreads); }
;
829 new_nthreads = tl_nthreads;
830 }
831
832 // Respect OMP_THREAD_LIMIT
833 int cg_nthreads = this_thr->th.th_cg_roots->cg_nthreads;
834 int max_cg_threads = this_thr->th.th_cg_roots->cg_thread_limit;
835 if (cg_nthreads + new_nthreads -
836 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
837 max_cg_threads) {
838 int tl_nthreads = max_cg_threads - cg_nthreads +
839 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
840 if (tl_nthreads <= 0) {
841 tl_nthreads = 1;
842 }
843
844 // If dyn-var is false, emit a 1-time warning.
845 if (!get__dynamic_2(parent_team, master_tid)((parent_team)->t.t_threads[(master_tid)]->th.th_current_task
->td_icvs.dynamic)
&& (!__kmp_reserve_warn)) {
846 __kmp_reserve_warn = 1;
847 __kmp_msg(kmp_ms_warning,
848 KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads)__kmp_msg_format(kmp_i18n_msg_CantFormThrTeam, set_nthreads, tl_nthreads
)
,
849 KMP_HNT(Unset_ALL_THREADS)__kmp_msg_format(kmp_i18n_hnt_Unset_ALL_THREADS), __kmp_msg_null);
850 }
851 if (tl_nthreads == 1) {
852 KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "
"reduced reservation to 1 thread\n", master_tid); }
853 "reduced reservation to 1 thread\n",if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "
"reduced reservation to 1 thread\n", master_tid); }
854 master_tid))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "
"reduced reservation to 1 thread\n", master_tid); }
;
855 return 1;
856 }
857 KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "
"reservation to %d threads\n", master_tid, tl_nthreads); }
858 "reservation to %d threads\n",if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "
"reservation to %d threads\n", master_tid, tl_nthreads); }
859 master_tid, tl_nthreads))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "
"reservation to %d threads\n", master_tid, tl_nthreads); }
;
860 new_nthreads = tl_nthreads;
861 }
862
863 // Check if the threads array is large enough, or needs expanding.
864 // See comment in __kmp_register_root() about the adjustment if
865 // __kmp_threads[0] == NULL.
866 capacity = __kmp_threads_capacity;
867 if (TCR_PTR(__kmp_threads[0])((void *)(__kmp_threads[0])) == NULL__null) {
868 --capacity;
869 }
870 // If it is not for initializing the hidden helper team, we need to take
871 // __kmp_hidden_helper_threads_num out of the capacity because it is included
872 // in __kmp_threads_capacity.
873 if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)(__kmp_init_hidden_helper_threads)) {
874 capacity -= __kmp_hidden_helper_threads_num;
875 }
876 if (__kmp_nth + new_nthreads -
877 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
878 capacity) {
879 // Expand the threads array.
880 int slotsRequired = __kmp_nth + new_nthreads -
881 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) -
882 capacity;
883 int slotsAdded = __kmp_expand_threads(slotsRequired);
884 if (slotsAdded < slotsRequired) {
885 // The threads array was not expanded enough.
886 new_nthreads -= (slotsRequired - slotsAdded);
887 KMP_ASSERT(new_nthreads >= 1)if (!(new_nthreads >= 1)) { __kmp_debug_assert("new_nthreads >= 1"
, "openmp/runtime/src/kmp_runtime.cpp", 887); }
;
888
889 // If dyn-var is false, emit a 1-time warning.
890 if (!get__dynamic_2(parent_team, master_tid)((parent_team)->t.t_threads[(master_tid)]->th.th_current_task
->td_icvs.dynamic)
&& (!__kmp_reserve_warn)) {
891 __kmp_reserve_warn = 1;
892 if (__kmp_tp_cached) {
893 __kmp_msg(kmp_ms_warning,
894 KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads)__kmp_msg_format(kmp_i18n_msg_CantFormThrTeam, set_nthreads, new_nthreads
)
,
895 KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity)__kmp_msg_format(kmp_i18n_hnt_Set_ALL_THREADPRIVATE, __kmp_tp_capacity
)
,
896 KMP_HNT(PossibleSystemLimitOnThreads)__kmp_msg_format(kmp_i18n_hnt_PossibleSystemLimitOnThreads), __kmp_msg_null);
897 } else {
898 __kmp_msg(kmp_ms_warning,
899 KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads)__kmp_msg_format(kmp_i18n_msg_CantFormThrTeam, set_nthreads, new_nthreads
)
,
900 KMP_HNT(SystemLimitOnThreads)__kmp_msg_format(kmp_i18n_hnt_SystemLimitOnThreads), __kmp_msg_null);
901 }
902 }
903 }
904 }
905
906#ifdef KMP_DEBUG1
907 if (new_nthreads == 1) {
908 KC_TRACE(10,if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d serializing team after reclaiming "
"dead roots and rechecking; requested %d threads\n", __kmp_get_global_thread_id
(), set_nthreads); }
909 ("__kmp_reserve_threads: T#%d serializing team after reclaiming "if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d serializing team after reclaiming "
"dead roots and rechecking; requested %d threads\n", __kmp_get_global_thread_id
(), set_nthreads); }
910 "dead roots and rechecking; requested %d threads\n",if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d serializing team after reclaiming "
"dead roots and rechecking; requested %d threads\n", __kmp_get_global_thread_id
(), set_nthreads); }
911 __kmp_get_gtid(), set_nthreads))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d serializing team after reclaiming "
"dead roots and rechecking; requested %d threads\n", __kmp_get_global_thread_id
(), set_nthreads); }
;
912 } else {
913 KC_TRACE(10, ("__kmp_reserve_threads: T#%d allocating %d threads; requested"if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d allocating %d threads; requested"
" %d threads\n", __kmp_get_global_thread_id(), new_nthreads,
set_nthreads); }
914 " %d threads\n",if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d allocating %d threads; requested"
" %d threads\n", __kmp_get_global_thread_id(), new_nthreads,
set_nthreads); }
915 __kmp_get_gtid(), new_nthreads, set_nthreads))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d allocating %d threads; requested"
" %d threads\n", __kmp_get_global_thread_id(), new_nthreads,
set_nthreads); }
;
916 }
917#endif // KMP_DEBUG
918 return new_nthreads;
919}
920
921/* Allocate threads from the thread pool and assign them to the new team. We are
922 assured that there are enough threads available, because we checked on that
923 earlier within critical section forkjoin */
924static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
925 kmp_info_t *master_th, int master_gtid,
926 int fork_teams_workers) {
927 int i;
928 int use_hot_team;
929
930 KA_TRACE(10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_fork_team_threads: new_nprocs = %d\n"
, team->t.t_nproc); }
;
931 KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid())if (!(master_gtid == __kmp_get_global_thread_id())) { __kmp_debug_assert
("master_gtid == __kmp_get_global_thread_id()", "openmp/runtime/src/kmp_runtime.cpp"
, 931); }
;
932 KMP_MB();
933
934 /* first, let's setup the primary thread */
935 master_th->th.th_info.ds.ds_tid = 0;
936 master_th->th.th_team = team;
937 master_th->th.th_team_nproc = team->t.t_nproc;
938 master_th->th.th_team_master = master_th;
939 master_th->th.th_team_serialized = FALSE0;
940 master_th->th.th_dispatch = &team->t.t_dispatch[0];
941
942/* make sure we are not the optimized hot team */
943#if KMP_NESTED_HOT_TEAMS1
944 use_hot_team = 0;
945 kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
946 if (hot_teams) { // hot teams array is not allocated if
947 // KMP_HOT_TEAMS_MAX_LEVEL=0
948 int level = team->t.t_active_level - 1; // index in array of hot teams
949 if (master_th->th.th_teams_microtask) { // are we inside the teams?
950 if (master_th->th.th_teams_size.nteams > 1) {
951 ++level; // level was not increased in teams construct for
952 // team_of_masters
953 }
954 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
955 master_th->th.th_teams_level == team->t.t_level) {
956 ++level; // level was not increased in teams construct for
957 // team_of_workers before the parallel
958 } // team->t.t_level will be increased inside parallel
959 }
960 if (level < __kmp_hot_teams_max_level) {
961 if (hot_teams[level].hot_team) {
962 // hot team has already been allocated for given level
963 KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team)if (!(hot_teams[level].hot_team == team)) { __kmp_debug_assert
("hot_teams[level].hot_team == team", "openmp/runtime/src/kmp_runtime.cpp"
, 963); }
;
964 use_hot_team = 1; // the team is ready to use
965 } else {
966 use_hot_team = 0; // AC: threads are not allocated yet
967 hot_teams[level].hot_team = team; // remember new hot team
968 hot_teams[level].hot_team_nth = team->t.t_nproc;
969 }
970 } else {
971 use_hot_team = 0;
972 }
973 }
974#else
975 use_hot_team = team == root->r.r_hot_team;
976#endif
977 if (!use_hot_team) {
978
979 /* install the primary thread */
980 team->t.t_threads[0] = master_th;
981 __kmp_initialize_info(master_th, team, 0, master_gtid);
982
983 /* now, install the worker threads */
984 for (i = 1; i < team->t.t_nproc; i++) {
985
986 /* fork or reallocate a new thread and install it in team */
987 kmp_info_t *thr = __kmp_allocate_thread(root, team, i);
988 team->t.t_threads[i] = thr;
989 KMP_DEBUG_ASSERT(thr)if (!(thr)) { __kmp_debug_assert("thr", "openmp/runtime/src/kmp_runtime.cpp"
, 989); }
;
990 KMP_DEBUG_ASSERT(thr->th.th_team == team)if (!(thr->th.th_team == team)) { __kmp_debug_assert("thr->th.th_team == team"
, "openmp/runtime/src/kmp_runtime.cpp", 990); }
;
991 /* align team and thread arrived states */
992 KA_TRACE(20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
"T#%d(%d:%d) join =%llu, plain=%llu\n", __kmp_gtid_from_tid(
0, team), team->t.t_id, 0, __kmp_gtid_from_tid(i, team), team
->t.t_id, i, team->t.t_bar[bs_forkjoin_barrier].b_arrived
, team->t.t_bar[bs_plain_barrier].b_arrived); }
993 "T#%d(%d:%d) join =%llu, plain=%llu\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
"T#%d(%d:%d) join =%llu, plain=%llu\n", __kmp_gtid_from_tid(
0, team), team->t.t_id, 0, __kmp_gtid_from_tid(i, team), team
->t.t_id, i, team->t.t_bar[bs_forkjoin_barrier].b_arrived
, team->t.t_bar[bs_plain_barrier].b_arrived); }
994 __kmp_gtid_from_tid(0, team), team->t.t_id, 0,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
"T#%d(%d:%d) join =%llu, plain=%llu\n", __kmp_gtid_from_tid(
0, team), team->t.t_id, 0, __kmp_gtid_from_tid(i, team), team
->t.t_id, i, team->t.t_bar[bs_forkjoin_barrier].b_arrived
, team->t.t_bar[bs_plain_barrier].b_arrived); }
995 __kmp_gtid_from_tid(i, team), team->t.t_id, i,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
"T#%d(%d:%d) join =%llu, plain=%llu\n", __kmp_gtid_from_tid(
0, team), team->t.t_id, 0, __kmp_gtid_from_tid(i, team), team
->t.t_id, i, team->t.t_bar[bs_forkjoin_barrier].b_arrived
, team->t.t_bar[bs_plain_barrier].b_arrived); }
996 team->t.t_bar[bs_forkjoin_barrier].b_arrived,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
"T#%d(%d:%d) join =%llu, plain=%llu\n", __kmp_gtid_from_tid(
0, team), team->t.t_id, 0, __kmp_gtid_from_tid(i, team), team
->t.t_id, i, team->t.t_bar[bs_forkjoin_barrier].b_arrived
, team->t.t_bar[bs_plain_barrier].b_arrived); }
997 team->t.t_bar[bs_plain_barrier].b_arrived))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
"T#%d(%d:%d) join =%llu, plain=%llu\n", __kmp_gtid_from_tid(
0, team), team->t.t_id, 0, __kmp_gtid_from_tid(i, team), team
->t.t_id, i, team->t.t_bar[bs_forkjoin_barrier].b_arrived
, team->t.t_bar[bs_plain_barrier].b_arrived); }
;
998 thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
999 thr->th.th_teams_level = master_th->th.th_teams_level;
1000 thr->th.th_teams_size = master_th->th.th_teams_size;
1001 { // Initialize threads' barrier data.
1002 int b;
1003 kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar;
1004 for (b = 0; b < bs_last_barrier; ++b) {
1005 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
1006 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG)if (!(balign[b].bb.wait_flag != 2)) { __kmp_debug_assert("balign[b].bb.wait_flag != 2"
, "openmp/runtime/src/kmp_runtime.cpp", 1006); }
;
1007#if USE_DEBUGGER0
1008 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
1009#endif
1010 }
1011 }
1012 }
1013
1014#if KMP_AFFINITY_SUPPORTED1
1015 // Do not partition the places list for teams construct workers who
1016 // haven't actually been forked to do real work yet. This partitioning
1017 // will take place in the parallel region nested within the teams construct.
1018 if (!fork_teams_workers) {
1019 __kmp_partition_places(team);
1020 }
1021#endif
1022
1023 if (team->t.t_nproc > 1 &&
1024 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
1025 team->t.b->update_num_threads(team->t.t_nproc);
1026 __kmp_add_threads_to_team(team, team->t.t_nproc);
1027 }
1028 }
1029
1030 if (__kmp_display_affinity && team->t.t_display_affinity != 1) {
1031 for (i = 0; i < team->t.t_nproc; i++) {
1032 kmp_info_t *thr = team->t.t_threads[i];
1033 if (thr->th.th_prev_num_threads != team->t.t_nproc ||
1034 thr->th.th_prev_level != team->t.t_level) {
1035 team->t.t_display_affinity = 1;
1036 break;
1037 }
1038 }
1039 }
1040
1041 KMP_MB();
1042}
1043
1044#if KMP_ARCH_X860 || KMP_ARCH_X86_641
1045// Propagate any changes to the floating point control registers out to the team
1046// We try to avoid unnecessary writes to the relevant cache line in the team
1047// structure, so we don't make changes unless they are needed.
1048inline static void propagateFPControl(kmp_team_t *team) {
1049 if (__kmp_inherit_fp_control) {
1050 kmp_int16 x87_fpu_control_word;
1051 kmp_uint32 mxcsr;
1052
1053 // Get primary thread's values of FPU control flags (both X87 and vector)
1054 __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
1055 __kmp_store_mxcsr(&mxcsr);
1056 mxcsr &= KMP_X86_MXCSR_MASK0xffffffc0;
1057
1058 // There is no point looking at t_fp_control_saved here.
1059 // If it is TRUE, we still have to update the values if they are different
1060 // from those we now have. If it is FALSE we didn't save anything yet, but
1061 // our objective is the same. We have to ensure that the values in the team
1062 // are the same as those we have.
1063 // So, this code achieves what we need whether or not t_fp_control_saved is
1064 // true. By checking whether the value needs updating we avoid unnecessary
1065 // writes that would put the cache-line into a written state, causing all
1066 // threads in the team to have to read it again.
1067 KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word)if ((team->t.t_x87_fpu_control_word) != (x87_fpu_control_word
)) (team->t.t_x87_fpu_control_word) = (x87_fpu_control_word
)
;
1068 KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr)if ((team->t.t_mxcsr) != (mxcsr)) (team->t.t_mxcsr) = (
mxcsr)
;
1069 // Although we don't use this value, other code in the runtime wants to know
1070 // whether it should restore them. So we must ensure it is correct.
1071 KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE)if ((team->t.t_fp_control_saved) != ((!0))) (team->t.t_fp_control_saved
) = ((!0))
;
1072 } else {
1073 // Similarly here. Don't write to this cache-line in the team structure
1074 // unless we have to.
1075 KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE)if ((team->t.t_fp_control_saved) != (0)) (team->t.t_fp_control_saved
) = (0)
;
1076 }
1077}
1078
1079// Do the opposite, setting the hardware registers to the updated values from
1080// the team.
1081inline static void updateHWFPControl(kmp_team_t *team) {
1082 if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) {
1083 // Only reset the fp control regs if they have been changed in the team of
1084 // the parallel region that we are exiting.
1085 kmp_int16 x87_fpu_control_word;
1086 kmp_uint32 mxcsr;
1087 __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
1088 __kmp_store_mxcsr(&mxcsr);
1089 mxcsr &= KMP_X86_MXCSR_MASK;
1090
1091 if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) {
1092 __kmp_clear_x87_fpu_status_word();
1093 __kmp_load_x87_fpu_control_word(&team->t.t_x87_fpu_control_word);
1094 }
1095
1096 if (team->t.t_mxcsr != mxcsr) {
1097 __kmp_load_mxcsr(&team->t.t_mxcsr);
1098 }
1099 }
1100}
1101#else
1102#define propagateFPControl(x) ((void)0)
1103#define updateHWFPControl(x) ((void)0)
1104#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
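For reference, the same save/compare/restore shape can be expressed portably with <cfenv>. This is only an illustrative sketch, not what the runtime does (it manipulates the x87 control word and MXCSR directly, and strict <cfenv> use would also require #pragma STDC FENV_ACCESS ON, which many compilers ignore):

#include <cfenv>
#include <cstdio>

int main() {
  std::fenv_t saved;
  std::fegetenv(&saved);           // capture the current FP environment
  std::fesetround(FE_TOWARDZERO);  // a change made "inside the region"
  std::printf("rounding mode now: %d\n", std::fegetround());
  std::fesetenv(&saved);           // restore the captured state on exit
  std::printf("rounding mode restored: %d\n", std::fegetround());
}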
1105
1106static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team,
1107 int realloc); // forward declaration
1108
1109/* Run a parallel region that has been serialized, so it runs only in a team
1110 consisting of the single primary thread. */
1111void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
1112 kmp_info_t *this_thr;
1113 kmp_team_t *serial_team;
1114
1115 KC_TRACE(10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid));
1116
1117 /* Skip all this code for autopar serialized loops since it results in
1118 unacceptable overhead */
1119 if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
1120 return;
1121
1122 if (!TCR_4(__kmp_init_parallel))
1123 __kmp_parallel_initialize();
1124 __kmp_resume_if_soft_paused();
1125
1126 this_thr = __kmp_threads[global_tid];
1127 serial_team = this_thr->th.th_serial_team;
1128
1129 /* utilize the serialized team held by this thread */
1130 KMP_DEBUG_ASSERT(serial_team);
1131 KMP_MB();
1132
1133 if (__kmp_tasking_mode != tskm_immediate_exec) {
1134 KMP_DEBUG_ASSERT(
1135 this_thr->th.th_task_team ==
1136 this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
1137 KMP_DEBUG_ASSERT(serial_team->t.t_task_team[this_thr->th.th_task_state] ==
1138 NULL);
1139 KA_TRACE(20, ("__kmpc_serialized_parallel: T#%d pushing task_team %p / "
1140 "team %p, new task_team = NULL\n",
1141 global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
1142 this_thr->th.th_task_team = NULL;
1143 }
1144
1145 kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
1146 if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
1147 proc_bind = proc_bind_false;
1148 } else if (proc_bind == proc_bind_default) {
1149 // No proc_bind clause was specified, so use the current value
1150 // of proc-bind-var for this parallel region.
1151 proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
1152 }
1153 // Reset for next parallel region
1154 this_thr->th.th_set_proc_bind = proc_bind_default;
1155
1156#if OMPT_SUPPORT
1157 ompt_data_t ompt_parallel_data = ompt_data_none;
1158 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1159 if (ompt_enabled.enabled &&
1160 this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
1161
1162 ompt_task_info_t *parent_task_info;
1163 parent_task_info = OMPT_CUR_TASK_INFO(this_thr);
1164
1165 parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1166 if (ompt_enabled.ompt_callback_parallel_begin) {
1167 int team_size = 1;
1168
1169 ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
1170 &(parent_task_info->task_data), &(parent_task_info->frame),
1171 &ompt_parallel_data, team_size,
1172 ompt_parallel_invoker_program | ompt_parallel_team, codeptr);
1173 }
1174 }
1175#endif // OMPT_SUPPORT
1176
1177 if (this_thr->th.th_team != serial_team) {
1178 // Nested level will be an index in the nested nthreads array
1179 int level = this_thr->th.th_team->t.t_level;
1180
1181 if (serial_team->t.t_serialized) {
1182 /* this serial team was already used
1183 TODO increase performance by making these locks more specific */
1184 kmp_team_t *new_team;
1185
1186 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
1187
1188 new_team =
1189 __kmp_allocate_team(this_thr->th.th_root, 1, 1,
1190#if OMPT_SUPPORT
1191 ompt_parallel_data,
1192#endif
1193 proc_bind, &this_thr->th.th_current_task->td_icvs,
1194 0 USE_NESTED_HOT_ARG(NULL));
1195 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
1196 KMP_ASSERT(new_team);
1197
1198 /* setup new serialized team and install it */
1199 new_team->t.t_threads[0] = this_thr;
1200 new_team->t.t_parent = this_thr->th.th_team;
1201 serial_team = new_team;
1202 this_thr->th.th_serial_team = serial_team;
1203
1204 KF_TRACE(
1205 10,
1206 ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
1207 global_tid, serial_team));
1208
1209 /* TODO the above breaks the requirement that if we run out of resources,
1210 then we can still guarantee that serialized teams are ok, since we may
1211 need to allocate a new one */
1212 } else {
1213 KF_TRACE(
1214 10,
1215 ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
1216 global_tid, serial_team));
1217 }
1218
1219 /* we have to initialize this serial team */
1220 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
1221 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
1222 KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team);
1223 serial_team->t.t_ident = loc;
1224 serial_team->t.t_serialized = 1;
1225 serial_team->t.t_nproc = 1;
1226 serial_team->t.t_parent = this_thr->th.th_team;
1227 serial_team->t.t_sched.sched = this_thr->th.th_team->t.t_sched.sched;
1228 this_thr->th.th_team = serial_team;
1229 serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;
1230
1231 KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d curtask=%p\n", global_tid,
1232 this_thr->th.th_current_task));
1233 KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1);
1234 this_thr->th.th_current_task->td_flags.executing = 0;
1235
1236 __kmp_push_current_task_to_thread(this_thr, serial_team, 0);
1237
1238 /* TODO: GEH: do ICVs work for nested serialized teams? Don't we need an
1239 implicit task for each serialized task represented by
1240 team->t.t_serialized? */
1241 copy_icvs(&this_thr->th.th_current_task->td_icvs,
1242 &this_thr->th.th_current_task->td_parent->td_icvs);
1243
1244 // Thread value exists in the nested nthreads array for the next nested
1245 // level
1246 if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
1247 this_thr->th.th_current_task->td_icvs.nproc =
1248 __kmp_nested_nth.nth[level + 1];
1249 }
1250
1251 if (__kmp_nested_proc_bind.used &&
1252 (level + 1 < __kmp_nested_proc_bind.used)) {
1253 this_thr->th.th_current_task->td_icvs.proc_bind =
1254 __kmp_nested_proc_bind.bind_types[level + 1];
1255 }
1256
1257#if USE_DEBUGGER
1258 serial_team->t.t_pkfn = (microtask_t)(~0); // For the debugger.
1259#endif
1260 this_thr->th.th_info.ds.ds_tid = 0;
1261
1262 /* set thread cache values */
1263 this_thr->th.th_team_nproc = 1;
1264 this_thr->th.th_team_master = this_thr;
1265 this_thr->th.th_team_serialized = 1;
1266
1267 serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
1268 serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;
1269 serial_team->t.t_def_allocator = this_thr->th.th_def_allocator; // save
1270
1271 propagateFPControl(serial_team);
1272
1273 /* check if we need to allocate dispatch buffers stack */
1274 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
1275 if (!serial_team->t.t_dispatch->th_disp_buffer) {
1276 serial_team->t.t_dispatch->th_disp_buffer =
1277 (dispatch_private_info_t *)__kmp_allocate(
1278 sizeof(dispatch_private_info_t));
1279 }
1280 this_thr->th.th_dispatch = serial_team->t.t_dispatch;
1281
1282 KMP_MB();
1283
1284 } else {
1285 /* this serialized team is already being used,
1286 * that's fine, just add another nested level */
1287 KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
1288 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
1289 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
1290 ++serial_team->t.t_serialized;
1291 this_thr->th.th_team_serialized = serial_team->t.t_serialized;
1292
1293 // Nested level will be an index in the nested nthreads array
1294 int level = this_thr->th.th_team->t.t_level;
1295 // Thread value exists in the nested nthreads array for the next nested
1296 // level
1297 if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
1298 this_thr->th.th_current_task->td_icvs.nproc =
1299 __kmp_nested_nth.nth[level + 1];
1300 }
1301 serial_team->t.t_level++;
1302 KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d increasing nesting level "
1303 "of serial team %p to %d\n",
1304 global_tid, serial_team, serial_team->t.t_level));
1305
1306 /* allocate/push dispatch buffers stack */
1307 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
1308 {
1309 dispatch_private_info_t *disp_buffer =
1310 (dispatch_private_info_t *)__kmp_allocate(
1311 sizeof(dispatch_private_info_t));
1312 disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
1313 serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
1314 }
1315 this_thr->th.th_dispatch = serial_team->t.t_dispatch;
1316
1317 KMP_MB();
1318 }
1319 KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq);
1320
1321 // Perform the display affinity functionality for
1322 // serialized parallel regions
1323 if (__kmp_display_affinity) {
1324 if (this_thr->th.th_prev_level != serial_team->t.t_level ||
1325 this_thr->th.th_prev_num_threads != 1) {
1326 // NULL means use the affinity-format-var ICV
1327 __kmp_aux_display_affinity(global_tid, NULL);
1328 this_thr->th.th_prev_level = serial_team->t.t_level;
1329 this_thr->th.th_prev_num_threads = 1;
1330 }
1331 }
1332
1333 if (__kmp_env_consistency_check)
1334 __kmp_push_parallel(global_tid, NULL);
1335#if OMPT_SUPPORT
1336 serial_team->t.ompt_team_info.master_return_address = codeptr;
1337 if (ompt_enabled.enabled &&
1338 this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
1339 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
1340 OMPT_GET_FRAME_ADDRESS(0);
1341
1342 ompt_lw_taskteam_t lw_taskteam;
1343 __ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid,
1344 &ompt_parallel_data, codeptr);
1345
1346 __ompt_lw_taskteam_link(&lw_taskteam, this_thr, 1);
1347 // don't use lw_taskteam after linking. content was swapped
1348
1349 /* OMPT implicit task begin */
1350 if (ompt_enabled.ompt_callback_implicit_task) {
1351 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1352 ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr),
1353 OMPT_CUR_TASK_DATA(this_thr), 1, __kmp_tid_from_gtid(global_tid),
1354 ompt_task_implicit); // TODO: Can this be ompt_task_initial?
1355 OMPT_CUR_TASK_INFO(this_thr)->thread_num =
1356 __kmp_tid_from_gtid(global_tid);
1357 }
1358
1359 /* OMPT state */
1360 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
1361 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
1362 OMPT_GET_FRAME_ADDRESS(0);
1363 }
1364#endif
1365}
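Both branches of the function above keep one dispatch buffer per serialized nesting level by pushing a freshly allocated node onto a singly linked stack (disp_buffer->next = head; head = disp_buffer). A self-contained sketch of that push/pop idiom, with a hypothetical node type rather than dispatch_private_info_t:

struct disp_buffer {
  int data = 0;
  disp_buffer *next = nullptr;
};

struct dispatch_stack {
  disp_buffer *head = nullptr;

  void push(disp_buffer *node) { // enter a nested serialized region
    node->next = head;
    head = node;
  }

  disp_buffer *pop() {           // leave the region; caller frees the node
    disp_buffer *top = head;
    if (top)
      head = top->next;
    return top;
  }
};

int main() {
  dispatch_stack st;
  st.push(new disp_buffer{1});
  st.push(new disp_buffer{2});   // deeper nesting level
  delete st.pop();
  delete st.pop();
}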
1366
1367// Test if this fork is for a team closely nested in a teams construct
1368static inline bool __kmp_is_fork_in_teams(kmp_info_t *master_th,
1369 microtask_t microtask, int level,
1370 int teams_level, kmp_va_list ap) {
1371 return (master_th->th.th_teams_microtask && ap &&
1372 microtask != (microtask_t)__kmp_teams_master && level == teams_level);
1373}
1374
1375// Test if this fork is for the teams construct, i.e. to form the outer league
1376// of teams
1377static inline bool __kmp_is_entering_teams(int active_level, int level,
1378 int teams_level, kmp_va_list ap) {
1379 return ((ap == NULL && active_level == 0) ||
1380 (ap && teams_level > 0 && teams_level == level));
1381}
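Assuming a non-null ap means the caller passed outlined-function arguments, the two predicates above classify a fork as "parallel closely nested in teams", "entering teams", or an ordinary parallel. An illustrative stand-in with plain values (the names and bool parameters here are hypothetical, not the runtime's types):

#include <cassert>

bool fork_in_teams(bool has_teams_microtask, bool has_varargs,
                   bool microtask_is_teams_master, int level, int teams_level) {
  // parallel closely nested inside a teams construct
  return has_teams_microtask && has_varargs && !microtask_is_teams_master &&
         level == teams_level;
}

bool entering_teams(bool has_varargs, int active_level, int level,
                    int teams_level) {
  // the fork that forms the outer league of teams
  return (!has_varargs && active_level == 0) ||
         (has_varargs && teams_level > 0 && teams_level == level);
}

int main() {
  assert(fork_in_teams(true, true, false, 2, 2));   // nested parallel in teams
  assert(entering_teams(true, 1, 1, 1));            // forming the league
  assert(!fork_in_teams(false, true, false, 1, 0)); // ordinary parallel
}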
1382
1383// AC: This is start of parallel that is nested inside teams construct.
1384// The team is actual (hot), all workers are ready at the fork barrier.
1385// No lock needed to initialize the team a bit, then free workers.
1386static inline int
1387__kmp_fork_in_teams(ident_t *loc, int gtid, kmp_team_t *parent_team,
1388 kmp_int32 argc, kmp_info_t *master_th, kmp_root_t *root,
1389 enum fork_context_e call_context, microtask_t microtask,
1390 launch_t invoker, int master_set_numthreads, int level,
1391#if OMPT_SUPPORT1
1392 ompt_data_t ompt_parallel_data, void *return_address,
1393#endif
1394 kmp_va_list ap) {
1395 void **argv;
1396 int i;
1397
1398 parent_team->t.t_ident = loc;
1399 __kmp_alloc_argv_entries(argc, parent_team, TRUE(!0));
1400 parent_team->t.t_argc = argc;
1401 argv = (void **)parent_team->t.t_argv;
1402 for (i = argc - 1; i >= 0; --i) {
1403 *argv++ = va_arg(kmp_va_deref(ap), void *);
1404 }
1405 // Increment our nested depth levels, but not increase the serialization
1406 if (parent_team == master_th->th.th_serial_team) {
1407 // AC: we are in serialized parallel
1408 __kmpc_serialized_parallel(loc, gtid);
1409 KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1)if (!(parent_team->t.t_serialized > 1)) { __kmp_debug_assert
("parent_team->t.t_serialized > 1", "openmp/runtime/src/kmp_runtime.cpp"
, 1409); }
;
1410
1411 if (call_context == fork_context_gnu) {
1412 // AC: need to decrement t_serialized for enquiry functions to work
1413 // correctly, will restore at join time
1414 parent_team->t.t_serialized--;
1415 return TRUE(!0);
1416 }
1417
1418#if OMPD_SUPPORT1
1419 parent_team->t.t_pkfn = microtask;
1420#endif
1421
1422#if OMPT_SUPPORT1
1423 void *dummy;
1424 void **exit_frame_p;
1425 ompt_data_t *implicit_task_data;
1426 ompt_lw_taskteam_t lw_taskteam;
1427
1428 if (ompt_enabled.enabled) {
1429 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1430 &ompt_parallel_data, return_address);
1431 exit_frame_p = &(lw_taskteam.ompt_task_info.frame.exit_frame.ptr);
1432
1433 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
1434 // Don't use lw_taskteam after linking. Content was swapped.
1435
1436 /* OMPT implicit task begin */
1437 implicit_task_data = OMPT_CUR_TASK_DATA(master_th)(&(master_th->th.th_current_task->ompt_task_info.task_data
))
;
1438 if (ompt_enabled.ompt_callback_implicit_task) {
1439 OMPT_CUR_TASK_INFO(master_th)(&(master_th->th.th_current_task->ompt_task_info))->thread_num = __kmp_tid_from_gtid(gtid);
1440 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)ompt_callback_implicit_task_callback(
1441 ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th)(&(master_th->th.th_team->t.ompt_team_info.parallel_data
))
, implicit_task_data,
1442 1, OMPT_CUR_TASK_INFO(master_th)(&(master_th->th.th_current_task->ompt_task_info))->thread_num, ompt_task_implicit);
1443 }
1444
1445 /* OMPT state */
1446 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1447 } else {
1448 exit_frame_p = &dummy;
1449 }
1450#endif
1451
1452 // AC: need to decrement t_serialized for enquiry functions to work
1453 // correctly, will restore at join time
1454 parent_team->t.t_serialized--;
1455
1456 {
1457 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel)((void)0);
1458 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK)((void)0);
1459 __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
1460#if OMPT_SUPPORT1
1461 ,
1462 exit_frame_p
1463#endif
1464 );
1465 }
1466
1467#if OMPT_SUPPORT1
1468 if (ompt_enabled.enabled) {
1469 *exit_frame_p = NULL__null;
1470 OMPT_CUR_TASK_INFO(master_th)(&(master_th->th.th_current_task->ompt_task_info))->frame.exit_frame = ompt_data_none{0};
1471 if (ompt_enabled.ompt_callback_implicit_task) {
1472 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)ompt_callback_implicit_task_callback(
1473 ompt_scope_end, NULL__null, implicit_task_data, 1,
1474 OMPT_CUR_TASK_INFO(master_th)(&(master_th->th.th_current_task->ompt_task_info))->thread_num, ompt_task_implicit);
1475 }
1476 ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th)(&(master_th->th.th_team->t.ompt_team_info.parallel_data
))
;
1477 __ompt_lw_taskteam_unlink(master_th);
1478 if (ompt_enabled.ompt_callback_parallel_end) {
1479 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)ompt_callback_parallel_end_callback(
1480 &ompt_parallel_data, OMPT_CUR_TASK_DATA(master_th)(&(master_th->th.th_current_task->ompt_task_info.task_data
))
,
1481 OMPT_INVOKER(call_context)((call_context == fork_context_gnu) ? ompt_parallel_invoker_program
: ompt_parallel_invoker_runtime)
| ompt_parallel_team, return_address);
1482 }
1483 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1484 }
1485#endif
1486 return TRUE(!0);
1487 }
1488
1489 parent_team->t.t_pkfn = microtask;
1490 parent_team->t.t_invoke = invoker;
1491 KMP_ATOMIC_INC(&root->r.r_in_parallel)(&root->r.r_in_parallel)->fetch_add(1, std::memory_order_acq_rel
)
;
1492 parent_team->t.t_active_level++;
1493 parent_team->t.t_level++;
1494 parent_team->t.t_def_allocator = master_th->th.th_def_allocator; // save
1495
1496 // If the threads allocated to the team are less than the thread limit, update
1497 // the thread limit here. th_teams_size.nth is specific to this team nested
1498 // in a teams construct, the team is fully created, and we're about to do
1499 // the actual fork. Best to do this here so that the subsequent uses below
1500 // and in the join have the correct value.
1501 master_th->th.th_teams_size.nth = parent_team->t.t_nproc;
1502
1503#if OMPT_SUPPORT1
1504 if (ompt_enabled.enabled) {
1505 ompt_lw_taskteam_t lw_taskteam;
1506 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, &ompt_parallel_data,
1507 return_address);
1508 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 1, true);
1509 }
1510#endif
1511
1512 /* Change number of threads in the team if requested */
1513 if (master_set_numthreads) { // The parallel has num_threads clause
1514 if (master_set_numthreads <= master_th->th.th_teams_size.nth) {
1515 // AC: only can reduce number of threads dynamically, can't increase
1516 kmp_info_t **other_threads = parent_team->t.t_threads;
1517 // NOTE: if using distributed barrier, we need to run this code block
1518 // even when the team size appears not to have changed from the max.
1519 int old_proc = master_th->th.th_teams_size.nth;
1520 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
1521 __kmp_resize_dist_barrier(parent_team, old_proc, master_set_numthreads);
1522 __kmp_add_threads_to_team(parent_team, master_set_numthreads);
1523 }
1524 parent_team->t.t_nproc = master_set_numthreads;
1525 for (i = 0; i < master_set_numthreads; ++i) {
1526 other_threads[i]->th.th_team_nproc = master_set_numthreads;
1527 }
1528 }
1529 // Keep extra threads hot in the team for possible next parallels
1530 master_th->th.th_set_nproc = 0;
1531 }
1532
1533#if USE_DEBUGGER0
1534 if (__kmp_debugging) { // Let debugger override number of threads.
1535 int nth = __kmp_omp_num_threads(loc);
1536 if (nth > 0) { // 0 means debugger doesn't want to change num threads
1537 master_set_numthreads = nth;
1538 }
1539 }
1540#endif
1541
1542 // Figure out the proc_bind policy for the nested parallel within teams
1543 kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
1544 // proc_bind_default means don't update
1545 kmp_proc_bind_t proc_bind_icv = proc_bind_default;
1546 if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
1547 proc_bind = proc_bind_false;
1548 } else {
1549 // No proc_bind clause specified; use current proc-bind-var
1550 if (proc_bind == proc_bind_default) {
1551 proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
1552 }
1553 /* else: The proc_bind policy was specified explicitly on parallel clause.
1554 This overrides proc-bind-var for this parallel region, but does not
1555 change proc-bind-var. */
1556 // Figure the value of proc-bind-var for the child threads.
1557 if ((level + 1 < __kmp_nested_proc_bind.used) &&
1558 (__kmp_nested_proc_bind.bind_types[level + 1] !=
1559 master_th->th.th_current_task->td_icvs.proc_bind)) {
1560 proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
1561 }
1562 }
1563 KMP_CHECK_UPDATE(parent_team->t.t_proc_bind, proc_bind)if ((parent_team->t.t_proc_bind) != (proc_bind)) (parent_team
->t.t_proc_bind) = (proc_bind)
;
1564 // Need to change the bind-var ICV to correct value for each implicit task
1565 if (proc_bind_icv != proc_bind_default &&
1566 master_th->th.th_current_task->td_icvs.proc_bind != proc_bind_icv) {
1567 kmp_info_t **other_threads = parent_team->t.t_threads;
1568 for (i = 0; i < master_th->th.th_team_nproc; ++i) {
1569 other_threads[i]->th.th_current_task->td_icvs.proc_bind = proc_bind_icv;
1570 }
1571 }
1572 // Reset for next parallel region
1573 master_th->th.th_set_proc_bind = proc_bind_default;
1574
1575#if USE_ITT_BUILD1 && USE_ITT_NOTIFY1
1576 if (((__itt_frame_submit_v3_ptr__kmp_itt_frame_submit_v3_ptr__3_0 && __itt_get_timestamp_ptr__kmp_itt_get_timestamp_ptr__3_0) ||
1577 KMP_ITT_DEBUG0) &&
1578 __kmp_forkjoin_frames_mode == 3 &&
1579 parent_team->t.t_active_level == 1 // only report frames at level 1
1580 && master_th->th.th_teams_size.nteams == 1) {
1581 kmp_uint64 tmp_time = __itt_get_timestamp(!__kmp_itt_get_timestamp_ptr__3_0) ? 0 : __kmp_itt_get_timestamp_ptr__3_0();
1582 master_th->th.th_frame_time = tmp_time;
1583 parent_team->t.t_region_time = tmp_time;
1584 }
1585 if (__itt_stack_caller_create_ptr__kmp_itt_stack_caller_create_ptr__3_0) {
1586 KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL)if (!(parent_team->t.t_stack_id == __null)) { __kmp_debug_assert
("parent_team->t.t_stack_id == __null", "openmp/runtime/src/kmp_runtime.cpp"
, 1586); }
;
1587 // create new stack stitching id before entering fork barrier
1588 parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();
1589 }
1590#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1591#if KMP_AFFINITY_SUPPORTED1
1592 __kmp_partition_places(parent_team);
1593#endif
1594
1595 KF_TRACE(10, ("__kmp_fork_in_teams: before internal fork: root=%p, team=%p, "if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_fork_in_teams: before internal fork: root=%p, team=%p, "
"master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid
); }
1596 "master_th=%p, gtid=%d\n",if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_fork_in_teams: before internal fork: root=%p, team=%p, "
"master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid
); }
1597 root, parent_team, master_th, gtid))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_fork_in_teams: before internal fork: root=%p, team=%p, "
"master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid
); }
;
1598 __kmp_internal_fork(loc, gtid, parent_team);
1599 KF_TRACE(10, ("__kmp_fork_in_teams: after internal fork: root=%p, team=%p, "if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_fork_in_teams: after internal fork: root=%p, team=%p, "
"master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid
); }
1600 "master_th=%p, gtid=%d\n",if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_fork_in_teams: after internal fork: root=%p, team=%p, "
"master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid
); }
1601 root, parent_team, master_th, gtid))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_fork_in_teams: after internal fork: root=%p, team=%p, "
"master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid
); }
;
1602
1603 if (call_context == fork_context_gnu)
1604 return TRUE(!0);
1605
1606 /* Invoke microtask for PRIMARY thread */
1607 KA_TRACE(20, ("__kmp_fork_in_teams: T#%d(%d:0) invoke microtask = %p\n", gtid,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_in_teams: T#%d(%d:0) invoke microtask = %p\n"
, gtid, parent_team->t.t_id, parent_team->t.t_pkfn); }
1608 parent_team->t.t_id, parent_team->t.t_pkfn))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_in_teams: T#%d(%d:0) invoke microtask = %p\n"
, gtid, parent_team->t.t_id, parent_team->t.t_pkfn); }
;
1609
1610 if (!parent_team->t.t_invoke(gtid)) {
1611 KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread")if (!(0)) { __kmp_debug_assert(("cannot invoke microtask for PRIMARY thread"
), "openmp/runtime/src/kmp_runtime.cpp", 1611); }
;
1612 }
1613 KA_TRACE(20, ("__kmp_fork_in_teams: T#%d(%d:0) done microtask = %p\n", gtid,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_in_teams: T#%d(%d:0) done microtask = %p\n"
, gtid, parent_team->t.t_id, parent_team->t.t_pkfn); }
1614 parent_team->t.t_id, parent_team->t.t_pkfn))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_in_teams: T#%d(%d:0) done microtask = %p\n"
, gtid, parent_team->t.t_id, parent_team->t.t_pkfn); }
;
1615 KMP_MB(); /* Flush all pending memory write invalidates. */
1616
1617 KA_TRACE(20, ("__kmp_fork_in_teams: parallel exit T#%d\n", gtid))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_in_teams: parallel exit T#%d\n"
, gtid); }
;
1618
1619 return TRUE(!0);
1620}
1621
1622// Create a serialized parallel region
1623static inline int
1624__kmp_serial_fork_call(ident_t *loc, int gtid, enum fork_context_e call_context,
1625 kmp_int32 argc, microtask_t microtask, launch_t invoker,
1626 kmp_info_t *master_th, kmp_team_t *parent_team,
1627#if OMPT_SUPPORT1
1628 ompt_data_t *ompt_parallel_data, void **return_address,
1629 ompt_data_t **parent_task_data,
1630#endif
1631 kmp_va_list ap) {
1632 kmp_team_t *team;
1633 int i;
1634 void **argv;
1635
1636/* josh todo: hypothetical question: what do we do for OS X*? */
1637#if KMP_OS_LINUX1 && \
1638 (KMP_ARCH_X860 || KMP_ARCH_X86_641 || KMP_ARCH_ARM || KMP_ARCH_AARCH640)
1639 void *args[argc];
1640#else
1641 void **args = (void **)KMP_ALLOCA(argc * sizeof(void *))__builtin_alloca (argc * sizeof(void *));
1642#endif /* KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || \
1643 KMP_ARCH_AARCH64) */
1644
1645 KA_TRACE(if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_serial_fork_call: T#%d serializing parallel region\n"
, gtid); }
1646 20, ("__kmp_serial_fork_call: T#%d serializing parallel region\n", gtid))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_serial_fork_call: T#%d serializing parallel region\n"
, gtid); }
;
1647
1648 __kmpc_serialized_parallel(loc, gtid);
1649
1650#if OMPD_SUPPORT1
1651 master_th->th.th_serial_team->t.t_pkfn = microtask;
1652#endif
1653
1654 if (call_context == fork_context_intel) {
1655 /* TODO this sucks, use the compiler itself to pass args! :) */
1656 master_th->th.th_serial_team->t.t_ident = loc;
1657 if (!ap) {
1658 // revert change made in __kmpc_serialized_parallel()
1659 master_th->th.th_serial_team->t.t_level--;
1660// Get args from parent team for teams construct
1661
1662#if OMPT_SUPPORT1
1663 void *dummy;
1664 void **exit_frame_p;
1665 ompt_task_info_t *task_info;
1666 ompt_lw_taskteam_t lw_taskteam;
1667
1668 if (ompt_enabled.enabled) {
1669 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1670 ompt_parallel_data, *return_address);
1671
1672 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
1673 // don't use lw_taskteam after linking. content was swapped
1674 task_info = OMPT_CUR_TASK_INFO(master_th)(&(master_th->th.th_current_task->ompt_task_info));
1675 exit_frame_p = &(task_info->frame.exit_frame.ptr);
1676 if (ompt_enabled.ompt_callback_implicit_task) {
1677 OMPT_CUR_TASK_INFO(master_th)(&(master_th->th.th_current_task->ompt_task_info))->thread_num = __kmp_tid_from_gtid(gtid);
1678 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)ompt_callback_implicit_task_callback(
1679 ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th)(&(master_th->th.th_team->t.ompt_team_info.parallel_data
))
,
1680 &(task_info->task_data), 1,
1681 OMPT_CUR_TASK_INFO(master_th)(&(master_th->th.th_current_task->ompt_task_info))->thread_num, ompt_task_implicit);
1682 }
1683
1684 /* OMPT state */
1685 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1686 } else {
1687 exit_frame_p = &dummy;
1688 }
1689#endif
1690
1691 {
1692 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel)((void)0);
1693 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK)((void)0);
1694 __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
1695#if OMPT_SUPPORT1
1696 ,
1697 exit_frame_p
1698#endif
1699 );
1700 }
1701
1702#if OMPT_SUPPORT1
1703 if (ompt_enabled.enabled) {
1704 *exit_frame_p = NULL__null;
1705 if (ompt_enabled.ompt_callback_implicit_task) {
1706 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)ompt_callback_implicit_task_callback(
1707 ompt_scope_end, NULL__null, &(task_info->task_data), 1,
1708 OMPT_CUR_TASK_INFO(master_th)(&(master_th->th.th_current_task->ompt_task_info))->thread_num, ompt_task_implicit);
1709 }
1710 *ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th)(&(master_th->th.th_team->t.ompt_team_info.parallel_data
))
;
1711 __ompt_lw_taskteam_unlink(master_th);
1712 if (ompt_enabled.ompt_callback_parallel_end) {
1713 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)ompt_callback_parallel_end_callback(
1714 ompt_parallel_data, *parent_task_data,
1715 OMPT_INVOKER(call_context)((call_context == fork_context_gnu) ? ompt_parallel_invoker_program
: ompt_parallel_invoker_runtime)
| ompt_parallel_team, *return_address);
1716 }
1717 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1718 }
1719#endif
1720 } else if (microtask == (microtask_t)__kmp_teams_master) {
1721 KMP_DEBUG_ASSERT(master_th->th.th_team == master_th->th.th_serial_team)if (!(master_th->th.th_team == master_th->th.th_serial_team
)) { __kmp_debug_assert("master_th->th.th_team == master_th->th.th_serial_team"
, "openmp/runtime/src/kmp_runtime.cpp", 1721); }
;
1722 team = master_th->th.th_team;
1723 // team->t.t_pkfn = microtask;
1724 team->t.t_invoke = invoker;
1725 __kmp_alloc_argv_entries(argc, team, TRUE(!0));
1726 team->t.t_argc = argc;
1727 argv = (void **)team->t.t_argv;
1728 if (ap) {
1729 for (i = argc - 1; i >= 0; --i)
1730 *argv++ = va_arg(kmp_va_deref(ap), void *)__builtin_va_arg((*(ap)), void *);
1731 } else {
1732 for (i = 0; i < argc; ++i)
1733 // Get args from parent team for teams construct
1734 argv[i] = parent_team->t.t_argv[i];
1735 }
1736 // AC: revert change made in __kmpc_serialized_parallel()
1737 // because initial code in teams should have level=0
1738 team->t.t_level--;
1739 // AC: call special invoker for outer "parallel" of teams construct
1740 invoker(gtid);
1741#if OMPT_SUPPORT1
1742 if (ompt_enabled.enabled) {
1743 ompt_task_info_t *task_info = OMPT_CUR_TASK_INFO(master_th)(&(master_th->th.th_current_task->ompt_task_info));
1744 if (ompt_enabled.ompt_callback_implicit_task) {
1745 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)ompt_callback_implicit_task_callback(
1746 ompt_scope_end, NULL__null, &(task_info->task_data), 0,
1747 OMPT_CUR_TASK_INFO(master_th)(&(master_th->th.th_current_task->ompt_task_info))->thread_num, ompt_task_initial);
1748 }
1749 if (ompt_enabled.ompt_callback_parallel_end) {
1750 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)ompt_callback_parallel_end_callback(
1751 ompt_parallel_data, *parent_task_data,
1752 OMPT_INVOKER(call_context)((call_context == fork_context_gnu) ? ompt_parallel_invoker_program
: ompt_parallel_invoker_runtime)
| ompt_parallel_league,
1753 *return_address);
1754 }
1755 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1756 }
1757#endif
1758 } else {
1759 argv = args;
1760 for (i = argc - 1; i >= 0; --i)
1761 *argv++ = va_arg(kmp_va_deref(ap), void *)__builtin_va_arg((*(ap)), void *);
1762 KMP_MB();
1763
1764#if OMPT_SUPPORT1
1765 void *dummy;
1766 void **exit_frame_p;
1767 ompt_task_info_t *task_info;
1768 ompt_lw_taskteam_t lw_taskteam;
1769 ompt_data_t *implicit_task_data;
1770
1771 if (ompt_enabled.enabled) {
1772 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1773 ompt_parallel_data, *return_address);
1774 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
1775 // don't use lw_taskteam after linking. content was swapped
1776 task_info = OMPT_CUR_TASK_INFO(master_th)(&(master_th->th.th_current_task->ompt_task_info));
1777 exit_frame_p = &(task_info->frame.exit_frame.ptr);
1778
1779 /* OMPT implicit task begin */
1780 implicit_task_data = OMPT_CUR_TASK_DATA(master_th)(&(master_th->th.th_current_task->ompt_task_info.task_data
))
;
1781 if (ompt_enabled.ompt_callback_implicit_task) {
1782 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)ompt_callback_implicit_task_callback(
1783 ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th)(&(master_th->th.th_team->t.ompt_team_info.parallel_data
))
,
1784 implicit_task_data, 1, __kmp_tid_from_gtid(gtid),
1785 ompt_task_implicit);
1786 OMPT_CUR_TASK_INFO(master_th)(&(master_th->th.th_current_task->ompt_task_info))->thread_num = __kmp_tid_from_gtid(gtid);
1787 }
1788
1789 /* OMPT state */
1790 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1791 } else {
1792 exit_frame_p = &dummy;
1793 }
1794#endif
1795
1796 {
1797 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel)((void)0);
1798 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK)((void)0);
1799 __kmp_invoke_microtask(microtask, gtid, 0, argc, args
1800#if OMPT_SUPPORT1
1801 ,
1802 exit_frame_p
1803#endif
1804 );
1805 }
1806
1807#if OMPT_SUPPORT1
1808 if (ompt_enabled.enabled) {
1809 *exit_frame_p = NULL__null;
1810 if (ompt_enabled.ompt_callback_implicit_task) {
1811 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)ompt_callback_implicit_task_callback(
1812 ompt_scope_end, NULL__null, &(task_info->task_data), 1,
1813 OMPT_CUR_TASK_INFO(master_th)(&(master_th->th.th_current_task->ompt_task_info))->thread_num, ompt_task_implicit);
1814 }
1815
1816 *ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th)(&(master_th->th.th_team->t.ompt_team_info.parallel_data
))
;
1817 __ompt_lw_taskteam_unlink(master_th);
1818 if (ompt_enabled.ompt_callback_parallel_end) {
1819 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)ompt_callback_parallel_end_callback(
1820 ompt_parallel_data, *parent_task_data,
1821 OMPT_INVOKER(call_context)((call_context == fork_context_gnu) ? ompt_parallel_invoker_program
: ompt_parallel_invoker_runtime)
| ompt_parallel_team, *return_address);
1822 }
1823 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1824 }
1825#endif
1826 }
1827 } else if (call_context == fork_context_gnu) {
1828#if OMPT_SUPPORT1
1829 if (ompt_enabled.enabled) {
1830 ompt_lw_taskteam_t lwt;
1831 __ompt_lw_taskteam_init(&lwt, master_th, gtid, ompt_parallel_data,
1832 *return_address);
1833
1834 lwt.ompt_task_info.frame.exit_frame = ompt_data_none{0};
1835 __ompt_lw_taskteam_link(&lwt, master_th, 1);
1836 }
1837// don't use lw_taskteam after linking. content was swapped
1838#endif
1839
1840 // we were called from GNU native code
1841 KA_TRACE(20, ("__kmp_serial_fork_call: T#%d serial exit\n", gtid))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_serial_fork_call: T#%d serial exit\n"
, gtid); }
;
1842 return FALSE0;
1843 } else {
1844 KMP_ASSERT2(call_context < fork_context_last,if (!(call_context < fork_context_last)) { __kmp_debug_assert
(("__kmp_serial_fork_call: unknown fork_context parameter"), "openmp/runtime/src/kmp_runtime.cpp"
, 1845); }
1845 "__kmp_serial_fork_call: unknown fork_context parameter")if (!(call_context < fork_context_last)) { __kmp_debug_assert
(("__kmp_serial_fork_call: unknown fork_context parameter"), "openmp/runtime/src/kmp_runtime.cpp"
, 1845); }
;
1846 }
1847
1848 KA_TRACE(20, ("__kmp_serial_fork_call: T#%d serial exit\n", gtid))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_serial_fork_call: T#%d serial exit\n"
, gtid); }
;
1849 KMP_MB();
1850 return FALSE0;
1851}
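Before invoking the microtask, the serial path above copies the variadic outlined-function arguments into a flat void* array (args / parent_team->t.t_argv). A self-contained sketch of that marshalling step, with a hypothetical microtask signature in place of the runtime's:

#include <cstdarg>
#include <cstdio>

typedef void (*microtask_like_t)(int argc, void **argv);

static void print_ints(int argc, void **argv) {
  for (int i = 0; i < argc; ++i)
    std::printf("arg[%d] = %d\n", i, *static_cast<int *>(argv[i]));
}

static void fork_like(microtask_like_t fn, int argc, ...) {
  void *args[16]; // assumes argc <= 16; the runtime sizes this with a VLA/alloca
  va_list ap;
  va_start(ap, argc);
  for (int i = 0; i < argc; ++i)
    args[i] = va_arg(ap, void *); // equivalent of the copy loops above
  va_end(ap);
  fn(argc, args);
}

int main() {
  int a = 1, b = 2;
  fork_like(print_ints, 2, static_cast<void *>(&a), static_cast<void *>(&b));
}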
1852
1853/* most of the work for a fork */
1854/* return true if we really went parallel, false if serialized */
1855int __kmp_fork_call(ident_t *loc, int gtid,
1856 enum fork_context_e call_context, // Intel, GNU, ...
1857 kmp_int32 argc, microtask_t microtask, launch_t invoker,
1858 kmp_va_list ap) {
1859 void **argv;
1860 int i;
1861 int master_tid;
1862 int master_this_cons;
1863 kmp_team_t *team;
1864 kmp_team_t *parent_team;
1865 kmp_info_t *master_th;
1866 kmp_root_t *root;
1867 int nthreads;
1868 int master_active;
1869 int master_set_numthreads;
1870 int level;
1871 int active_level;
1872 int teams_level;
1873#if KMP_NESTED_HOT_TEAMS
1874 kmp_hot_team_ptr_t **p_hot_teams;
1875#endif
1876 { // KMP_TIME_BLOCK
1877 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call);
1878 KMP_COUNT_VALUE(OMP_PARALLEL_args, argc);
1879
1880 KA_TRACE(20, ("__kmp_fork_call: enter T#%d\n", gtid));
1. Assuming 'kmp_a_debug' is < 20
1881 if (__kmp_stkpadding > 0 && __kmp_root[gtid] != NULL) {
2. Assuming '__kmp_stkpadding' is <= 0
1882 /* Some systems prefer the stack for the root thread(s) to start with */
1883 /* some gap from the parent stack to prevent false sharing. */
1884 void *dummy = KMP_ALLOCA(__kmp_stkpadding);
1885 /* These 2 lines below are so this does not get optimized out */
1886 if (__kmp_stkpadding > KMP_MAX_STKPADDING)
1887 __kmp_stkpadding += (short)((kmp_int64)dummy);
1888 }
1889
1890 /* initialize if needed */
1891 KMP_DEBUG_ASSERT(
3. Assuming '__kmp_init_serial' is not equal to 0
4. Taking false branch
1892 __kmp_init_serial); // AC: potentially unsafe, not in sync with shutdown
1893 if (!TCR_4(__kmp_init_parallel))
5. Assuming '__kmp_init_parallel' is not equal to 0
6. Taking false branch
1894 __kmp_parallel_initialize();
1895 __kmp_resume_if_soft_paused();
1896
1897 /* setup current data */
1898 // AC: potentially unsafe, not in sync with library shutdown,
1899 // __kmp_threads can be freed
1900 master_th = __kmp_threads[gtid];
1901
1902 parent_team = master_th->th.th_team;
1903 master_tid = master_th->th.th_info.ds.ds_tid;
1904 master_this_cons = master_th->th.th_local.this_construct;
1905 root = master_th->th.th_root;
1906 master_active = root->r.r_active;
1907 master_set_numthreads = master_th->th.th_set_nproc;
1908
1909#if OMPT_SUPPORT
1910 ompt_data_t ompt_parallel_data = ompt_data_none;
1911 ompt_data_t *parent_task_data;
7. 'parent_task_data' declared without an initial value
1912 ompt_frame_t *ompt_frame;
1913 void *return_address = NULL;
1914
1915 if (ompt_enabled.enabled) {
8. Assuming field 'enabled' is 0
9. Taking false branch
1916 __ompt_get_task_info_internal(0, NULL, &parent_task_data, &ompt_frame,
1917 NULL, NULL);
1918 return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);
1919 }
1920#endif
1921
1922 // Assign affinity to root thread if it hasn't happened yet
1923 __kmp_assign_root_init_mask();
10. Calling '__kmp_assign_root_init_mask'
14. Returning from '__kmp_assign_root_init_mask'
1924
1925 // Nested level will be an index in the nested nthreads array
1926 level = parent_team->t.t_level;
1927 // used to launch non-serial teams even if nested is not allowed
1928 active_level = parent_team->t.t_active_level;
1929 // needed to check nesting inside the teams
1930 teams_level = master_th->th.th_teams_level;
1931#if KMP_NESTED_HOT_TEAMS
1932 p_hot_teams = &master_th->th.th_hot_teams;
1933 if (*p_hot_teams == NULL && __kmp_hot_teams_max_level > 0) {
15. Assuming the condition is false
1934 *p_hot_teams = (kmp_hot_team_ptr_t *)__kmp_allocate(
1935 sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
1936 (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
1937 // it is either actual or not needed (when active_level > 0)
1938 (*p_hot_teams)[0].hot_team_nth = 1;
1939 }
1940#endif
1941
1942#if OMPT_SUPPORT
1943 if (ompt_enabled.enabled) {
16. Assuming field 'enabled' is not equal to 0
17. Taking true branch
1944 if (ompt_enabled.ompt_callback_parallel_begin) {
18. Assuming field 'ompt_callback_parallel_begin' is not equal to 0
1945 int team_size = master_set_numthreads
19. Taking true branch
20. Assuming 'master_set_numthreads' is not equal to 0
21. '?' condition is true
1946 ? master_set_numthreads
1947 : get__nproc_2(parent_team, master_tid);
1948 int flags = OMPT_INVOKER(call_context) |
22. Assuming 'call_context' is not equal to fork_context_gnu
23. '?' condition is false
1949 ((microtask == (microtask_t)__kmp_teams_master)
24. Assuming 'microtask' is not equal to __kmp_teams_master
25. '?' condition is false
1950 ? ompt_parallel_league
1951 : ompt_parallel_team);
1952 ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
26. 1st function call argument is an uninitialized value
1953 parent_task_data, ompt_frame, &ompt_parallel_data, team_size, flags,
1954 return_address);
1955 }
1956 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1957 }
1958#endif
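This is the block the warning points at: parent_task_data (declared at line 1911) is only assigned inside the first if (ompt_enabled.enabled) at lines 1915-1917, yet it is passed to the callback at lines 1952-1953 under a second read of the same flag. The analyzer's path assumes the flag reads 0 at the first check and non-zero at the second, so the pointer reaches the call uninitialized on that path; whether such an interleaving can actually occur at runtime depends on invariants the checker cannot see. A minimal stand-alone reduction of the pattern, with illustrative names rather than the runtime's, plus the kind of initialization that removes the path:

#include <cstdio>

struct task_data { int id; };

static volatile int enabled_flag = 0; // analogous to ompt_enabled.enabled

static void get_task_info(task_data **out) {
  static task_data td{42};
  *out = &td;
}

static void report_begin(task_data *parent) {
  std::printf("parent=%p\n", static_cast<void *>(parent));
}

void fork_call_like() {
  task_data *parent = nullptr; // initializing here (or caching the flag in a
                               // local) is one way to quiet the report
  if (enabled_flag)            // first read of the flag
    get_task_info(&parent);

  /* ... work that does not touch 'parent' ... */

  if (enabled_flag)            // second read: the analyzer may assume it differs
    report_begin(parent);      // without the '= nullptr', 'parent' could be
                               // passed uninitialized on that path
}

int main() { fork_call_like(); }

Initializing the pointer, or reading ompt_enabled.enabled once into a local and branching on that, would be enough to silence this particular report without changing behavior when the flag is stable.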
1959
1960 master_th->th.th_ident = loc;
1961
1962 // Parallel closely nested in teams construct:
1963 if (__kmp_is_fork_in_teams(master_th, microtask, level, teams_level, ap)) {
1964 return __kmp_fork_in_teams(loc, gtid, parent_team, argc, master_th, root,
1965 call_context, microtask, invoker,
1966 master_set_numthreads, level,
1967#if OMPT_SUPPORT1
1968 ompt_parallel_data, return_address,
1969#endif
1970 ap);
1971 } // End parallel closely nested in teams construct
1972
1973#if KMP_DEBUG1
1974 if (__kmp_tasking_mode != tskm_immediate_exec) {
1975 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==if (!(master_th->th.th_task_team == parent_team->t.t_task_team
[master_th->th.th_task_state])) { __kmp_debug_assert("master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]"
, "openmp/runtime/src/kmp_runtime.cpp", 1976); }
1976 parent_team->t.t_task_team[master_th->th.th_task_state])if (!(master_th->th.th_task_team == parent_team->t.t_task_team
[master_th->th.th_task_state])) { __kmp_debug_assert("master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]"
, "openmp/runtime/src/kmp_runtime.cpp", 1976); }
;
1977 }
1978#endif
1979
1980 // Need this to happen before we determine the number of threads, not while
1981 // we are allocating the team
1982 //__kmp_push_current_task_to_thread(master_th, parent_team, 0);
1983
1984 // Determine the number of threads
1985 int enter_teams =
1986 __kmp_is_entering_teams(active_level, level, teams_level, ap);
1987 if ((!enter_teams &&
1988 (parent_team->t.t_active_level >=
1989 master_th->th.th_current_task->td_icvs.max_active_levels)) ||
1990 (__kmp_library == library_serial)) {
1991 KC_TRACE(10, ("__kmp_fork_call: T#%d serializing team\n", gtid))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_fork_call: T#%d serializing team\n"
, gtid); }
;
1992 nthreads = 1;
1993 } else {
1994 nthreads = master_set_numthreads
1995 ? master_set_numthreads
1996 // TODO: get nproc directly from current task
1997 : get__nproc_2(parent_team, master_tid)((parent_team)->t.t_threads[(master_tid)]->th.th_current_task
->td_icvs.nproc)
;
1998 // Check if we need to take forkjoin lock? (no need for serialized
1999 // parallel out of teams construct).
2000 if (nthreads > 1) {
2001 /* determine how many new threads we can use */
2002 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2003 /* AC: If we execute teams from parallel region (on host), then teams
2004 should be created but each can only have 1 thread if nesting is
2005 disabled. If teams called from serial region, then teams and their
2006 threads should be created regardless of the nesting setting. */
2007 nthreads = __kmp_reserve_threads(root, parent_team, master_tid,
2008 nthreads, enter_teams);
2009 if (nthreads == 1) {
2010 // Free lock for single thread execution here; for multi-thread
2011 // execution it will be freed later after team of threads created
2012 // and initialized
2013 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2014 }
2015 }
2016 }
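The branch above decides whether this fork is serialized: one thread when the active nesting level has reached max-active-levels (unless we are entering a teams construct) or when the library runs serially, otherwise the num_threads request or the nproc ICV, later clipped by __kmp_reserve_threads under the fork/join lock. A hedged sketch of that decision with illustrative names (the reservation step is omitted):

#include <algorithm>

int decide_nthreads(int requested_num_threads, int nproc_icv,
                    int parent_active_level, int max_active_levels,
                    bool library_is_serial, bool entering_teams) {
  if ((!entering_teams && parent_active_level >= max_active_levels) ||
      library_is_serial)
    return 1; // serialize this parallel region
  int n = requested_num_threads ? requested_num_threads : nproc_icv;
  return std::max(n, 1); // a real runtime also reserves threads under a lock
}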
2017 KMP_DEBUG_ASSERT(nthreads > 0)if (!(nthreads > 0)) { __kmp_debug_assert("nthreads > 0"
, "openmp/runtime/src/kmp_runtime.cpp", 2017); }
;
2018
2019 // If we temporarily changed the set number of threads then restore it now
2020 master_th->th.th_set_nproc = 0;
2021
2022 if (nthreads == 1) {
2023 return __kmp_serial_fork_call(loc, gtid, call_context, argc, microtask,
2024 invoker, master_th, parent_team,
2025#if OMPT_SUPPORT1
2026 &ompt_parallel_data, &return_address,
2027 &parent_task_data,
2028#endif
2029 ap);
2030 } // if (nthreads == 1)
2031
2032 // GEH: only modify the executing flag in the case when not serialized
2033 // serialized case is handled in kmpc_serialized_parallel
2034 KF_TRACE(10, ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
"curtask=%p, curtask_max_aclevel=%d\n", parent_team->t.t_active_level
, master_th, master_th->th.th_current_task, master_th->
th.th_current_task->td_icvs.max_active_levels); }
2035 "curtask=%p, curtask_max_aclevel=%d\n",if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
"curtask=%p, curtask_max_aclevel=%d\n", parent_team->t.t_active_level
, master_th, master_th->th.th_current_task, master_th->
th.th_current_task->td_icvs.max_active_levels); }
2036 parent_team->t.t_active_level, master_th,if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
"curtask=%p, curtask_max_aclevel=%d\n", parent_team->t.t_active_level
, master_th, master_th->th.th_current_task, master_th->
th.th_current_task->td_icvs.max_active_levels); }
2037 master_th->th.th_current_task,if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
"curtask=%p, curtask_max_aclevel=%d\n", parent_team->t.t_active_level
, master_th, master_th->th.th_current_task, master_th->
th.th_current_task->td_icvs.max_active_levels); }
2038 master_th->th.th_current_task->td_icvs.max_active_levels))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
"curtask=%p, curtask_max_aclevel=%d\n", parent_team->t.t_active_level
, master_th, master_th->th.th_current_task, master_th->
th.th_current_task->td_icvs.max_active_levels); }
;
2039 // TODO: GEH - cannot do this assertion because root thread not set up as
2040 // executing
2041 // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 1 );
2042 master_th->th.th_current_task->td_flags.executing = 0;
2043
2044 if (!master_th->th.th_teams_microtask || level > teams_level) {
2045 /* Increment our nested depth level */
2046 KMP_ATOMIC_INC(&root->r.r_in_parallel)(&root->r.r_in_parallel)->fetch_add(1, std::memory_order_acq_rel
)
;
2047 }
2048
2049 // See if we need to make a copy of the ICVs.
2050 int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
2051 if ((level + 1 < __kmp_nested_nth.used) &&
2052 (__kmp_nested_nth.nth[level + 1] != nthreads_icv)) {
2053 nthreads_icv = __kmp_nested_nth.nth[level + 1];
2054 } else {
2055 nthreads_icv = 0; // don't update
2056 }
2057
2058 // Figure out the proc_bind_policy for the new team.
2059 kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
2060 // proc_bind_default means don't update
2061 kmp_proc_bind_t proc_bind_icv = proc_bind_default;
2062 if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
2063 proc_bind = proc_bind_false;
2064 } else {
2065 // No proc_bind clause specified; use current proc-bind-var for this
2066 // parallel region
2067 if (proc_bind == proc_bind_default) {
2068 proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
2069 }
2070 // Have teams construct take proc_bind value from KMP_TEAMS_PROC_BIND
2071 if (master_th->th.th_teams_microtask &&
2072 microtask == (microtask_t)__kmp_teams_master) {
2073 proc_bind = __kmp_teams_proc_bind;
2074 }
2075 /* else: The proc_bind policy was specified explicitly on parallel clause.
2076 This overrides proc-bind-var for this parallel region, but does not
2077 change proc-bind-var. */
2078 // Figure the value of proc-bind-var for the child threads.
2079 if ((level + 1 < __kmp_nested_proc_bind.used) &&
2080 (__kmp_nested_proc_bind.bind_types[level + 1] !=
2081 master_th->th.th_current_task->td_icvs.proc_bind)) {
2082 // Do not modify the proc bind icv for the two teams construct forks
2083 // They just let the proc bind icv pass through
2084 if (!master_th->th.th_teams_microtask ||
2085 !(microtask == (microtask_t)__kmp_teams_master || ap == NULL__null))
2086 proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
2087 }
2088 }
2089
2090 // Reset for next parallel region
2091 master_th->th.th_set_proc_bind = proc_bind_default;
2092
2093 if ((nthreads_icv > 0) || (proc_bind_icv != proc_bind_default)) {
2094 kmp_internal_control_t new_icvs;
2095 copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
2096 new_icvs.next = NULL__null;
2097 if (nthreads_icv > 0) {
2098 new_icvs.nproc = nthreads_icv;
2099 }
2100 if (proc_bind_icv != proc_bind_default) {
2101 new_icvs.proc_bind = proc_bind_icv;
2102 }
2103
2104 /* allocate a new parallel team */
2105 KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_fork_call: before __kmp_allocate_team\n"
); }
;
2106 team = __kmp_allocate_team(root, nthreads, nthreads,
2107#if OMPT_SUPPORT1
2108 ompt_parallel_data,
2109#endif
2110 proc_bind, &new_icvs,
2111 argc USE_NESTED_HOT_ARG(master_th), master_th);
2112 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
2113 copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs, &new_icvs);
2114 } else {
2115 /* allocate a new parallel team */
2116 KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_fork_call: before __kmp_allocate_team\n"
); }
;
2117 team = __kmp_allocate_team(root, nthreads, nthreads,
2118#if OMPT_SUPPORT1
2119 ompt_parallel_data,
2120#endif
2121 proc_bind,
2122 &master_th->th.th_current_task->td_icvs,
2123 argc USE_NESTED_HOT_ARG(master_th), master_th);
2124 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
2125 copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs,
2126 &master_th->th.th_current_task->td_icvs);
2127 }
2128 KF_TRACE(
2129 10, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team));
2130
2131 /* setup the new team */
2132 KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
2133 KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
2134 KMP_CHECK_UPDATE(team->t.t_ident, loc);
2135 KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
2136 KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
2137#if OMPT_SUPPORT
2138 KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.master_return_address,
2139 return_address);
2140#endif
2141 KMP_CHECK_UPDATE(team->t.t_invoke, invoker); // TODO move to root, maybe
2142 // TODO: parent_team->t.t_level == INT_MAX ???
2143 if (!master_th->th.th_teams_microtask || level > teams_level) {
2144 int new_level = parent_team->t.t_level + 1;
2145 KMP_CHECK_UPDATE(team->t.t_level, new_level);
2146 new_level = parent_team->t.t_active_level + 1;
2147 KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
2148 } else {
2149 // AC: Do not increase parallel level at start of the teams construct
2150 int new_level = parent_team->t.t_level;
2151 KMP_CHECK_UPDATE(team->t.t_level, new_level);
2152 new_level = parent_team->t.t_active_level;
2153 KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
2154 }
2155 kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);
2156 // set primary thread's schedule as new run-time schedule
2157 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);
2158
2159 KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
2160 KMP_CHECK_UPDATE(team->t.t_def_allocator, master_th->th.th_def_allocator);
2161
2162 // Update the floating point rounding in the team if required.
2163 propagateFPControl(team);
2164#if OMPD_SUPPORT
2165 if (ompd_state & OMPD_ENABLE_BP)
2166 ompd_bp_parallel_begin();
2167#endif
2168
2169 if (__kmp_tasking_mode != tskm_immediate_exec) {
2170 // Set primary thread's task team to team's task team. Unless this is hot
2171 // team, it should be NULL.
2172 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
2173 parent_team->t.t_task_team[master_th->th.th_task_state]);
2174 KA_TRACE(20, ("__kmp_fork_call: Primary T#%d pushing task_team %p / team "
2175 "%p, new task_team %p / team %p\n",
2176 __kmp_gtid_from_thread(master_th),
2177 master_th->th.th_task_team, parent_team,
2178 team->t.t_task_team[master_th->th.th_task_state], team));
2179
2180 if (active_level || master_th->th.th_task_team) {
2181 // Take a memo of primary thread's task_state
2182 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2183 if (master_th->th.th_task_state_top >=
2184 master_th->th.th_task_state_stack_sz) { // increase size
2185 kmp_uint32 new_size = 2 * master_th->th.th_task_state_stack_sz;
2186 kmp_uint8 *old_stack, *new_stack;
2187 kmp_uint32 i;
2188 new_stack = (kmp_uint8 *)__kmp_allocate(new_size);
2189 for (i = 0; i < master_th->th.th_task_state_stack_sz; ++i) {
2190 new_stack[i] = master_th->th.th_task_state_memo_stack[i];
2191 }
2192 for (i = master_th->th.th_task_state_stack_sz; i < new_size;
2193 ++i) { // zero-init rest of stack
2194 new_stack[i] = 0;
2195 }
2196 old_stack = master_th->th.th_task_state_memo_stack;
2197 master_th->th.th_task_state_memo_stack = new_stack;
2198 master_th->th.th_task_state_stack_sz = new_size;
2199 __kmp_free(old_stack);
2200 }
2201 // Store primary thread's task_state on stack
2202 master_th->th
2203 .th_task_state_memo_stack[master_th->th.th_task_state_top] =
2204 master_th->th.th_task_state;
2205 master_th->th.th_task_state_top++;
2206#if KMP_NESTED_HOT_TEAMS
2207 if (master_th->th.th_hot_teams &&
2208 active_level < __kmp_hot_teams_max_level &&
2209 team == master_th->th.th_hot_teams[active_level].hot_team) {
2210 // Restore primary thread's nested state if nested hot team
2211 master_th->th.th_task_state =
2212 master_th->th
2213 .th_task_state_memo_stack[master_th->th.th_task_state_top];
2214 } else {
2215#endif
2216 master_th->th.th_task_state = 0;
2217#if KMP_NESTED_HOT_TEAMS
2218 }
2219#endif
2220 }
2221#if !KMP_NESTED_HOT_TEAMS
2222 KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) ||
2223 (team == root->r.r_hot_team));
2224#endif
2225 }
2226
2227 KA_TRACE(
2228 20,
2229 ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
2230 gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id,
2231 team->t.t_nproc));
2232 KMP_DEBUG_ASSERT(team != root->r.r_hot_team ||
2233 (team->t.t_master_tid == 0 &&
2234 (team->t.t_parent == root->r.r_root_team ||
2235 team->t.t_parent->t.t_serialized)));
2236 KMP_MB();
2237
2238 /* now, setup the arguments */
2239 argv = (void **)team->t.t_argv;
2240 if (ap) {
2241 for (i = argc - 1; i >= 0; --i) {
2242 void *new_argv = va_arg(kmp_va_deref(ap), void *);
2243 KMP_CHECK_UPDATE(*argv, new_argv);
2244 argv++;
2245 }
2246 } else {
2247 for (i = 0; i < argc; ++i) {
2248 // Get args from parent team for teams construct
2249 KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);
2250 }
2251 }
2252
2253 /* now actually fork the threads */
2254 KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
2255 if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong
2256 root->r.r_active = TRUE;
2257
2258 __kmp_fork_team_threads(root, team, master_th, gtid, !ap);
2259 __kmp_setup_icv_copy(team, nthreads,
2260 &master_th->th.th_current_task->td_icvs, loc);
2261
2262#if OMPT_SUPPORT
2263 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
2264#endif
2265
2266 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2267
2268#if USE_ITT_BUILD
2269 if (team->t.t_active_level == 1 // only report frames at level 1
2270 && !master_th->th.th_teams_microtask) { // not in teams construct
2271#if USE_ITT_NOTIFY
2272 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
2273 (__kmp_forkjoin_frames_mode == 3 ||
2274 __kmp_forkjoin_frames_mode == 1)) {
2275 kmp_uint64 tmp_time = 0;
2276 if (__itt_get_timestamp_ptr)
2277 tmp_time = __itt_get_timestamp();
2278 // Internal fork - report frame begin
2279 master_th->th.th_frame_time = tmp_time;
2280 if (__kmp_forkjoin_frames_mode == 3)
2281 team->t.t_region_time = tmp_time;
2282 } else
2283// only one notification scheme (either "submit" or "forking/joined", not both)
2284#endif /* USE_ITT_NOTIFY */
2285 if ((__itt_frame_begin_v3_ptr || KMP_ITT_DEBUG) &&
2286 __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) {
2287 // Mark start of "parallel" region for Intel(R) VTune(TM) analyzer.
2288 __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
2289 }
2290 }
2291#endif /* USE_ITT_BUILD */
2292
2293 /* now go on and do the work */
2294 KMP_DEBUG_ASSERT(team == __kmp_threads[gtid]->th.th_team);
2295 KMP_MB();
2296 KF_TRACE(10,
2297 ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
2298 root, team, master_th, gtid));
2299
2300#if USE_ITT_BUILD
2301 if (__itt_stack_caller_create_ptr) {
2302 // create new stack stitching id before entering fork barrier
2303 if (!enter_teams) {
2304 KMP_DEBUG_ASSERT(team->t.t_stack_id == NULL);
2305 team->t.t_stack_id = __kmp_itt_stack_caller_create();
2306 } else if (parent_team->t.t_serialized) {
2307 // keep stack stitching id in the serialized parent_team;
2308 // current team will be used for parallel inside the teams;
2309 // if parent_team is active, then it already keeps stack stitching id
2310 // for the league of teams
2311 KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);
2312 parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();
2313 }
2314 }
2315#endif /* USE_ITT_BUILD */
2316
2317 // AC: skip __kmp_internal_fork at teams construct, let only primary
2318 // threads execute
2319 if (ap) {
2320 __kmp_internal_fork(loc, gtid, team);
2321 KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, "
2322 "master_th=%p, gtid=%d\n",
2323 root, team, master_th, gtid));
2324 }
2325
2326 if (call_context == fork_context_gnu) {
2327 KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
2328 return TRUE;
2329 }
2330
2331 /* Invoke microtask for PRIMARY thread */
2332 KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
2333 team->t.t_id, team->t.t_pkfn));
2334 } // END of timer KMP_fork_call block
2335
2336#if KMP_STATS_ENABLED
2337 // If beginning a teams construct, then change thread state
2338 stats_state_e previous_state = KMP_GET_THREAD_STATE();
2339 if (!ap) {
2340 KMP_SET_THREAD_STATE(stats_state_e::TEAMS_REGION);
2341 }
2342#endif
2343
2344 if (!team->t.t_invoke(gtid)) {
2345 KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread");
2346 }
2347
2348#if KMP_STATS_ENABLED
2349 // If was beginning of a teams construct, then reset thread state
2350 if (!ap) {
2351 KMP_SET_THREAD_STATE(previous_state);
2352 }
2353#endif
2354
2355 KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
2356 team->t.t_id, team->t.t_pkfn));
2357 KMP_MB(); /* Flush all pending memory write invalidates. */
2358
2359 KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
2360#if OMPT_SUPPORT
2361 if (ompt_enabled.enabled) {
2362 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2363 }
2364#endif
2365
2366 return TRUE;
2367}
2368
2369#if OMPT_SUPPORT
2370static inline void __kmp_join_restore_state(kmp_info_t *thread,
2371 kmp_team_t *team) {
2372 // restore state outside the region
2373 thread->th.ompt_thread_info.state =
2374 ((team->t.t_serialized) ? ompt_state_work_serial
2375 : ompt_state_work_parallel);
2376}
2377
2378static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread,
2379 kmp_team_t *team, ompt_data_t *parallel_data,
2380 int flags, void *codeptr) {
2381 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2382 if (ompt_enabled.ompt_callback_parallel_end) {
2383 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
2384 parallel_data, &(task_info->task_data), flags, codeptr);
2385 }
2386
2387 task_info->frame.enter_frame = ompt_data_none;
2388 __kmp_join_restore_state(thread, team);
2389}
2390#endif
2391
2392void __kmp_join_call(ident_t *loc, int gtid
2393#if OMPT_SUPPORT
2394 ,
2395 enum fork_context_e fork_context
2396#endif
2397 ,
2398 int exit_teams) {
2399 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);
2400 kmp_team_t *team;
2401 kmp_team_t *parent_team;
2402 kmp_info_t *master_th;
2403 kmp_root_t *root;
2404 int master_active;
2405
2406 KA_TRACE(20, ("__kmp_join_call: enter T#%d\n", gtid));
2407
2408 /* setup current data */
2409 master_th = __kmp_threads[gtid];
2410 root = master_th->th.th_root;
2411 team = master_th->th.th_team;
2412 parent_team = team->t.t_parent;
2413
2414 master_th->th.th_ident = loc;
2415
2416#if OMPT_SUPPORT
2417 void *team_microtask = (void *)team->t.t_pkfn;
2418 // For GOMP interface with serialized parallel, need the
2419 // __kmpc_end_serialized_parallel to call hooks for OMPT end-implicit-task
2420 // and end-parallel events.
2421 if (ompt_enabled.enabled &&
2422 !(team->t.t_serialized && fork_context == fork_context_gnu)) {
2423 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2424 }
2425#endif
2426
2427#if KMP_DEBUG
2428 if (__kmp_tasking_mode != tskm_immediate_exec && !exit_teams) {
2429 KA_TRACE(20, ("__kmp_join_call: T#%d, old team = %p old task_team = %p, "
2430 "th_task_team = %p\n",
2431 __kmp_gtid_from_thread(master_th), team,
2432 team->t.t_task_team[master_th->th.th_task_state],
2433 master_th->th.th_task_team));
2434 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
2435 team->t.t_task_team[master_th->th.th_task_state]);
2436 }
2437#endif
2438
2439 if (team->t.t_serialized) {
2440 if (master_th->th.th_teams_microtask) {
2441 // We are in teams construct
2442 int level = team->t.t_level;
2443 int tlevel = master_th->th.th_teams_level;
2444 if (level == tlevel) {
2445 // AC: we haven't incremented it earlier at start of teams construct,
2446 // so do it here - at the end of teams construct
2447 team->t.t_level++;
2448 } else if (level == tlevel + 1) {
2449 // AC: we are exiting parallel inside teams, need to increment
2450 // serialization in order to restore it in the next call to
2451 // __kmpc_end_serialized_parallel
2452 team->t.t_serialized++;
2453 }
2454 }
2455 __kmpc_end_serialized_parallel(loc, gtid);
2456
2457#if OMPT_SUPPORT
2458 if (ompt_enabled.enabled) {
2459 if (fork_context == fork_context_gnu) {
2460 __ompt_lw_taskteam_unlink(master_th);
2461 }
2462 __kmp_join_restore_state(master_th, parent_team);
2463 }
2464#endif
2465
2466 return;
2467 }
2468
2469 master_active = team->t.t_master_active;
2470
2471 if (!exit_teams) {
2472 // AC: No barrier for internal teams at exit from teams construct.
2473 // But there is barrier for external team (league).
2474 __kmp_internal_join(loc, gtid, team);
2475#if USE_ITT_BUILD
2476 if (__itt_stack_caller_create_ptr) {
2477 KMP_DEBUG_ASSERT(team->t.t_stack_id != NULL);
2478 // destroy the stack stitching id after join barrier
2479 __kmp_itt_stack_caller_destroy((__itt_caller)team->t.t_stack_id);
2480 team->t.t_stack_id = NULL;
2481 }
2482#endif
2483 } else {
2484 master_th->th.th_task_state =
2485 0; // AC: no tasking in teams (out of any parallel)
2486#if USE_ITT_BUILD
2487 if (__itt_stack_caller_create_ptr && parent_team->t.t_serialized) {
2488 KMP_DEBUG_ASSERT(parent_team->t.t_stack_id != NULL);
2489 // destroy the stack stitching id on exit from the teams construct
2490 // if parent_team is active, then the id will be destroyed later on
2491 // by master of the league of teams
2492 __kmp_itt_stack_caller_destroy((__itt_caller)parent_team->t.t_stack_id);
2493 parent_team->t.t_stack_id = NULL;
2494 }
2495#endif
2496 }
2497
2498 KMP_MB();
2499
2500#if OMPT_SUPPORT
2501 ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data);
2502 void *codeptr = team->t.ompt_team_info.master_return_address;
2503#endif
2504
2505#if USE_ITT_BUILD
2506 // Mark end of "parallel" region for Intel(R) VTune(TM) analyzer.
2507 if (team->t.t_active_level == 1 &&
2508 (!master_th->th.th_teams_microtask || /* not in teams construct */
2509 master_th->th.th_teams_size.nteams == 1)) {
2510 master_th->th.th_ident = loc;
2511 // only one notification scheme (either "submit" or "forking/joined", not
2512 // both)
2513 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
2514 __kmp_forkjoin_frames_mode == 3)
2515 __kmp_itt_frame_submit(gtid, team->t.t_region_time,
2516 master_th->th.th_frame_time, 0, loc,
2517 master_th->th.th_team_nproc, 1);
2518 else if ((__itt_frame_end_v3_ptr || KMP_ITT_DEBUG) &&
2519 !__kmp_forkjoin_frames_mode && __kmp_forkjoin_frames)
2520 __kmp_itt_region_joined(gtid);
2521 } // active_level == 1
2522#endif /* USE_ITT_BUILD */
2523
2524#if KMP_AFFINITY_SUPPORTED
2525 if (!exit_teams) {
2526 // Restore master thread's partition.
2527 master_th->th.th_first_place = team->t.t_first_place;
2528 master_th->th.th_last_place = team->t.t_last_place;
2529 }
2530#endif // KMP_AFFINITY_SUPPORTED
2531
2532 if (master_th->th.th_teams_microtask && !exit_teams &&
2533 team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
2534 team->t.t_level == master_th->th.th_teams_level + 1) {
2535// AC: We need to leave the team structure intact at the end of parallel
2536// inside the teams construct, so that at the next parallel same (hot) team
2537// works, only adjust nesting levels
2538#if OMPT_SUPPORT
2539 ompt_data_t ompt_parallel_data = ompt_data_none;
2540 if (ompt_enabled.enabled) {
2541 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2542 if (ompt_enabled.ompt_callback_implicit_task) {
2543 int ompt_team_size = team->t.t_nproc;
2544 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
2545 ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
2546 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
2547 }
2548 task_info->frame.exit_frame = ompt_data_none;
2549 task_info->task_data = ompt_data_none;
2550 ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
2551 __ompt_lw_taskteam_unlink(master_th);
2552 }
2553#endif
2554 /* Decrement our nested depth level */
2555 team->t.t_level--;
2556 team->t.t_active_level--;
2557 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
2558
2559 // Restore number of threads in the team if needed. This code relies on
2560 // the proper adjustment of th_teams_size.nth after the fork in
2561 // __kmp_teams_master on each teams primary thread in the case that
2562 // __kmp_reserve_threads reduced it.
2563 if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) {
2564 int old_num = master_th->th.th_team_nproc;
2565 int new_num = master_th->th.th_teams_size.nth;
2566 kmp_info_t **other_threads = team->t.t_threads;
2567 team->t.t_nproc = new_num;
2568 for (int i = 0; i < old_num; ++i) {
2569 other_threads[i]->th.th_team_nproc = new_num;
2570 }
2571 // Adjust states of non-used threads of the team
2572 for (int i = old_num; i < new_num; ++i) {
2573 // Re-initialize thread's barrier data.
2574 KMP_DEBUG_ASSERT(other_threads[i]);
2575 kmp_balign_t *balign = other_threads[i]->th.th_bar;
2576 for (int b = 0; b < bs_last_barrier; ++b) {
2577 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
2578 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
2579#if USE_DEBUGGER
2580 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
2581#endif
2582 }
2583 if (__kmp_tasking_mode != tskm_immediate_exec) {
2584 // Synchronize thread's task state
2585 other_threads[i]->th.th_task_state = master_th->th.th_task_state;
2586 }
2587 }
2588 }
2589
2590#if OMPT_SUPPORT
2591 if (ompt_enabled.enabled) {
2592 __kmp_join_ompt(gtid, master_th, parent_team, &ompt_parallel_data,
2593 OMPT_INVOKER(fork_context) | ompt_parallel_team, codeptr);
2594 }
2595#endif
2596
2597 return;
2598 }
2599
2600 /* do cleanup and restore the parent team */
2601 master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
2602 master_th->th.th_local.this_construct = team->t.t_master_this_cons;
2603
2604 master_th->th.th_dispatch = &parent_team->t.t_dispatch[team->t.t_master_tid];
2605
2606 /* jc: The following lock has instructions with REL and ACQ semantics,
2607 separating the parallel user code called in this parallel region
2608 from the serial user code called after this function returns. */
2609 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2610
2611 if (!master_th->th.th_teams_microtask ||
2612 team->t.t_level > master_th->th.th_teams_level) {
2613 /* Decrement our nested depth level */
2614 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
2615 }
2616 KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0);
2617
2618#if OMPT_SUPPORT
2619 if (ompt_enabled.enabled) {
2620 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2621 if (ompt_enabled.ompt_callback_implicit_task) {
2622 int flags = (team_microtask == (void *)__kmp_teams_master)
2623 ? ompt_task_initial
2624 : ompt_task_implicit;
2625 int ompt_team_size = (flags == ompt_task_initial) ? 0 : team->t.t_nproc;
2626 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
2627 ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
2628 OMPT_CUR_TASK_INFO(master_th)->thread_num, flags);
2629 }
2630 task_info->frame.exit_frame = ompt_data_none;
2631 task_info->task_data = ompt_data_none;
2632 }
2633#endif
2634
2635 KF_TRACE(10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 0,
2636 master_th, team));
2637 __kmp_pop_current_task_from_thread(master_th);
2638
2639 master_th->th.th_def_allocator = team->t.t_def_allocator;
2640
2641#if OMPD_SUPPORT
2642 if (ompd_state & OMPD_ENABLE_BP)
2643 ompd_bp_parallel_end();
2644#endif
2645 updateHWFPControl(team);
2646
2647 if (root->r.r_active != master_active)
2648 root->r.r_active = master_active;
2649
2650 __kmp_free_team(root, team USE_NESTED_HOT_ARG(
2651 master_th)); // this will free worker threads
2652
2653 /* this race was fun to find. make sure the following is in the critical
2654 region otherwise assertions may fail occasionally since the old team may be
2655 reallocated and the hierarchy appears inconsistent. it is actually safe to
2656 run and won't cause any bugs, but will cause those assertion failures. it's
2657 only one deref&assign so might as well put this in the critical region */
2658 master_th->th.th_team = parent_team;
2659 master_th->th.th_team_nproc = parent_team->t.t_nproc;
2660 master_th->th.th_team_master = parent_team->t.t_threads[0];
2661 master_th->th.th_team_serialized = parent_team->t.t_serialized;
2662
2663 /* restore serialized team, if need be */
2664 if (parent_team->t.t_serialized &&
2665 parent_team != master_th->th.th_serial_team &&
2666 parent_team != root->r.r_root_team) {
2667 __kmp_free_team(root,
2668 master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL));
2669 master_th->th.th_serial_team = parent_team;
2670 }
2671
2672 if (__kmp_tasking_mode != tskm_immediate_exec) {
2673 if (master_th->th.th_task_state_top >
2674 0) { // Restore task state from memo stack
2675 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2676 // Remember primary thread's state if we re-use this nested hot team
2677 master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] =
2678 master_th->th.th_task_state;
2679 --master_th->th.th_task_state_top; // pop
2680 // Now restore state at this level
2681 master_th->th.th_task_state =
2682 master_th->th
2683 .th_task_state_memo_stack[master_th->th.th_task_state_top];
2684 } else if (team != root->r.r_hot_team) {
2685 // Reset the task state of primary thread if we are not hot team because
2686 // in this case all the worker threads will be free, and their task state
2687 // will be reset. If not reset the primary's, the task state will be
2688 // inconsistent.
2689 master_th->th.th_task_state = 0;
2690 }
2691 // Copy the task team from the parent team to the primary thread
2692 master_th->th.th_task_team =
2693 parent_team->t.t_task_team[master_th->th.th_task_state];
2694 KA_TRACE(20,
2695 ("__kmp_join_call: Primary T#%d restoring task_team %p, team %p\n",
2696 __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
2697 parent_team));
2698 }
2699
2700 // TODO: GEH - cannot do this assertion because root thread not set up as
2701 // executing
2702 // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 0 );
2703 master_th->th.th_current_task->td_flags.executing = 1;
2704
2705 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2706
2707#if KMP_AFFINITY_SUPPORTED
2708 if (master_th->th.th_team->t.t_level == 0 && __kmp_affinity.flags.reset) {
2709 __kmp_reset_root_init_mask(gtid);
2710 }
2711#endif
2712#if OMPT_SUPPORT
2713 int flags =
2714 OMPT_INVOKER(fork_context) |
2715 ((team_microtask == (void *)__kmp_teams_master) ? ompt_parallel_league
2716 : ompt_parallel_team);
2717 if (ompt_enabled.enabled) {
2718 __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, flags,
2719 codeptr);
2720 }
2721#endif
2722
2723 KMP_MB();
2724 KA_TRACE(20, ("__kmp_join_call: exit T#%d\n", gtid));
2725}
2726
2727/* Check whether we should push an internal control record onto the
2728 serial team stack. If so, do it. */
2729void __kmp_save_internal_controls(kmp_info_t *thread) {
2730
2731 if (thread->th.th_team != thread->th.th_serial_team) {
2732 return;
2733 }
2734 if (thread->th.th_team->t.t_serialized > 1) {
2735 int push = 0;
2736
2737 if (thread->th.th_team->t.t_control_stack_top == NULL) {
2738 push = 1;
2739 } else {
2740 if (thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
2741 thread->th.th_team->t.t_serialized) {
2742 push = 1;
2743 }
2744 }
2745 if (push) { /* push a record on the serial team's stack */
2746 kmp_internal_control_t *control =
2747 (kmp_internal_control_t *)__kmp_allocate(
2748 sizeof(kmp_internal_control_t));
2749
2750 copy_icvs(control, &thread->th.th_current_task->td_icvs);
2751
2752 control->serial_nesting_level = thread->th.th_team->t.t_serialized;
2753
2754 control->next = thread->th.th_team->t.t_control_stack_top;
2755 thread->th.th_team->t.t_control_stack_top = control;
2756 }
2757 }
2758}
2759
2760/* Changes set_nproc */
2761void __kmp_set_num_threads(int new_nth, int gtid) {
2762 kmp_info_t *thread;
2763 kmp_root_t *root;
2764
2765 KF_TRACE(10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth));
2766 KMP_DEBUG_ASSERT(__kmp_init_serial);
2767
2768 if (new_nth < 1)
2769 new_nth = 1;
2770 else if (new_nth > __kmp_max_nth)
2771 new_nth = __kmp_max_nth;
2772
2773 KMP_COUNT_VALUE(OMP_set_numthreads, new_nth);
2774 thread = __kmp_threads[gtid];
2775 if (thread->th.th_current_task->td_icvs.nproc == new_nth)
2776 return; // nothing to do
2777
2778 __kmp_save_internal_controls(thread);
2779
2780 set__nproc(thread, new_nth);
2781
2782 // If this omp_set_num_threads() call will cause the hot team size to be
2783 // reduced (in the absence of a num_threads clause), then reduce it now,
2784 // rather than waiting for the next parallel region.
2785 root = thread->th.th_root;
2786 if (__kmp_init_parallel && (!root->r.r_active) &&
2787 (root->r.r_hot_team->t.t_nproc > new_nth)
2788#if KMP_NESTED_HOT_TEAMS
2789 && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
2790#endif
2791 ) {
2792 kmp_team_t *hot_team = root->r.r_hot_team;
2793 int f;
2794
2795 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2796
2797 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
2798 __kmp_resize_dist_barrier(hot_team, hot_team->t.t_nproc, new_nth);
2799 }
2800 // Release the extra threads we don't need any more.
2801 for (f = new_nth; f < hot_team->t.t_nproc; f++) {
2802 KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
2803 if (__kmp_tasking_mode != tskm_immediate_exec) {
2804 // When decreasing team size, threads no longer in the team should unref
2805 // task team.
2806 hot_team->t.t_threads[f]->th.th_task_team = NULL;
2807 }
2808 __kmp_free_thread(hot_team->t.t_threads[f]);
2809 hot_team->t.t_threads[f] = NULL;
2810 }
2811 hot_team->t.t_nproc = new_nth;
2812#if KMP_NESTED_HOT_TEAMS
2813 if (thread->th.th_hot_teams) {
2814 KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team);
2815 thread->th.th_hot_teams[0].hot_team_nth = new_nth;
2816 }
2817#endif
2818
2819 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
2820 hot_team->t.b->update_num_threads(new_nth);
2821 __kmp_add_threads_to_team(hot_team, new_nth);
2822 }
2823
2824 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2825
2826 // Update the t_nproc field in the threads that are still active.
2827 for (f = 0; f < new_nth; f++) {
2828 KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
2829 hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
2830 }
2831 // Special flag in case omp_set_num_threads() call
2832 hot_team->t.t_size_changed = -1;
2833 }
2834}
2835
2836/* Changes max_active_levels */
2837void __kmp_set_max_active_levels(int gtid, int max_active_levels) {
2838 kmp_info_t *thread;
2839
2840 KF_TRACE(10, ("__kmp_set_max_active_levels: new max_active_levels for thread "
2841 "%d = (%d)\n",
2842 gtid, max_active_levels));
2843 KMP_DEBUG_ASSERT(__kmp_init_serial);
2844
2845 // validate max_active_levels
2846 if (max_active_levels < 0) {
2847 KMP_WARNING(ActiveLevelsNegative, max_active_levels);
2848 // We ignore this call if the user has specified a negative value.
2849 // The current setting won't be changed. The last valid setting will be
2850 // used. A warning will be issued (if warnings are allowed as controlled by
2851 // the KMP_WARNINGS env var).
2852 KF_TRACE(10, ("__kmp_set_max_active_levels: the call is ignored: new "
2853 "max_active_levels for thread %d = (%d)\n",
2854 gtid, max_active_levels));
2855 return;
2856 }
2857 if (max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT) {
2858 // it's OK, the max_active_levels is within the valid range: [ 0;
2859 // KMP_MAX_ACTIVE_LEVELS_LIMIT ]
2860 // We allow a zero value. (implementation defined behavior)
2861 } else {
2862 KMP_WARNING(ActiveLevelsExceedLimit, max_active_levels,
2863 KMP_MAX_ACTIVE_LEVELS_LIMIT);
2864 max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
2865 // Current upper limit is MAX_INT. (implementation defined behavior)
2866 // If the input exceeds the upper limit, we correct the input to be the
2867 // upper limit. (implementation defined behavior)
2868 // Actually, the flow should never get here until we use MAX_INT limit.
2869 }
2870 KF_TRACE(10, ("__kmp_set_max_active_levels: after validation: new "
2871 "max_active_levels for thread %d = (%d)\n",
2872 gtid, max_active_levels));
2873
2874 thread = __kmp_threads[gtid];
2875
2876 __kmp_save_internal_controls(thread);
2877
2878 set__max_active_levels(thread, max_active_levels);
2879}
2880
2881/* Gets max_active_levels */
2882int __kmp_get_max_active_levels(int gtid) {
2883 kmp_info_t *thread;
2884
2885 KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d\n", gtid));
2886 KMP_DEBUG_ASSERT(__kmp_init_serial);
2887
2888 thread = __kmp_threads[gtid];
2889 KMP_DEBUG_ASSERT(thread->th.th_current_task);
2890 KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d, curtask=%p, "
2891 "curtask_maxaclevel=%d\n",
2892 gtid, thread->th.th_current_task,
2893 thread->th.th_current_task->td_icvs.max_active_levels));
2894 return thread->th.th_current_task->td_icvs.max_active_levels;
2895}
2896
2897// nteams-var per-device ICV
2898void __kmp_set_num_teams(int num_teams) {
2899 if (num_teams > 0)
2900 __kmp_nteams = num_teams;
2901}
2902int __kmp_get_max_teams(void) { return __kmp_nteams; }
2903// teams-thread-limit-var per-device ICV
2904void __kmp_set_teams_thread_limit(int limit) {
2905 if (limit > 0)
2906 __kmp_teams_thread_limit = limit;
2907}
2908int __kmp_get_teams_thread_limit(void) { return __kmp_teams_thread_limit; }
2909
2910 KMP_BUILD_ASSERT(sizeof(kmp_sched_t) == sizeof(int));
2911 KMP_BUILD_ASSERT(sizeof(enum sched_type) == sizeof(int));
2912
2913/* Changes def_sched_var ICV values (run-time schedule kind and chunk) */
2914void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) {
2915 kmp_info_t *thread;
2916 kmp_sched_t orig_kind;
2917 // kmp_team_t *team;
2918
2919 KF_TRACE(10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n",
2920 gtid, (int)kind, chunk));
2921 KMP_DEBUG_ASSERT(__kmp_init_serial);
2922
2923 // Check if the kind parameter is valid, correct if needed.
2924 // Valid parameters should fit in one of two intervals - standard or extended:
2925 // <lower>, <valid>, <upper_std>, <lower_ext>, <valid>, <upper>
2926 // 2008-01-25: 0, 1 - 4, 5, 100, 101 - 102, 103
2927 orig_kind = kind;
2928 kind = __kmp_sched_without_mods(kind);
2929
2930 if (kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
2931 (kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std)) {
2932 // TODO: Hint needs attention in case we change the default schedule.
2933 __kmp_msg(kmp_ms_warning, KMP_MSG(ScheduleKindOutOfRange, kind),
2934 KMP_HNT(DefaultScheduleKindUsed, "static, no chunk"),
2935 __kmp_msg_null);
2936 kind = kmp_sched_default;
2937 chunk = 0; // ignore chunk value in case of bad kind
2938 }
2939
2940 thread = __kmp_threads[gtid];
2941
2942 __kmp_save_internal_controls(thread);
2943
2944 if (kind < kmp_sched_upper_std) {
2945 if (kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK) {
2946 // differ static chunked vs. unchunked: chunk should be invalid to
2947 // indicate unchunked schedule (which is the default)
2948 thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
2949 } else {
2950 thread->th.th_current_task->td_icvs.sched.r_sched_type =
2951 __kmp_sch_map[kind - kmp_sched_lower - 1];
2952 }
2953 } else {
2954 // __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std -
2955 // kmp_sched_lower - 2 ];
2956 thread->th.th_current_task->td_icvs.sched.r_sched_type =
2957 __kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std -
2958 kmp_sched_lower - 2];
2959 }
2960 __kmp_sched_apply_mods_intkind(
2961 orig_kind, &(thread->th.th_current_task->td_icvs.sched.r_sched_type));
2962 if (kind == kmp_sched_auto || chunk < 1) {
2963 // ignore parameter chunk for schedule auto
2964 thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
2965 } else {
2966 thread->th.th_current_task->td_icvs.sched.chunk = chunk;
2967 }
2968}
2969
2970/* Gets def_sched_var ICV values */
2971void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk) {
2972 kmp_info_t *thread;
2973 enum sched_type th_type;
2974
2975 KF_TRACE(10, ("__kmp_get_schedule: thread %d\n", gtid));
2976 KMP_DEBUG_ASSERT(__kmp_init_serial);
2977
2978 thread = __kmp_threads[gtid];
2979
2980 th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
2981 switch (SCHEDULE_WITHOUT_MODIFIERS(th_type)) {
2982 case kmp_sch_static:
2983 case kmp_sch_static_greedy:
2984 case kmp_sch_static_balanced:
2985 *kind = kmp_sched_static;
2986 __kmp_sched_apply_mods_stdkind(kind, th_type);
2987 *chunk = 0; // chunk was not set, try to show this fact via zero value
2988 return;
2989 case kmp_sch_static_chunked:
2990 *kind = kmp_sched_static;
2991 break;
2992 case kmp_sch_dynamic_chunked:
2993 *kind = kmp_sched_dynamic;
2994 break;
2995 case kmp_sch_guided_chunked:
2996 case kmp_sch_guided_iterative_chunked:
2997 case kmp_sch_guided_analytical_chunked:
2998 *kind = kmp_sched_guided;
2999 break;
3000 case kmp_sch_auto:
3001 *kind = kmp_sched_auto;
3002 break;
3003 case kmp_sch_trapezoidal:
3004 *kind = kmp_sched_trapezoidal;
3005 break;
3006#if KMP_STATIC_STEAL_ENABLED
3007 case kmp_sch_static_steal:
3008 *kind = kmp_sched_static_steal;
3009 break;
3010#endif
3011 default:
3012 KMP_FATAL(UnknownSchedulingType, th_type);
3013 }
3014
3015 __kmp_sched_apply_mods_stdkind(kind, th_type);
3016 *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
3017}
3018
3019int __kmp_get_ancestor_thread_num(int gtid, int level) {
3020
3021 int ii, dd;
3022 kmp_team_t *team;
3023 kmp_info_t *thr;
3024
3025 KF_TRACE(10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level));
3026 KMP_DEBUG_ASSERT(__kmp_init_serial);
3027
3028 // validate level
3029 if (level == 0)
3030 return 0;
3031 if (level < 0)
3032 return -1;
3033 thr = __kmp_threads[gtid];
3034 team = thr->th.th_team;
3035 ii = team->t.t_level;
3036 if (level > ii)
3037 return -1;
3038
3039 if (thr->th.th_teams_microtask) {
3040 // AC: we are in teams region where multiple nested teams have same level
3041 int tlevel = thr->th.th_teams_level; // the level of the teams construct
3042 if (level <=
3043 tlevel) { // otherwise usual algorithm works (will not touch the teams)
3044 KMP_DEBUG_ASSERT(ii >= tlevel);
3045 // AC: As we need to pass by the teams league, we need to artificially
3046 // increase ii
3047 if (ii == tlevel) {
3048 ii += 2; // three teams have same level
3049 } else {
3050 ii++; // two teams have same level
3051 }
3052 }
3053 }
3054
3055 if (ii == level)
3056 return __kmp_tid_from_gtid(gtid);
3057
3058 dd = team->t.t_serialized;
3059 level++;
3060 while (ii > level) {
3061 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
3062 }
3063 if ((team->t.t_serialized) && (!dd)) {
3064 team = team->t.t_parent;
3065 continue;
3066 }
3067 if (ii > level) {
3068 team = team->t.t_parent;
3069 dd = team->t.t_serialized;
3070 ii--;
3071 }
3072 }
3073
3074 return (dd > 1) ? (0) : (team->t.t_master_tid);
3075}
3076
3077int __kmp_get_team_size(int gtid, int level) {
3078
3079 int ii, dd;
3080 kmp_team_t *team;
3081 kmp_info_t *thr;
3082
3083 KF_TRACE(10, ("__kmp_get_team_size: thread %d %d\n", gtid, level));
3084 KMP_DEBUG_ASSERT(__kmp_init_serial);
3085
3086 // validate level
3087 if (level == 0)
3088 return 1;
3089 if (level < 0)
3090 return -1;
3091 thr = __kmp_threads[gtid];
3092 team = thr->th.th_team;
3093 ii = team->t.t_level;
3094 if (level > ii)
3095 return -1;
3096
3097 if (thr->th.th_teams_microtask) {
3098 // AC: we are in teams region where multiple nested teams have same level
3099 int tlevel = thr->th.th_teams_level; // the level of the teams construct
3100 if (level <=
3101 tlevel) { // otherwise usual algorithm works (will not touch the teams)
3102 KMP_DEBUG_ASSERT(ii >= tlevel);
3103 // AC: As we need to pass by the teams league, we need to artificially
3104 // increase ii
3105 if (ii == tlevel) {
3106 ii += 2; // three teams have same level
3107 } else {
3108 ii++; // two teams have same level
3109 }
3110 }
3111 }
3112
3113 while (ii > level) {
3114 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
3115 }
3116 if (team->t.t_serialized && (!dd)) {
3117 team = team->t.t_parent;
3118 continue;
3119 }
3120 if (ii > level) {
3121 team = team->t.t_parent;
3122 ii--;
3123 }
3124 }
3125
3126 return team->t.t_nproc;
3127}
3128
3129kmp_r_sched_t __kmp_get_schedule_global() {
3130 // This routine created because pairs (__kmp_sched, __kmp_chunk) and
3131 // (__kmp_static, __kmp_guided) may be changed by kmp_set_defaults
3132 // independently. So one can get the updated schedule here.
3133
3134 kmp_r_sched_t r_sched;
3135
3136 // create schedule from 4 globals: __kmp_sched, __kmp_chunk, __kmp_static,
3137 // __kmp_guided. __kmp_sched should keep original value, so that user can set
3138 // KMP_SCHEDULE multiple times, and thus have different run-time schedules in
3139 // different roots (even in OMP 2.5)
3140 enum sched_type s = SCHEDULE_WITHOUT_MODIFIERS(__kmp_sched);
3141 enum sched_type sched_modifiers = SCHEDULE_GET_MODIFIERS(__kmp_sched);
3142 if (s == kmp_sch_static) {
3143 // replace STATIC with more detailed schedule (balanced or greedy)
3144 r_sched.r_sched_type = __kmp_static;
3145 } else if (s == kmp_sch_guided_chunked) {
3146 // replace GUIDED with more detailed schedule (iterative or analytical)
3147 r_sched.r_sched_type = __kmp_guided;
3148 } else { // (STATIC_CHUNKED), or (DYNAMIC_CHUNKED), or other
3149 r_sched.r_sched_type = __kmp_sched;
3150 }
3151 SCHEDULE_SET_MODIFIERS(r_sched.r_sched_type, sched_modifiers);
3152
3153 if (__kmp_chunk < KMP_DEFAULT_CHUNK) {
3154 // __kmp_chunk may be wrong here (if it was not ever set)
3155 r_sched.chunk = KMP_DEFAULT_CHUNK;
3156 } else {
3157 r_sched.chunk = __kmp_chunk;
3158 }
3159
3160 return r_sched;
3161}
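
A standalone sketch of the modifier handling performed above: the monotonic/nonmonotonic bits are masked off, the base schedule may be swapped for a more detailed one, and the modifiers are OR'ed back in. The bit values here are illustrative placeholders, not the real kmp.h encodings:

// Strip-and-restore of schedule modifier bits (placeholder constants).
#include <cstdint>
#include <cstdio>

enum : uint32_t {
  sch_guided_chunked = 0x2e,          // placeholder base schedule value
  sch_modifier_monotonic = 1u << 29,  // placeholder modifier bits
  sch_modifier_nonmonotonic = 1u << 30,
};

int main() {
  uint32_t sched = sch_guided_chunked | sch_modifier_nonmonotonic;
  uint32_t mask = sch_modifier_monotonic | sch_modifier_nonmonotonic;
  uint32_t base = sched & ~mask; // SCHEDULE_WITHOUT_MODIFIERS equivalent
  uint32_t mods = sched & mask;  // SCHEDULE_GET_MODIFIERS equivalent
  // ... base may now be replaced by a more detailed schedule ...
  uint32_t result = base | mods; // SCHEDULE_SET_MODIFIERS equivalent
  std::printf("base=0x%x mods=0x%x result=0x%x\n", base, mods, result);
  return 0;
}
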
3162
3163/* Allocate (realloc == FALSE) * or reallocate (realloc == TRUE)
3164 at least argc number of *t_argv entries for the requested team. */
3165static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc) {
3166
3167 KMP_DEBUG_ASSERT(team);
3168 if (!realloc || argc > team->t.t_max_argc) {
3169
3170 KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: needed entries=%d, "if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_alloc_argv_entries: team %d: needed entries=%d, "
"current entries=%d\n", team->t.t_id, argc, (realloc) ? team
->t.t_max_argc : 0); }
3171 "current entries=%d\n",if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_alloc_argv_entries: team %d: needed entries=%d, "
"current entries=%d\n", team->t.t_id, argc, (realloc) ? team
->t.t_max_argc : 0); }
3172 team->t.t_id, argc, (realloc) ? team->t.t_max_argc : 0))if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_alloc_argv_entries: team %d: needed entries=%d, "
"current entries=%d\n", team->t.t_id, argc, (realloc) ? team
->t.t_max_argc : 0); }
;
3173 /* if previously allocated heap space for args, free them */
3174 if (realloc && team->t.t_argv != &team->t.t_inline_argv[0])
3175 __kmp_free((void *)team->t.t_argv);
3176
3177 if (argc <= KMP_INLINE_ARGV_ENTRIES) {
3178 /* use unused space in the cache line for arguments */
3179 team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
3180 KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: inline allocate %d "
3181 "argv entries\n",
3182 team->t.t_id, team->t.t_max_argc));
3183 team->t.t_argv = &team->t.t_inline_argv[0];
3184 if (__kmp_storage_map) {
3185 __kmp_print_storage_map_gtid(
3186 -1, &team->t.t_inline_argv[0],
3187 &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
3188 (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES), "team_%d.t_inline_argv",
3189 team->t.t_id);
3190 }
3191 } else {
3192 /* allocate space for arguments in the heap */
3193 team->t.t_max_argc = (argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1))
3194 ? KMP_MIN_MALLOC_ARGV_ENTRIES
3195 : 2 * argc;
3196 KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: dynamic allocate %d "
3197 "argv entries\n",
3198 team->t.t_id, team->t.t_max_argc));
3199 team->t.t_argv =
3200 (void **)__kmp_page_allocate(sizeof(void *) * team->t.t_max_argc);
3201 if (__kmp_storage_map) {
3202 __kmp_print_storage_map_gtid(-1, &team->t.t_argv[0],
3203 &team->t.t_argv[team->t.t_max_argc],
3204 sizeof(void *) * team->t.t_max_argc,
3205 "team_%d.t_argv", team->t.t_id);
3206 }
3207 }
3208 }
3209}
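
An illustrative model of the argv sizing policy above: arguments fit in the team's inline cache-line space up to KMP_INLINE_ARGV_ENTRIES, otherwise a heap array of at least KMP_MIN_MALLOC_ARGV_ENTRIES (or 2*argc for headroom) is used. The inline constant below is a placeholder; the real value is derived from the cache-line layout of kmp_team_t:

#include <cstdio>

constexpr int kInlineEntries = 24;     // placeholder for KMP_INLINE_ARGV_ENTRIES
constexpr int kMinMallocEntries = 100; // KMP_MIN_MALLOC_ARGV_ENTRIES

int chooseCapacity(int argc) {
  if (argc <= kInlineEntries)
    return kInlineEntries; // use the inline cache-line slots
  // heap allocation: same rule as lines 3193-3195 above
  return (argc <= (kMinMallocEntries >> 1)) ? kMinMallocEntries : 2 * argc;
}

int main() {
  for (int argc : {4, 30, 80, 200})
    std::printf("argc=%d -> capacity=%d\n", argc, chooseCapacity(argc));
  return 0;
}
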
3210
3211static void __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) {
3212 int i;
3213 int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
3214 team->t.t_threads =
3215 (kmp_info_t **)__kmp_allocate(sizeof(kmp_info_t *) * max_nth);
3216 team->t.t_disp_buffer = (dispatch_shared_info_t *)__kmp_allocate(
3217 sizeof(dispatch_shared_info_t) * num_disp_buff);
3218 team->t.t_dispatch =
3219 (kmp_disp_t *)__kmp_allocate(sizeof(kmp_disp_t) * max_nth);
3220 team->t.t_implicit_task_taskdata =
3221 (kmp_taskdata_t *)__kmp_allocate(sizeof(kmp_taskdata_t) * max_nth);
3222 team->t.t_max_nproc = max_nth;
3223
3224 /* setup dispatch buffers */
3225 for (i = 0; i < num_disp_buff; ++i) {
3226 team->t.t_disp_buffer[i].buffer_index = i;
3227 team->t.t_disp_buffer[i].doacross_buf_idx = i;
3228 }
3229}
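
The dispatch buffers initialized above are consumed round-robin: each new worksharing loop in a team takes the next shared buffer modulo the buffer count. A simplified standalone model of that rotation (the buffer count shown is an assumption; __kmp_dispatch_num_buffers is a runtime setting, typically 7 by default):

#include <cstdio>

int main() {
  const int num_disp_buff = 7; // assumed default for __kmp_dispatch_num_buffers
  int buffer_index = 0;
  for (int loop = 0; loop < 10; ++loop) {
    std::printf("loop %d uses shared buffer %d\n", loop, buffer_index);
    buffer_index = (buffer_index + 1) % num_disp_buff; // wrap around
  }
  return 0;
}
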
3230
3231static void __kmp_free_team_arrays(kmp_team_t *team) {
3232 /* Note: this does not free the threads in t_threads (__kmp_free_threads) */
3233 int i;
3234 for (i = 0; i < team->t.t_max_nproc; ++i) {
3235 if (team->t.t_dispatch[i].th_disp_buffer != NULL) {
3236 __kmp_free(team->t.t_dispatch[i].th_disp_buffer);
3237 team->t.t_dispatch[i].th_disp_buffer = NULL;
3238 }
3239 }
3240#if KMP_USE_HIER_SCHED
3241 __kmp_dispatch_free_hierarchies(team);
3242#endif
3243 __kmp_free(team->t.t_threads);
3244 __kmp_free(team->t.t_disp_buffer);
3245 __kmp_free(team->t.t_dispatch);
3246 __kmp_free(team->t.t_implicit_task_taskdata);
3247 team->t.t_threads = NULL;
3248 team->t.t_disp_buffer = NULL;
3249 team->t.t_dispatch = NULL;
3250 team->t.t_implicit_task_taskdata = 0;
3251}
3252
3253static void __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
3254 kmp_info_t **oldThreads = team->t.t_threads;
3255
3256 __kmp_free(team->t.t_disp_buffer);
3257 __kmp_free(team->t.t_dispatch);
3258 __kmp_free(team->t.t_implicit_task_taskdata);
3259 __kmp_allocate_team_arrays(team, max_nth);
3260
3261 KMP_MEMCPY(team->t.t_threads, oldThreads,
3262 team->t.t_nproc * sizeof(kmp_info_t *));
3263
3264 __kmp_free(oldThreads);
3265}
3266
3267static kmp_internal_control_t __kmp_get_global_icvs(void) {
3268
3269 kmp_r_sched_t r_sched =
3270 __kmp_get_schedule_global(); // get current state of scheduling globals
3271
3272 KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.used > 0);
3273
3274 kmp_internal_control_t g_icvs = {
3275 0, // int serial_nesting_level; //corresponds to value of th_team_serialized
3276 (kmp_int8)__kmp_global.g.g_dynamic, // internal control for dynamic
3277 // adjustment of threads (per thread)
3278 (kmp_int8)__kmp_env_blocktime, // int bt_set; //internal control for
3279 // whether blocktime is explicitly set
3280 __kmp_dflt_blocktime, // int blocktime; //internal control for blocktime
3281#if KMP_USE_MONITOR
3282 __kmp_bt_intervals, // int bt_intervals; //internal control for blocktime
3283// intervals
3284#endif
3285 __kmp_dflt_team_nth, // int nproc; //internal control for # of threads for
3286 // next parallel region (per thread)
3287 // (use a max ub on value if __kmp_parallel_initialize not called yet)
3288 __kmp_cg_max_nth, // int thread_limit;
3289 __kmp_dflt_max_active_levels, // int max_active_levels; //internal control
3290 // for max_active_levels
3291 r_sched, // kmp_r_sched_t sched; //internal control for runtime schedule
3292 // {sched,chunk} pair
3293 __kmp_nested_proc_bind.bind_types[0],
3294 __kmp_default_device,
3295 NULL // struct kmp_internal_control *next;
3296 };
3297
3298 return g_icvs;
3299}
3300
3301static kmp_internal_control_t __kmp_get_x_global_icvs(const kmp_team_t *team) {
3302
3303 kmp_internal_control_t gx_icvs;
3304 gx_icvs.serial_nesting_level =
3305 0; // probably =team->t.t_serial like in save_inter_controls
3306 copy_icvs(&gx_icvs, &team->t.t_threads[0]->th.th_current_task->td_icvs);
3307 gx_icvs.next = NULL;
3308
3309 return gx_icvs;
3310}
3311
3312static void __kmp_initialize_root(kmp_root_t *root) {
3313 int f;
3314 kmp_team_t *root_team;
3315 kmp_team_t *hot_team;
3316 int hot_team_max_nth;
3317 kmp_r_sched_t r_sched =
3318 __kmp_get_schedule_global(); // get current state of scheduling globals
3319 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3320 KMP_DEBUG_ASSERT(root);
3321 KMP_ASSERT(!root->r.r_begin);
3322
3323 /* setup the root state structure */
3324 __kmp_init_lock(&root->r.r_begin_lock);
3325 root->r.r_begin = FALSE;
3326 root->r.r_active = FALSE;
3327 root->r.r_in_parallel = 0;
3328 root->r.r_blocktime = __kmp_dflt_blocktime;
3329#if KMP_AFFINITY_SUPPORTED
3330 root->r.r_affinity_assigned = FALSE;
3331#endif
3332
3333 /* setup the root team for this task */
3334 /* allocate the root team structure */
3335 KF_TRACE(10, ("__kmp_initialize_root: before root_team\n"))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_initialize_root: before root_team\n"
); }
;
3336
3337 root_team =
3338 __kmp_allocate_team(root,
3339 1, // new_nproc
3340 1, // max_nproc
3341#if OMPT_SUPPORT
3342 ompt_data_none, // root parallel id
3343#endif
3344 __kmp_nested_proc_bind.bind_types[0], &r_icvs,
3345 0 // argc
3346 USE_NESTED_HOT_ARG(NULL) // primary thread is unknown
3347 );
3348#if USE_DEBUGGER
3349 // Non-NULL value should be assigned to make the debugger display the root
3350 // team.
3351 TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)(~0));
3352#endif
3353
3354 KF_TRACE(10, ("__kmp_initialize_root: after root_team = %p\n", root_team))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_initialize_root: after root_team = %p\n"
, root_team); }
;
3355
3356 root->r.r_root_team = root_team;
3357 root_team->t.t_control_stack_top = NULL;
3358
3359 /* initialize root team */
3360 root_team->t.t_threads[0] = NULL;
3361 root_team->t.t_nproc = 1;
3362 root_team->t.t_serialized = 1;
3363 // TODO???: root_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
3364 root_team->t.t_sched.sched = r_sched.sched;
3365 KA_TRACE(
3366 20,
3367 ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
3368 root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
3369
3370 /* setup the hot team for this task */
3371 /* allocate the hot team structure */
3372 KF_TRACE(10, ("__kmp_initialize_root: before hot_team\n"))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_initialize_root: before hot_team\n"
); }
;
3373
3374 hot_team =
3375 __kmp_allocate_team(root,
3376 1, // new_nproc
3377 __kmp_dflt_team_nth_ub * 2, // max_nproc
3378#if OMPT_SUPPORT
3379 ompt_data_none, // root parallel id
3380#endif
3381 __kmp_nested_proc_bind.bind_types[0], &r_icvs,
3382 0 // argc
3383 USE_NESTED_HOT_ARG(NULL) // primary thread is unknown
3384 );
3385 KF_TRACE(10, ("__kmp_initialize_root: after hot_team = %p\n", hot_team));
3386
3387 root->r.r_hot_team = hot_team;
3388 root_team->t.t_control_stack_top = NULL;
3389
3390 /* first-time initialization */
3391 hot_team->t.t_parent = root_team;
3392
3393 /* initialize hot team */
3394 hot_team_max_nth = hot_team->t.t_max_nproc;
3395 for (f = 0; f < hot_team_max_nth; ++f) {
3396 hot_team->t.t_threads[f] = NULL;
3397 }
3398 hot_team->t.t_nproc = 1;
3399 // TODO???: hot_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
3400 hot_team->t.t_sched.sched = r_sched.sched;
3401 hot_team->t.t_size_changed = 0;
3402}
3403
3404#ifdef KMP_DEBUG
3405
3406typedef struct kmp_team_list_item {
3407 kmp_team_p const *entry;
3408 struct kmp_team_list_item *next;
3409} kmp_team_list_item_t;
3410typedef kmp_team_list_item_t *kmp_team_list_t;
3411
3412static void __kmp_print_structure_team_accum( // Add team to list of teams.
3413 kmp_team_list_t list, // List of teams.
3414 kmp_team_p const *team // Team to add.
3415) {
3416
3417 // List must terminate with item where both entry and next are NULL.
3418 // Team is added to the list only once.
3419 // List is sorted in ascending order by team id.
3420 // Team id is *not* a key.
3421
3422 kmp_team_list_t l;
3423
3424 KMP_DEBUG_ASSERT(list != NULL);
3425 if (team == NULL) {
3426 return;
3427 }
3428
3429 __kmp_print_structure_team_accum(list, team->t.t_parent);
3430 __kmp_print_structure_team_accum(list, team->t.t_next_pool);
3431
3432 // Search list for the team.
3433 l = list;
3434 while (l->next != NULL && l->entry != team) {
3435 l = l->next;
3436 }
3437 if (l->next != NULL) {
3438 return; // Team has been added before, exit.
3439 }
3440
3441 // Team is not found. Search list again for insertion point.
3442 l = list;
3443 while (l->next != NULL && l->entry->t.t_id <= team->t.t_id) {
3444 l = l->next;
3445 }
3446
3447 // Insert team.
3448 {
3449 kmp_team_list_item_t *item = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
3450 sizeof(kmp_team_list_item_t));
3451 *item = *l;
3452 l->entry = team;
3453 l->next = item;
3454 }
3455}
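
The insertion above uses a sentinel-terminated list and a copy trick: to insert before node l without tracking a predecessor, the new node takes a copy of *l and l itself is overwritten in place. A standalone sketch of the same idea (simplified types, not the runtime's kmp_team_list_item_t):

#include <cstdio>

struct Item { int id; Item *next; }; // last node (next == nullptr) is a sentinel

void insertSorted(Item *list, int id) {
  Item *l = list;
  while (l->next != nullptr && l->id <= id)
    l = l->next;
  // Insert before l without a predecessor pointer: duplicate l, then overwrite it.
  Item *item = new Item(*l);
  l->id = id;
  l->next = item;
}

int main() {
  Item *list = new Item{0, nullptr}; // empty list: just the sentinel
  for (int id : {30, 10, 20})
    insertSorted(list, id);
  for (Item *l = list; l->next != nullptr; l = l->next)
    std::printf("%d\n", l->id); // prints 10 20 30
  return 0;                     // cleanup omitted for brevity
}
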
3456
3457static void __kmp_print_structure_team(char const *title, kmp_team_p const *team
3458
3459) {
3460 __kmp_printf("%s", title);
3461 if (team != NULL) {
3462 __kmp_printf("%2x %p\n", team->t.t_id, team);
3463 } else {
3464 __kmp_printf(" - (nil)\n");
3465 }
3466}
3467
3468static void __kmp_print_structure_thread(char const *title,
3469 kmp_info_p const *thread) {
3470 __kmp_printf("%s", title);
3471 if (thread != NULL) {
3472 __kmp_printf("%2d %p\n", thread->th.th_info.ds.ds_gtid, thread);
3473 } else {
3474 __kmp_printf(" - (nil)\n");
3475 }
3476}
3477
3478void __kmp_print_structure(void) {
3479
3480 kmp_team_list_t list;
3481
3482 // Initialize list of teams.
3483 list =
3484 (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(sizeof(kmp_team_list_item_t));
3485 list->entry = NULL;
3486 list->next = NULL;
3487
3488 __kmp_printf("\n------------------------------\nGlobal Thread "
3489 "Table\n------------------------------\n");
3490 {
3491 int gtid;
3492 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3493 __kmp_printf("%2d", gtid);
3494 if (__kmp_threads != NULL) {
3495 __kmp_printf(" %p", __kmp_threads[gtid]);
3496 }
3497 if (__kmp_root != NULL) {
3498 __kmp_printf(" %p", __kmp_root[gtid]);
3499 }
3500 __kmp_printf("\n");
3501 }
3502 }
3503
3504 // Print out __kmp_threads array.
3505 __kmp_printf("\n------------------------------\nThreads\n--------------------"
3506 "----------\n");
3507 if (__kmp_threads != NULL) {
3508 int gtid;
3509 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3510 kmp_info_t const *thread = __kmp_threads[gtid];
3511 if (thread != NULL) {
3512 __kmp_printf("GTID %2d %p:\n", gtid, thread);
3513 __kmp_printf(" Our Root: %p\n", thread->th.th_root);
3514 __kmp_print_structure_team(" Our Team: ", thread->th.th_team);
3515 __kmp_print_structure_team(" Serial Team: ",
3516 thread->th.th_serial_team);
3517 __kmp_printf(" Threads: %2d\n", thread->th.th_team_nproc);
3518 __kmp_print_structure_thread(" Primary: ",
3519 thread->th.th_team_master);
3520 __kmp_printf(" Serialized?: %2d\n", thread->th.th_team_serialized);
3521 __kmp_printf(" Set NProc: %2d\n", thread->th.th_set_nproc);
3522 __kmp_printf(" Set Proc Bind: %2d\n", thread->th.th_set_proc_bind);
3523 __kmp_print_structure_thread(" Next in pool: ",
3524 thread->th.th_next_pool);
3525 __kmp_printf("\n");
3526 __kmp_print_structure_team_accum(list, thread->th.th_team);
3527 __kmp_print_structure_team_accum(list, thread->th.th_serial_team);
3528 }
3529 }
3530 } else {
3531 __kmp_printf("Threads array is not allocated.\n");
3532 }
3533
3534 // Print out __kmp_root array.
3535 __kmp_printf("\n------------------------------\nUbers\n----------------------"
3536 "--------\n");
3537 if (__kmp_root != NULL) {
3538 int gtid;
3539 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3540 kmp_root_t const *root = __kmp_root[gtid];
3541 if (root != NULL) {
3542 __kmp_printf("GTID %2d %p:\n", gtid, root);
3543 __kmp_print_structure_team(" Root Team: ", root->r.r_root_team);
3544 __kmp_print_structure_team(" Hot Team: ", root->r.r_hot_team);
3545 __kmp_print_structure_thread(" Uber Thread: ",
3546 root->r.r_uber_thread);
3547 __kmp_printf(" Active?: %2d\n", root->r.r_active);
3548 __kmp_printf(" In Parallel: %2d\n",
3549 KMP_ATOMIC_LD_RLX(&root->r.r_in_parallel));
3550 __kmp_printf("\n");
3551 __kmp_print_structure_team_accum(list, root->r.r_root_team);
3552 __kmp_print_structure_team_accum(list, root->r.r_hot_team);
3553 }
3554 }
3555 } else {
3556 __kmp_printf("Ubers array is not allocated.\n");
3557 }
3558
3559 __kmp_printf("\n------------------------------\nTeams\n----------------------"
3560 "--------\n");
3561 while (list->next != NULL) {
3562 kmp_team_p const *team = list->entry;
3563 int i;
3564 __kmp_printf("Team %2x %p:\n", team->t.t_id, team);
3565 __kmp_print_structure_team(" Parent Team: ", team->t.t_parent);
3566 __kmp_printf(" Primary TID: %2d\n", team->t.t_master_tid);
3567 __kmp_printf(" Max threads: %2d\n", team->t.t_max_nproc);
3568 __kmp_printf(" Levels of serial: %2d\n", team->t.t_serialized);
3569 __kmp_printf(" Number threads: %2d\n", team->t.t_nproc);
3570 for (i = 0; i < team->t.t_nproc; ++i) {
3571 __kmp_printf(" Thread %2d: ", i);
3572 __kmp_print_structure_thread("", team->t.t_threads[i]);
3573 }
3574 __kmp_print_structure_team(" Next in pool: ", team->t.t_next_pool);
3575 __kmp_printf("\n");
3576 list = list->next;
3577 }
3578
3579 // Print out __kmp_thread_pool and __kmp_team_pool.
3580 __kmp_printf("\n------------------------------\nPools\n----------------------"
3581 "--------\n");
3582 __kmp_print_structure_thread("Thread pool: ",
3583 CCAST(kmp_info_t *, __kmp_thread_pool));
3584 __kmp_print_structure_team("Team pool: ",
3585 CCAST(kmp_team_t *, __kmp_team_pool));
3586 __kmp_printf("\n");
3587
3588 // Free team list.
3589 while (list != NULL) {
3590 kmp_team_list_item_t *item = list;
3591 list = list->next;
3592 KMP_INTERNAL_FREE(item);
3593 }
3594}
3595
3596#endif
3597
3598//---------------------------------------------------------------------------
3599// Stuff for per-thread fast random number generator
3600// Table of primes
3601static const unsigned __kmp_primes[] = {
3602 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, 0xba5703f5, 0xb495a877,
3603 0xe1626741, 0x79695e6b, 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
3604 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, 0xbe4d6fe9, 0x5f15e201,
3605 0x99afc3fd, 0xf3f16801, 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
3606 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, 0x085a3d61, 0x46eb5ea7,
3607 0x3d9910ed, 0x2e687b5b, 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
3608 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, 0x54581edb, 0xf2480f45,
3609 0x0bb9288f, 0xef1affc7, 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
3610 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, 0xfc411073, 0xc3749363,
3611 0xb892d829, 0x3549366b, 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
3612 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f};
3613
3614//---------------------------------------------------------------------------
3615// __kmp_get_random: Get a random number using a linear congruential method.
3616unsigned short __kmp_get_random(kmp_info_t *thread) {
3617 unsigned x = thread->th.th_x;
3618 unsigned short r = (unsigned short)(x >> 16);
3619
3620 thread->th.th_x = x * thread->th.th_a + 1;
3621
3622 KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_get_random: THREAD: %d, RETURN: %u\n"
, thread->th.th_info.ds.ds_tid, r); }
3623 thread->th.th_info.ds.ds_tid, r))if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_get_random: THREAD: %d, RETURN: %u\n"
, thread->th.th_info.ds.ds_tid, r); }
;
3624
3625 return r;
3626}
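
A self-contained model of the generator above: the state advances as x = a*x + 1 (mod 2^32 via unsigned wraparound, with a drawn from the prime table) and the high 16 bits are returned, which are better distributed than the low bits of an LCG. The seeding mirrors __kmp_init_random for a single thread; the struct here is only an illustration, not the runtime's kmp_info_t:

#include <cstdio>

struct FastRand {
  unsigned a; // multiplier, one of the __kmp_primes entries
  unsigned x; // current state
};

unsigned short nextRandom(FastRand &r) {
  unsigned short out = (unsigned short)(r.x >> 16); // high 16 bits
  r.x = r.x * r.a + 1;                              // wraps modulo 2^32
  return out;
}

int main() {
  unsigned seed = 3;          // e.g. a thread id
  FastRand r;
  r.a = 0x9e3779b1u;          // first entry of the prime table above
  r.x = (seed + 1) * r.a + 1; // same seeding rule as __kmp_init_random
  for (int i = 0; i < 4; ++i)
    std::printf("%u\n", (unsigned)nextRandom(r));
  return 0;
}
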
3627//--------------------------------------------------------
3628// __kmp_init_random: Initialize a random number generator
3629void __kmp_init_random(kmp_info_t *thread) {
3630 unsigned seed = thread->th.th_info.ds.ds_tid;
3631
3632 thread->th.th_a =
3633 __kmp_primes[seed % (sizeof(__kmp_primes) / sizeof(__kmp_primes[0]))];
3634 thread->th.th_x = (seed + 1) * thread->th.th_a + 1;
3635 KA_TRACE(30,
3636 ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a));
3637}
3638
3639#if KMP_OS_WINDOWS
3640/* reclaim array entries for root threads that are already dead, returns number
3641 * reclaimed */
3642static int __kmp_reclaim_dead_roots(void) {
3643 int i, r = 0;
3644
3645 for (i = 0; i < __kmp_threads_capacity; ++i) {
3646 if (KMP_UBER_GTID(i) &&
3647 !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
3648 !__kmp_root[i]
3649 ->r.r_active) { // AC: reclaim only roots died in non-active state
3650 r += __kmp_unregister_root_other_thread(i);
3651 }
3652 }
3653 return r;
3654}
3655#endif
3656
3657/* This function attempts to create free entries in __kmp_threads and
3658 __kmp_root, and returns the number of free entries generated.
3659
3660 For Windows* OS static library, the first mechanism used is to reclaim array
3661 entries for root threads that are already dead.
3662
3663 On all platforms, expansion is attempted on the arrays __kmp_threads_ and
3664 __kmp_root, with appropriate update to __kmp_threads_capacity. Array
3665 capacity is increased by doubling with clipping to __kmp_tp_capacity, if
3666 threadprivate cache array has been created. Synchronization with
3667 __kmpc_threadprivate_cached is done using __kmp_tp_cached_lock.
3668
3669 After any dead root reclamation, if the clipping value allows array expansion
3670 to result in the generation of a total of nNeed free slots, the function does
3671 that expansion. If not, nothing is done beyond the possible initial root
3672 thread reclamation.
3673
3674 If any argument is negative, the behavior is undefined. */
3675static int __kmp_expand_threads(int nNeed) {
3676 int added = 0;
3677 int minimumRequiredCapacity;
3678 int newCapacity;
3679 kmp_info_t **newThreads;
3680 kmp_root_t **newRoot;
3681
3682 // All calls to __kmp_expand_threads should be under __kmp_forkjoin_lock, so
3683 // resizing __kmp_threads does not need additional protection if foreign
3684 // threads are present
3685
3686#if KMP_OS_WINDOWS && !KMP_DYNAMIC_LIB
3687 /* only for Windows static library */
3688 /* reclaim array entries for root threads that are already dead */
3689 added = __kmp_reclaim_dead_roots();
3690
3691 if (nNeed) {
3692 nNeed -= added;
3693 if (nNeed < 0)
3694 nNeed = 0;
3695 }
3696#endif
3697 if (nNeed <= 0)
3698 return added;
3699
3700 // Note that __kmp_threads_capacity is not bounded by __kmp_max_nth. If
3701 // __kmp_max_nth is set to some value less than __kmp_sys_max_nth by the
3702 // user via KMP_DEVICE_THREAD_LIMIT, then __kmp_threads_capacity may become
3703 // > __kmp_max_nth in one of two ways:
3704 //
3705 // 1) The initialization thread (gtid = 0) exits. __kmp_threads[0]
3706 // may not be reused by another thread, so we may need to increase
3707 // __kmp_threads_capacity to __kmp_max_nth + 1.
3708 //
3709 // 2) New foreign root(s) are encountered. We always register new foreign
3710 // roots. This may cause a smaller # of threads to be allocated at
3711 // subsequent parallel regions, but the worker threads hang around (and
3712 // eventually go to sleep) and need slots in the __kmp_threads[] array.
3713 //
3714 // Anyway, that is the reason for moving the check to see if
3715 // __kmp_max_nth was exceeded into __kmp_reserve_threads()
3716 // instead of having it performed here. -BB
3717
3718 KMP_DEBUG_ASSERT(__kmp_sys_max_nth >= __kmp_threads_capacity);
3719
3720 /* compute expansion headroom to check if we can expand */
3721 if (__kmp_sys_max_nth - __kmp_threads_capacity < nNeed) {
3722 /* possible expansion too small -- give up */
3723 return added;
3724 }
3725 minimumRequiredCapacity = __kmp_threads_capacity + nNeed;
3726
3727 newCapacity = __kmp_threads_capacity;
3728 do {
3729 newCapacity = newCapacity <= (__kmp_sys_max_nth >> 1) ? (newCapacity << 1)
3730 : __kmp_sys_max_nth;
3731 } while (newCapacity < minimumRequiredCapacity);
3732 newThreads = (kmp_info_t **)__kmp_allocate(
3733 (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * newCapacity + CACHE_LINE);
3734 newRoot =
3735 (kmp_root_t **)((char *)newThreads + sizeof(kmp_info_t *) * newCapacity);
3736 KMP_MEMCPY(newThreads, __kmp_threads,
3737 __kmp_threads_capacity * sizeof(kmp_info_t *));
3738 KMP_MEMCPY(newRoot, __kmp_root,
3739 __kmp_threads_capacity * sizeof(kmp_root_t *));
3740 // Put old __kmp_threads array on a list. Any ongoing references to the old
3741 // list will be valid. This list is cleaned up at library shutdown.
3742 kmp_old_threads_list_t *node =
3743 (kmp_old_threads_list_t *)__kmp_allocate(sizeof(kmp_old_threads_list_t));
3744 node->threads = __kmp_threads;
3745 node->next = __kmp_old_threads_list;
3746 __kmp_old_threads_list = node;
3747
3748 *(kmp_info_t * *volatile *)&__kmp_threads = newThreads;
3749 *(kmp_root_t * *volatile *)&__kmp_root = newRoot;
3750 added += newCapacity - __kmp_threads_capacity;
3751 *(volatile int *)&__kmp_threads_capacity = newCapacity;
3752
3753 if (newCapacity > __kmp_tp_capacity) {
3754 __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
3755 if (__kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3756 __kmp_threadprivate_resize_cache(newCapacity);
3757 } else { // increase __kmp_tp_capacity to correspond with kmp_threads size
3758 *(volatile int *)&__kmp_tp_capacity = newCapacity;
3759 }
3760 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
3761 }
3762
3763 return added;
3764}
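
A minimal model of the capacity computation above: double the current capacity until the request is met, clipping at the system maximum. In the real function the headroom check beforehand guarantees the loop terminates; that precondition is assumed here too:

#include <cstdio>

// Assumes current + needed <= sysMax (checked by the caller, as above).
int newCapacityFor(int current, int needed, int sysMax) {
  int required = current + needed;
  int cap = current;
  do {
    cap = (cap <= (sysMax >> 1)) ? (cap << 1) : sysMax; // double, clip at sysMax
  } while (cap < required);
  return cap;
}

int main() {
  std::printf("%d\n", newCapacityFor(64, 3, 32768));     // 128
  std::printf("%d\n", newCapacityFor(20000, 5000, 32768)); // clipped to 32768
  return 0;
}
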
3765
3766/* Register the current thread as a root thread and obtain our gtid. We must
3767 have the __kmp_initz_lock held at this point. Argument TRUE only if are the
3768 thread that calls from __kmp_do_serial_initialize() */
3769int __kmp_register_root(int initial_thread) {
3770 kmp_info_t *root_thread;
3771 kmp_root_t *root;
3772 int gtid;
3773 int capacity;
3774 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
3775 KA_TRACE(20, ("__kmp_register_root: entered\n"))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_register_root: entered\n"
); }
;
3776 KMP_MB();
3777
3778 /* 2007-03-02:
3779 If initial thread did not invoke OpenMP RTL yet, and this thread is not an
3780 initial one, "__kmp_all_nth >= __kmp_threads_capacity" condition does not
3781 work as expected -- it may return false (that means there is at least one
3782 empty slot in __kmp_threads array), but it is possible the only free slot
3783 is #0, which is reserved for initial thread and so cannot be used for this
3784 one. Following code workarounds this bug.
3785
3786 However, right solution seems to be not reserving slot #0 for initial
3787 thread because:
3788 (1) there is no magic in slot #0,
3789 (2) we cannot detect initial thread reliably (the first thread which does
3790 serial initialization may be not a real initial thread).
3791 */
3792 capacity = __kmp_threads_capacity;
3793 if (!initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3794 --capacity;
3795 }
3796
3797 // If it is not for initializing the hidden helper team, we need to take
3798 // __kmp_hidden_helper_threads_num out of the capacity because it is included
3799 // in __kmp_threads_capacity.
3800 if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
3801 capacity -= __kmp_hidden_helper_threads_num;
3802 }
3803
3804 /* see if there are too many threads */
3805 if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1)) {
3806 if (__kmp_tp_cached) {
3807 __kmp_fatal(KMP_MSG(CantRegisterNewThread),
3808 KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
3809 KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
3810 } else {
3811 __kmp_fatal(KMP_MSG(CantRegisterNewThread), KMP_HNT(SystemLimitOnThreads),
3812 __kmp_msg_null);
3813 }
3814 }
3815
3816 // When hidden helper task is enabled, __kmp_threads is organized as follows:
3817 // 0: initial thread, also a regular OpenMP thread.
3818 // [1, __kmp_hidden_helper_threads_num]: slots for hidden helper threads.
3819 // [__kmp_hidden_helper_threads_num + 1, __kmp_threads_capacity): slots for
3820 // regular OpenMP threads.
3821 if (TCR_4(__kmp_init_hidden_helper_threads)) {
3822 // Find an available thread slot for hidden helper thread. Slots for hidden
3823 // helper threads start from 1 to __kmp_hidden_helper_threads_num.
3824 for (gtid = 1; TCR_PTR(__kmp_threads[gtid]) != NULL &&
3825 gtid <= __kmp_hidden_helper_threads_num;
3826 gtid++)
3827 ;
3828 KMP_ASSERT(gtid <= __kmp_hidden_helper_threads_num);
3829 KA_TRACE(1, ("__kmp_register_root: found slot in threads array for "
3830 "hidden helper thread: T#%d\n",
3831 gtid));
3832 } else {
3833 /* find an available thread slot */
3834 // Don't reassign the zero slot since we need that to only be used by
3835 // initial thread. Slots for hidden helper threads should also be skipped.
3836 if (initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3837 gtid = 0;
3838 } else {
3839 for (gtid = __kmp_hidden_helper_threads_num + 1;
3840 TCR_PTR(__kmp_threads[gtid]) != NULL; gtid++)
3841 ;
3842 }
3843 KA_TRACE(
3844 1, ("__kmp_register_root: found slot in threads array: T#%d\n", gtid));
3845 KMP_ASSERT(gtid < __kmp_threads_capacity);
3846 }
3847
3848 /* update global accounting */
3849 __kmp_all_nth++;
3850 TCW_4(__kmp_nth, __kmp_nth + 1);
3851
3852 // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) for low
3853 // numbers of procs, and method #2 (keyed API call) for higher numbers.
3854 if (__kmp_adjust_gtid_mode) {
3855 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
3856 if (TCR_4(__kmp_gtid_mode) != 2) {
3857 TCW_4(__kmp_gtid_mode, 2);
3858 }
3859 } else {
3860 if (TCR_4(__kmp_gtid_mode) != 1) {
3861 TCW_4(__kmp_gtid_mode, 1);
3862 }
3863 }
3864 }
3865
3866#ifdef KMP_ADJUST_BLOCKTIME
3867 /* Adjust blocktime to zero if necessary */
3868 /* Middle initialization might not have occurred yet */
3869 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
3870 if (__kmp_nth > __kmp_avail_proc) {
3871 __kmp_zero_bt = TRUE;
3872 }
3873 }
3874#endif /* KMP_ADJUST_BLOCKTIME */
3875
3876 /* setup this new hierarchy */
3877 if (!(root = __kmp_root[gtid])) {
3878 root = __kmp_root[gtid] = (kmp_root_t *)__kmp_allocate(sizeof(kmp_root_t));
3879 KMP_DEBUG_ASSERT(!root->r.r_root_team);
3880 }
3881
3882#if KMP_STATS_ENABLED
3883 // Initialize stats as soon as possible (right after gtid assignment).
3884 __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
3885 __kmp_stats_thread_ptr->startLife();
3886 KMP_SET_THREAD_STATE(SERIAL_REGION);
3887 KMP_INIT_PARTITIONED_TIMERS(OMP_serial);
3888#endif
3889 __kmp_initialize_root(root);
3890
3891 /* setup new root thread structure */
3892 if (root->r.r_uber_thread) {
3893 root_thread = root->r.r_uber_thread;
3894 } else {
3895 root_thread = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));
3896 if (__kmp_storage_map) {
3897 __kmp_print_thread_storage_map(root_thread, gtid);
3898 }
3899 root_thread->th.th_info.ds.ds_gtid = gtid;
3900#if OMPT_SUPPORT
3901 root_thread->th.ompt_thread_info.thread_data = ompt_data_none;
3902#endif
3903 root_thread->th.th_root = root;
3904 if (__kmp_env_consistency_check) {
3905 root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid);
3906 }
3907#if USE_FAST_MEMORY
3908 __kmp_initialize_fast_memory(root_thread);
3909#endif /* USE_FAST_MEMORY */
3910
3911#if KMP_USE_BGET
3912 KMP_DEBUG_ASSERT(root_thread->th.th_local.bget_data == NULL);
3913 __kmp_initialize_bget(root_thread);
3914#endif
3915 __kmp_init_random(root_thread); // Initialize random number generator
3916 }
3917
3918 /* setup the serial team held in reserve by the root thread */
3919 if (!root_thread->th.th_serial_team) {
3920 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3921 KF_TRACE(10, ("__kmp_register_root: before serial_team\n"))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_register_root: before serial_team\n"
); }
;
3922 root_thread->th.th_serial_team = __kmp_allocate_team(
3923 root, 1, 1,
3924#if OMPT_SUPPORT
3925 ompt_data_none, // root parallel id
3926#endif
3927 proc_bind_default, &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
3928 }
3929 KMP_ASSERT(root_thread->th.th_serial_team);
3930 KF_TRACE(10, ("__kmp_register_root: after serial_team = %p\n",
3931 root_thread->th.th_serial_team));
3932
3933 /* drop root_thread into place */
3934 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
3935
3936 root->r.r_root_team->t.t_threads[0] = root_thread;
3937 root->r.r_hot_team->t.t_threads[0] = root_thread;
3938 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
3939 // AC: the team created in reserve, not for execution (it is unused for now).
3940 root_thread->th.th_serial_team->t.t_serialized = 0;
3941 root->r.r_uber_thread = root_thread;
3942
3943 /* initialize the thread, get it ready to go */
3944 __kmp_initialize_info(root_thread, root->r.r_root_team, 0, gtid);
3945 TCW_4(__kmp_init_gtid, TRUE);
3946
3947 /* prepare the primary thread for get_gtid() */
3948 __kmp_gtid_set_specific(gtid);
3949
3950#if USE_ITT_BUILD
3951 __kmp_itt_thread_name(gtid);
3952#endif /* USE_ITT_BUILD */
3953
3954#ifdef KMP_TDATA_GTID
3955 __kmp_gtid = gtid;
3956#endif
3957 __kmp_create_worker(gtid, root_thread, __kmp_stksize);
3958 KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == gtid);
3959
3960 KA_TRACE(20, ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, "
"plain=%u\n", gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team
), root->r.r_hot_team->t.t_id, 0, 0, 0); }
3961 "plain=%u\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, "
"plain=%u\n", gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team
), root->r.r_hot_team->t.t_id, 0, 0, 0); }
3962 gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team),if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, "
"plain=%u\n", gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team
), root->r.r_hot_team->t.t_id, 0, 0, 0); }
3963 root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, "
"plain=%u\n", gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team
), root->r.r_hot_team->t.t_id, 0, 0, 0); }
3964 KMP_INIT_BARRIER_STATE))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, "
"plain=%u\n", gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team
), root->r.r_hot_team->t.t_id, 0, 0, 0); }
;
3965 { // Initialize barrier data.
3966 int b;
3967 for (b = 0; b < bs_last_barrier; ++b) {
3968 root_thread->th.th_bar[b].bb.b_arrived = KMP_INIT_BARRIER_STATE;
3969#if USE_DEBUGGER
3970 root_thread->th.th_bar[b].bb.b_worker_arrived = 0;
3971#endif
3972 }
3973 }
3974 KMP_DEBUG_ASSERT(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived ==
3975 KMP_INIT_BARRIER_STATE);
3976
3977#if KMP_AFFINITY_SUPPORTED
3978 root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
3979 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
3980 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
3981 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
3982#endif /* KMP_AFFINITY_SUPPORTED */
3983 root_thread->th.th_def_allocator = __kmp_def_allocator;
3984 root_thread->th.th_prev_level = 0;
3985 root_thread->th.th_prev_num_threads = 1;
3986
3987 kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(sizeof(kmp_cg_root_t));
3988 tmp->cg_root = root_thread;
3989 tmp->cg_thread_limit = __kmp_cg_max_nth;
3990 tmp->cg_nthreads = 1;
3991 KA_TRACE(100, ("__kmp_register_root: Thread %p created node %p with"if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_register_root: Thread %p created node %p with"
" cg_nthreads init to 1\n", root_thread, tmp); }
3992 " cg_nthreads init to 1\n",if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_register_root: Thread %p created node %p with"
" cg_nthreads init to 1\n", root_thread, tmp); }
3993 root_thread, tmp))if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_register_root: Thread %p created node %p with"
" cg_nthreads init to 1\n", root_thread, tmp); }
;
3994 tmp->up = NULL__null;
3995 root_thread->th.th_cg_roots = tmp;
3996
3997 __kmp_root_counter++;
3998
3999#if OMPT_SUPPORT
4000 if (!initial_thread && ompt_enabled.enabled) {
4001
4002 kmp_info_t *root_thread = ompt_get_thread();
4003
4004 ompt_set_thread_state(root_thread, ompt_state_overhead);
4005
4006 if (ompt_enabled.ompt_callback_thread_begin) {
4007 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
4008 ompt_thread_initial, __ompt_get_thread_data_internal());
4009 }
4010 ompt_data_t *task_data;
4011 ompt_data_t *parallel_data;
4012 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, &parallel_data,
4013 NULL);
4014 if (ompt_enabled.ompt_callback_implicit_task) {
4015 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
4016 ompt_scope_begin, parallel_data, task_data, 1, 1, ompt_task_initial);
4017 }
4018
4019 ompt_set_thread_state(root_thread, ompt_state_work_serial);
4020 }
4021#endif
4022#if OMPD_SUPPORT
4023 if (ompd_state & OMPD_ENABLE_BP)
4024 ompd_bp_thread_begin();
4025#endif
4026
4027 KMP_MB();
4028 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4029
4030 return gtid;
4031}
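
An illustrative model of the __kmp_threads slot layout that __kmp_register_root navigates above when hidden helper threads are enabled: slot 0 is the initial thread, slots [1, hidden_helper_num] are reserved for hidden helpers, and regular roots are searched for starting at hidden_helper_num + 1. The helper function and the count below are illustrative (the runtime's default hidden-helper count is commonly 8, configurable via environment):

#include <cstdio>

int firstRegularSlot(int hidden_helper_num) { return hidden_helper_num + 1; }

int main() {
  const int hidden_helper_num = 8; // example value for __kmp_hidden_helper_threads_num
  std::printf("initial thread: slot 0\n");
  std::printf("hidden helpers: slots 1..%d\n", hidden_helper_num);
  std::printf("regular roots search from slot %d\n",
              firstRegularSlot(hidden_helper_num));
  return 0;
}
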
4032
4033#if KMP_NESTED_HOT_TEAMS
4034static int __kmp_free_hot_teams(kmp_root_t *root, kmp_info_t *thr, int level,
4035 const int max_level) {
4036 int i, n, nth;
4037 kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
4038 if (!hot_teams || !hot_teams[level].hot_team) {
4039 return 0;
4040 }
4041 KMP_DEBUG_ASSERT(level < max_level);
4042 kmp_team_t *team = hot_teams[level].hot_team;
4043 nth = hot_teams[level].hot_team_nth;
4044 n = nth - 1; // primary thread is not freed
4045 if (level < max_level - 1) {
4046 for (i = 0; i < nth; ++i) {
4047 kmp_info_t *th = team->t.t_threads[i];
4048 n += __kmp_free_hot_teams(root, th, level + 1, max_level);
4049 if (i > 0 && th->th.th_hot_teams) {
4050 __kmp_free(th->th.th_hot_teams);
4051 th->th.th_hot_teams = NULL;
4052 }
4053 }
4054 }
4055 __kmp_free_team(root, team, NULL);
4056 return n;
4057}
4058#endif
4059
4060// Resets a root thread and clear its root and hot teams.
4061// Returns the number of __kmp_threads entries directly and indirectly freed.
4062static int __kmp_reset_root(int gtid, kmp_root_t *root) {
4063 kmp_team_t *root_team = root->r.r_root_team;
4064 kmp_team_t *hot_team = root->r.r_hot_team;
4065 int n = hot_team->t.t_nproc;
4066 int i;
4067
4068 KMP_DEBUG_ASSERT(!root->r.r_active);
4069
4070 root->r.r_root_team = NULL;
4071 root->r.r_hot_team = NULL;
4072 // __kmp_free_team() does not free hot teams, so we have to clear r_hot_team
4073 // before call to __kmp_free_team().
4074 __kmp_free_team(root, root_team USE_NESTED_HOT_ARG(NULL));
4075#if KMP_NESTED_HOT_TEAMS
4076 if (__kmp_hot_teams_max_level >
4077 0) { // need to free nested hot teams and their threads if any
4078 for (i = 0; i < hot_team->t.t_nproc; ++i) {
4079 kmp_info_t *th = hot_team->t.t_threads[i];
4080 if (__kmp_hot_teams_max_level > 1) {
4081 n += __kmp_free_hot_teams(root, th, 1, __kmp_hot_teams_max_level);
4082 }
4083 if (th->th.th_hot_teams) {
4084 __kmp_free(th->th.th_hot_teams);
4085 th->th.th_hot_teams = NULL;
4086 }
4087 }
4088 }
4089#endif
4090 __kmp_free_team(root, hot_team USE_NESTED_HOT_ARG(NULL));
4091
4092 // Before we can reap the thread, we need to make certain that all other
4093 // threads in the teams that had this root as ancestor have stopped trying to
4094 // steal tasks.
4095 if (__kmp_tasking_mode != tskm_immediate_exec) {
4096 __kmp_wait_to_unref_task_teams();
4097 }
4098
4099#if KMP_OS_WINDOWS
4100 /* Close Handle of root duplicated in __kmp_create_worker (tr #62919) */
4101 KA_TRACE(
4102 10, ("__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC
4103 "\n",
4104 (LPVOID) & (root->r.r_uber_thread->th),
4105 root->r.r_uber_thread->th.th_info.ds.ds_thread));
4106 __kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread);
4107#endif /* KMP_OS_WINDOWS */
4108
4109#if OMPD_SUPPORT
4110 if (ompd_state & OMPD_ENABLE_BP)
4111 ompd_bp_thread_end();
4112#endif
4113
4114#if OMPT_SUPPORT
4115 ompt_data_t *task_data;
4116 ompt_data_t *parallel_data;
4117 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, &parallel_data,
4118 NULL);
4119 if (ompt_enabled.ompt_callback_implicit_task) {
4120 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
4121 ompt_scope_end, parallel_data, task_data, 0, 1, ompt_task_initial);
4122 }
4123 if (ompt_enabled.ompt_callback_thread_end) {
4124 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(
4125 &(root->r.r_uber_thread->th.ompt_thread_info.thread_data));
4126 }
4127#endif
4128
4129 TCW_4(__kmp_nth,
4130 __kmp_nth - 1); // __kmp_reap_thread will decrement __kmp_all_nth.
4131 i = root->r.r_uber_thread->th.th_cg_roots->cg_nthreads--;
4132 KA_TRACE(100, ("__kmp_reset_root: Thread %p decrement cg_nthreads on node %p"
4133 " to %d\n",
4134 root->r.r_uber_thread, root->r.r_uber_thread->th.th_cg_roots,
4135 root->r.r_uber_thread->th.th_cg_roots->cg_nthreads));
4136 if (i == 1) {
4137 // need to free contention group structure
4138 KMP_DEBUG_ASSERT(root->r.r_uber_thread ==
4139 root->r.r_uber_thread->th.th_cg_roots->cg_root);
4140 KMP_DEBUG_ASSERT(root->r.r_uber_thread->th.th_cg_roots->up == NULL);
4141 __kmp_free(root->r.r_uber_thread->th.th_cg_roots);
4142 root->r.r_uber_thread->th.th_cg_roots = NULL;
4143 }
4144 __kmp_reap_thread(root->r.r_uber_thread, 1);
4145
4146 // We cannot put root thread to __kmp_thread_pool, so we have to reap it
4147 // instead of freeing.
4148 root->r.r_uber_thread = NULL;
4149 /* mark root as no longer in use */
4150 root->r.r_begin = FALSE;
4151
4152 return n;
4153}
4154
4155void __kmp_unregister_root_current_thread(int gtid) {
4156 KA_TRACE(1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid))if (kmp_a_debug >= 1) { __kmp_debug_printf ("__kmp_unregister_root_current_thread: enter T#%d\n"
, gtid); }
;
4157 /* this lock should be ok, since unregister_root_current_thread is never
4158 called during an abort, only during a normal close. furthermore, if you
4159 have the forkjoin lock, you should never try to get the initz lock */
4160 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
4161 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
4162 KC_TRACE(10, ("__kmp_unregister_root_current_thread: already finished, "
4163 "exiting T#%d\n",
4164 gtid));
4165 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4166 return;
4167 }
4168 kmp_root_t *root = __kmp_root[gtid];
4169
4170 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
4171 KMP_ASSERT(KMP_UBER_GTID(gtid));
4172 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
4173 KMP_ASSERT(root->r.r_active == FALSE);
4174
4175 KMP_MB();
4176
4177 kmp_info_t *thread = __kmp_threads[gtid];
4178 kmp_team_t *team = thread->th.th_team;
4179 kmp_task_team_t *task_team = thread->th.th_task_team;
4180
4181 // we need to wait for the proxy tasks before finishing the thread
4182 if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||
4183 task_team->tt.tt_hidden_helper_task_encountered)) {
4184#if OMPT_SUPPORT
4185 // the runtime is shutting down so we won't report any events
4186 thread->th.ompt_thread_info.state = ompt_state_undefined;
4187#endif
4188 __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
4189 }
4190
4191 __kmp_reset_root(gtid, root);
4192
4193 KMP_MB();
4194 KC_TRACE(10,
4195 ("__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid));
4196
4197 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4198}
4199
4200#if KMP_OS_WINDOWS
4201/* __kmp_forkjoin_lock must be already held
4202 Unregisters a root thread that is not the current thread. Returns the number
4203 of __kmp_threads entries freed as a result. */
4204static int __kmp_unregister_root_other_thread(int gtid) {
4205 kmp_root_t *root = __kmp_root[gtid];
4206 int r;
4207
4208 KA_TRACE(1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid));
4209 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
4210 KMP_ASSERT(KMP_UBER_GTID(gtid));
4211 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
4212 KMP_ASSERT(root->r.r_active == FALSE);
4213
4214 r = __kmp_reset_root(gtid, root);
4215 KC_TRACE(10,
4216 ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid));
4217 return r;
4218}
4219#endif
4220
4221#if KMP_DEBUG1
4222void __kmp_task_info() {
4223
4224 kmp_int32 gtid = __kmp_entry_gtid();
4225 kmp_int32 tid = __kmp_tid_from_gtid(gtid);
4226 kmp_info_t *this_thr = __kmp_threads[gtid];
4227 kmp_team_t *steam = this_thr->th.th_serial_team;
4228 kmp_team_t *team = this_thr->th.th_team;
4229
4230 __kmp_printf(
4231 "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p steam=%p curtask=%p "
4232 "ptask=%p\n",
4233 gtid, tid, this_thr, team, steam, this_thr->th.th_current_task,
4234 team->t.t_implicit_task_taskdata[tid].td_parent);
4235}
4236#endif // KMP_DEBUG
4237
4238/* TODO optimize with one big memclr, take out what isn't needed, split
4239 responsibility to workers as much as possible, and delay initialization of
4240 features as much as possible */
4241static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
4242 int tid, int gtid) {
4243 /* this_thr->th.th_info.ds.ds_gtid is setup in
4244 kmp_allocate_thread/create_worker.
4245 this_thr->th.th_serial_team is setup in __kmp_allocate_thread */
4246 KMP_DEBUG_ASSERT(this_thr != NULL);
4247 KMP_DEBUG_ASSERT(this_thr->th.th_serial_team);
4248 KMP_DEBUG_ASSERT(team);
4249 KMP_DEBUG_ASSERT(team->t.t_threads);
4250 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4251 kmp_info_t *master = team->t.t_threads[0];
4252 KMP_DEBUG_ASSERT(master);
4253 KMP_DEBUG_ASSERT(master->th.th_root);
4254
4255 KMP_MB();
4256
4257 TCW_SYNC_PTR(this_thr->th.th_team, team);
4258
4259 this_thr->th.th_info.ds.ds_tid = tid;
4260 this_thr->th.th_set_nproc = 0;
4261 if (__kmp_tasking_mode != tskm_immediate_exec)
4262 // When tasking is possible, threads are not safe to reap until they are
4263 // done tasking; this will be set when tasking code is exited in wait
4264 this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
4265 else // no tasking --> always safe to reap
4266 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
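A reduced sketch of the reap-state handshake described in the comment above (assumed flag names, not the real kmp_info_t field layout): a worker only becomes reclaimable after it has left the tasking code.

#include <atomic>

enum ReapState { NOT_SAFE_TO_REAP = 0, SAFE_TO_REAP = 1 };

// Stand-in for th.th_reap_state on one worker.
std::atomic<int> reap_state{NOT_SAFE_TO_REAP};

// Called by the worker when it exits tasking code in its wait loop.
void mark_done_tasking() {
  reap_state.store(SAFE_TO_REAP, std::memory_order_release);
}

// Called by the thread that wants to reclaim the worker's resources.
bool safe_to_reap() {
  return reap_state.load(std::memory_order_acquire) == SAFE_TO_REAP;
}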
4267 this_thr->th.th_set_proc_bind = proc_bind_default;
4268#if KMP_AFFINITY_SUPPORTED1
4269 this_thr->th.th_new_place = this_thr->th.th_current_place;
4270#endif
4271 this_thr->th.th_root = master->th.th_root;
4272
4273 /* setup the thread's cache of the team structure */
4274 this_thr->th.th_team_nproc = team->t.t_nproc;
4275 this_thr->th.th_team_master = master;
4276 this_thr->th.th_team_serialized = team->t.t_serialized;
4277
4278 KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata);
4279
4280 KF_TRACE(10, ("__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
4281 tid, gtid, this_thr, this_thr->th.th_current_task));
4282
4283 __kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr,
4284 team, tid, TRUE);
4285
4286 KF_TRACE(10, ("__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
4287 tid, gtid, this_thr, this_thr->th.th_current_task));
4288 // TODO: Initialize ICVs from parent; GEH - isn't that already done in
4289 // __kmp_initialize_team()?
4290
4291 /* TODO no worksharing in speculative threads */
4292 this_thr->th.th_dispatch = &team->t.t_dispatch[tid];
4293
4294 this_thr->th.th_local.this_construct = 0;
4295
4296 if (!this_thr->th.th_pri_common) {
4297 this_thr->th.th_pri_common =
4298 (struct common_table *)__kmp_allocate(sizeof(struct common_table));
4299 if (__kmp_storage_map) {
4300 __kmp_print_storage_map_gtid(
4301 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
4302 sizeof(struct common_table), "th_%d.th_pri_common\n", gtid);
4303 }
4304 this_thr->th.th_pri_head = NULL;
4305 }
4306
4307 if (this_thr != master && // Primary thread's CG root is initialized elsewhere
4308 this_thr->th.th_cg_roots != master->th.th_cg_roots) { // CG root not set
4309 // Make new thread's CG root same as primary thread's
4310 KMP_DEBUG_ASSERT(master->th.th_cg_roots);
4311 kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
4312 if (tmp) {
4313 // worker changes CG, need to check if old CG should be freed
4314 int i = tmp->cg_nthreads--;
4315 KA_TRACE(100, ("__kmp_initialize_info: Thread %p decrement cg_nthreads"
4316 " on node %p of thread %p to %d\n",
4317 this_thr, tmp, tmp->cg_root, tmp->cg_nthreads));
4318 if (i == 1) {
4319 __kmp_free(tmp); // last thread left CG --> free it
4320 }
4321 }
4322 this_thr->th.th_cg_roots = master->th.th_cg_roots;
4323 // Increment new thread's CG root's counter to add the new thread
4324 this_thr->th.th_cg_roots->cg_nthreads++;
4325 KA_TRACE(100, ("__kmp_initialize_info: Thread %p increment cg_nthreads on"
4326 " node %p of thread %p to %d\n",
4327 this_thr, this_thr->th.th_cg_roots,
4328 this_thr->th.th_cg_roots->cg_root,
4329 this_thr->th.th_cg_roots->cg_nthreads));
4330 this_thr->th.th_current_task->td_icvs.thread_limit =
4331 this_thr->th.th_cg_roots->cg_thread_limit;
4332 }
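The contention-group bookkeeping above is a plain reference-count handoff; a condensed sketch with a hypothetical struct (not the real kmp_cg_root_t, and using delete in place of __kmp_free):

// Sketch: drop the thread's old CG root, adopt the primary thread's.
struct cg_root {
  int cg_nthreads;     // threads currently attached to this contention group
  int cg_thread_limit; // thread-limit ICV inherited by attached threads
};

void adopt_masters_cg_root(cg_root *&mine, cg_root *masters) {
  if (mine == masters)
    return;                        // already sharing the primary's CG root
  if (mine && mine->cg_nthreads-- == 1)
    delete mine;                   // last thread left the old CG: free it
  mine = masters;                  // share the primary thread's CG root
  ++mine->cg_nthreads;             // account for this thread
}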
4333
4334 /* Initialize dynamic dispatch */
4335 {
4336 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
4337 // Use team max_nproc since this will never change for the team.
4338 size_t disp_size =
4339 sizeof(dispatch_private_info_t) *
4340 (team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers);
4341 KD_TRACE(10, ("__kmp_initialize_info: T#%d max_nproc: %d\n", gtid,
4342 team->t.t_max_nproc));
4343 KMP_ASSERT(dispatch);
4344 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4345 KMP_DEBUG_ASSERT(dispatch == &team->t.t_dispatch[tid]);
4346
4347 dispatch->th_disp_index = 0;
4348 dispatch->th_doacross_buf_idx = 0;
4349 if (!dispatch->th_disp_buffer) {
4350 dispatch->th_disp_buffer =
4351 (dispatch_private_info_t *)__kmp_allocate(disp_size);
4352
4353 if (__kmp_storage_map) {
4354 __kmp_print_storage_map_gtid(
4355 gtid, &dispatch->th_disp_buffer[0],
4356 &dispatch->th_disp_buffer[team->t.t_max_nproc == 1
4357 ? 1
4358 : __kmp_dispatch_num_buffers],
4359 disp_size,
4360 "th_%d.th_dispatch.th_disp_buffer "
4361 "(team_%d.t_dispatch[%d].th_disp_buffer)",
4362 gtid, team->t.t_id, gtid);
4363 }
4364 } else {
4365 memset(&dispatch->th_disp_buffer[0], '\0', disp_size);
4366 }
4367
4368 dispatch->th_dispatch_pr_current = 0;
4369 dispatch->th_dispatch_sh_current = 0;
4370
4371 dispatch->th_deo_fcn = 0; /* ORDERED */
4372 dispatch->th_dxo_fcn = 0; /* END ORDERED */
4373 }
4374
4375 this_thr->th.th_next_pool = NULL__null;
4376
4377 if (!this_thr->th.th_task_state_memo_stack) {
4378 size_t i;
4379 this_thr->th.th_task_state_memo_stack =
4380 (kmp_uint8 *)__kmp_allocate(4 * sizeof(kmp_uint8));
4381 this_thr->th.th_task_state_top = 0;
4382 this_thr->th.th_task_state_stack_sz = 4;
4383 for (i = 0; i < this_thr->th.th_task_state_stack_sz;
4384 ++i) // zero init the stack
4385 this_thr->th.th_task_state_memo_stack[i] = 0;
4386 }
4387
4388 KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
4389 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
4390
4391 KMP_MB();
4392}
4393
4394/* Allocate a new thread for the requesting team. This is only called from
4395 within a forkjoin critical section. We first try to get an available thread
4396 from the thread pool; if none is available, we fork a new one, assuming we
4397 are able to create one. That should be assured, because the caller is
4398 expected to have checked it first. */
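Stripped to its skeleton, the strategy in the comment above is "reuse an idle pooled thread if there is one, otherwise fork a fresh one"; a small self-contained sketch with hypothetical types (none of the gtid, barrier, or capacity bookkeeping the real routine does):

#include <vector>

struct worker { int id; };                 // stands in for kmp_info_t

static std::vector<worker *> thread_pool;  // stands in for __kmp_thread_pool
static int next_id = 0;

static worker *spawn_worker() {            // roughly __kmp_create_worker
  return new worker{next_id++};
}
static void bind_to_team(worker * /*w*/) { // roughly __kmp_initialize_info
  // attach the thread to the requesting team
}

worker *allocate_worker() {
  if (!thread_pool.empty()) {              // 1) prefer an idle pooled thread
    worker *w = thread_pool.back();
    thread_pool.pop_back();
    bind_to_team(w);
    return w;
  }
  worker *w = spawn_worker();              // 2) otherwise fork a new one; the
  bind_to_team(w);                         //    caller already checked capacity
  return w;
}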
4399kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
4400 int new_tid) {
4401 kmp_team_t *serial_team;
4402 kmp_info_t *new_thr;
4403 int new_gtid;
4404
4405 KA_TRACE(20, ("__kmp_allocate_thread: T#%d\n", __kmp_get_gtid()));
4406 KMP_DEBUG_ASSERT(root && team);
4407#if !KMP_NESTED_HOT_TEAMS
4408 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(__kmp_get_gtid()));
4409#endif
4410 KMP_MB();
4411
4412 /* first, try to get one from the thread pool */
4413 if (__kmp_thread_pool) {
4414 new_thr = CCAST(kmp_info_t *, __kmp_thread_pool)const_cast<kmp_info_t *>(__kmp_thread_pool);
4415 __kmp_thread_pool = (volatile kmp_info_t *)new_thr->th.th_next_pool;
4416 if (new_thr == __kmp_thread_pool_insert_pt) {
4417 __kmp_thread_pool_insert_pt = NULL__null;
4418 }
4419 TCW_4(new_thr->th.th_in_pool, FALSE);
4420 __kmp_suspend_initialize_thread(new_thr);
4421 __kmp_lock_suspend_mx(new_thr);
4422 if (new_thr->th.th_active_in_pool == TRUE) {
4423 KMP_DEBUG_ASSERT(new_thr->th.th_active == TRUE);
4424 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
4425 new_thr->th.th_active_in_pool = FALSE;
4426 }
4427 __kmp_unlock_suspend_mx(new_thr);
4428
4429 KA_TRACE(20, ("__kmp_allocate_thread: T#%d using thread T#%d\n",
4430 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid));
4431 KMP_ASSERT(!new_thr->th.th_team);
4432 KMP_DEBUG_ASSERT(__kmp_nth < __kmp_threads_capacity);
4433
4434 /* setup the thread structure */
4435 __kmp_initialize_info(new_thr, team, new_tid,
4436 new_thr->th.th_info.ds.ds_gtid);
4437 KMP_DEBUG_ASSERT(new_thr->th.th_serial_team)if (!(new_thr->th.th_serial_team)) { __kmp_debug_assert("new_thr->th.th_serial_team"
, "openmp/runtime/src/kmp_runtime.cpp", 4437); }
;
4438
4439 TCW_4(__kmp_nth, __kmp_nth + 1)(__kmp_nth) = (__kmp_nth + 1);
4440
4441 new_thr->th.th_task_state = 0;
4442 new_thr->th.th_task_state_top = 0;
4443 new_thr->th.th_task_state_stack_sz = 4;
4444
4445 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
4446 // Make sure pool thread has transitioned to waiting on own thread struct
4447 KMP_DEBUG_ASSERT(new_thr->th.th_used_in_team.load() == 0)if (!(new_thr->th.th_used_in_team.load() == 0)) { __kmp_debug_assert
("new_thr->th.th_used_in_team.load() == 0", "openmp/runtime/src/kmp_runtime.cpp"
, 4447); }
;
4448 // Thread activated in __kmp_allocate_team when increasing team size
4449 }
4450
4451#ifdef KMP_ADJUST_BLOCKTIME
4452 /* Adjust blocktime back to zero if necessary */
4453 /* Middle initialization might not have occurred yet */
4454 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4455 if (__kmp_nth > __kmp_avail_proc) {
4456 __kmp_zero_bt = TRUE;
4457 }
4458 }
4459#endif /* KMP_ADJUST_BLOCKTIME */
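The #ifdef block above is a simple oversubscription check; schematically (globals renamed, mirroring __kmp_nth, __kmp_avail_proc and __kmp_zero_bt):

// Once more OpenMP threads exist than usable processors, force blocktime to
// zero so idle threads go to sleep instead of spin-waiting.
static int  nth = 0;              // running thread count
static int  avail_proc = 0;       // usable processors (0 = not yet known)
static bool zero_blocktime = false;

void maybe_zero_blocktime(bool blocktime_set_by_env) {
  if (!blocktime_set_by_env && avail_proc > 0 && nth > avail_proc)
    zero_blocktime = true;
}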
4460
4461#if KMP_DEBUG1
4462 // If thread entered pool via __kmp_free_thread, wait_flag should !=
4463 // KMP_BARRIER_PARENT_FLAG.
4464 int b;
4465 kmp_balign_t *balign = new_thr->th.th_bar;
4466 for (b = 0; b < bs_last_barrier; ++b)
4467 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG)if (!(balign[b].bb.wait_flag != 2)) { __kmp_debug_assert("balign[b].bb.wait_flag != 2"
, "openmp/runtime/src/kmp_runtime.cpp", 4467); }
;
4468#endif
4469
4470 KF_TRACE(10, ("__kmp_allocate_thread: T#%d using thread %p T#%d\n",if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_allocate_thread: T#%d using thread %p T#%d\n"
, __kmp_get_global_thread_id(), new_thr, new_thr->th.th_info
.ds.ds_gtid); }
4471 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_allocate_thread: T#%d using thread %p T#%d\n"
, __kmp_get_global_thread_id(), new_thr, new_thr->th.th_info
.ds.ds_gtid); }
;
4472
4473 KMP_MB();
4474 return new_thr;
4475 }
4476
4477 /* no, we'll fork a new one */
4478 KMP_ASSERT(__kmp_nth == __kmp_all_nth)if (!(__kmp_nth == __kmp_all_nth)) { __kmp_debug_assert("__kmp_nth == __kmp_all_nth"
, "openmp/runtime/src/kmp_runtime.cpp", 4478); }
;
4479 KMP_ASSERT(__kmp_all_nth < __kmp_threads_capacity)if (!(__kmp_all_nth < __kmp_threads_capacity)) { __kmp_debug_assert
("__kmp_all_nth < __kmp_threads_capacity", "openmp/runtime/src/kmp_runtime.cpp"
, 4479); }
;
4480
4481#if KMP_USE_MONITOR
4482 // If this is the first worker thread the RTL is creating, then also
4483 // launch the monitor thread. We try to do this as early as possible.
4484 if (!TCR_4(__kmp_init_monitor)(__kmp_init_monitor)) {
4485 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
4486 if (!TCR_4(__kmp_init_monitor)(__kmp_init_monitor)) {
4487 KF_TRACE(10, ("before __kmp_create_monitor\n"))if (kmp_f_debug >= 10) { __kmp_debug_printf ("before __kmp_create_monitor\n"
); }
;
4488 TCW_4(__kmp_init_monitor, 1)(__kmp_init_monitor) = (1);
4489 __kmp_create_monitor(&__kmp_monitor);
4490 KF_TRACE(10, ("after __kmp_create_monitor\n"))if (kmp_f_debug >= 10) { __kmp_debug_printf ("after __kmp_create_monitor\n"
); }
;
4491#if KMP_OS_WINDOWS0
4492 // AC: wait until monitor has started. This is a fix for CQ232808.
4493 // The reason is that if the library is loaded/unloaded in a loop with
4494 // small (parallel) work in between, then there is high probability that
4495 // monitor thread started after the library shutdown. At shutdown it is
4496 // too late to cope with the problem, because when the primary thread is
4497 // in DllMain (process detach) the monitor has no chances to start (it is
4498 // blocked), and primary thread has no means to inform the monitor that
4499 // the library has gone, because all the memory which the monitor can
4500 // access is going to be released/reset.
4501 while (TCR_4(__kmp_init_monitor)(__kmp_init_monitor) < 2) {
4502 KMP_YIELD(TRUE){ __kmp_x86_pause(); if (((!0)) && (((__kmp_use_yield
== 1) || (__kmp_use_yield == 2 && (((__kmp_nth) >
(__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc))))))) __kmp_yield
(); }
;
4503 }
4504 KF_TRACE(10, ("after monitor thread has started\n"))if (kmp_f_debug >= 10) { __kmp_debug_printf ("after monitor thread has started\n"
); }
;
4505#endif
4506 }
4507 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
4508 }
4509#endif
4510
4511 KMP_MB();
4512
4513 {
4514 int new_start_gtid = TCR_4(__kmp_init_hidden_helper_threads)(__kmp_init_hidden_helper_threads)
4515 ? 1
4516 : __kmp_hidden_helper_threads_num + 1;
4517
4518 for (new_gtid = new_start_gtid; TCR_PTR(__kmp_threads[new_gtid])((void *)(__kmp_threads[new_gtid])) != NULL__null;
4519 ++new_gtid) {
4520 KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity)if (!(new_gtid < __kmp_threads_capacity)) { __kmp_debug_assert
("new_gtid < __kmp_threads_capacity", "openmp/runtime/src/kmp_runtime.cpp"
, 4520); }
;
4521 }
4522
4523 if (TCR_4(__kmp_init_hidden_helper_threads)(__kmp_init_hidden_helper_threads)) {
4524 KMP_DEBUG_ASSERT(new_gtid <= __kmp_hidden_helper_threads_num)if (!(new_gtid <= __kmp_hidden_helper_threads_num)) { __kmp_debug_assert
("new_gtid <= __kmp_hidden_helper_threads_num", "openmp/runtime/src/kmp_runtime.cpp"
, 4524); }
;
4525 }
4526 }
4527
4528 /* allocate space for it. */
4529 new_thr = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t))___kmp_allocate((sizeof(kmp_info_t)), "openmp/runtime/src/kmp_runtime.cpp"
, 4529)
;
4530
4531 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr)((__kmp_threads[new_gtid])) = ((new_thr));
4532
4533#if USE_ITT_BUILD1 && USE_ITT_NOTIFY1 && KMP_DEBUG1
4534 // suppress race conditions detection on synchronization flags in debug mode
4535 // this helps to analyze library internals eliminating false positives
4536 __itt_suppress_mark_range(!__kmp_itt_suppress_mark_range_ptr__3_0) ? (void)0 : __kmp_itt_suppress_mark_range_ptr__3_0(
4537 __itt_suppress_range, __itt_suppress_threading_errors0x000000ff,
4538 &new_thr->th.th_sleep_loc, sizeof(new_thr->th.th_sleep_loc));
4539 __itt_suppress_mark_range(!__kmp_itt_suppress_mark_range_ptr__3_0) ? (void)0 : __kmp_itt_suppress_mark_range_ptr__3_0(
4540 __itt_suppress_range, __itt_suppress_threading_errors0x000000ff,
4541 &new_thr->th.th_reap_state, sizeof(new_thr->th.th_reap_state));
4542#if KMP_OS_WINDOWS0
4543 __itt_suppress_mark_range(!__kmp_itt_suppress_mark_range_ptr__3_0) ? (void)0 : __kmp_itt_suppress_mark_range_ptr__3_0(
4544 __itt_suppress_range, __itt_suppress_threading_errors0x000000ff,
4545 &new_thr->th.th_suspend_init, sizeof(new_thr->th.th_suspend_init));
4546#else
4547 __itt_suppress_mark_range(!__kmp_itt_suppress_mark_range_ptr__3_0) ? (void)0 : __kmp_itt_suppress_mark_range_ptr__3_0(__itt_suppress_range,
4548 __itt_suppress_threading_errors0x000000ff,
4549 &new_thr->th.th_suspend_init_count,
4550 sizeof(new_thr->th.th_suspend_init_count));
4551#endif
4552 // TODO: check if we need to also suppress b_arrived flags
4553 __itt_suppress_mark_range(!__kmp_itt_suppress_mark_range_ptr__3_0) ? (void)0 : __kmp_itt_suppress_mark_range_ptr__3_0(__itt_suppress_range,
4554 __itt_suppress_threading_errors0x000000ff,
4555 CCAST(kmp_uint64 *, &new_thr->th.th_bar[0].bb.b_go)const_cast<kmp_uint64 *>(&new_thr->th.th_bar[0].
bb.b_go)
,
4556 sizeof(new_thr->th.th_bar[0].bb.b_go));
4557 __itt_suppress_mark_range(!__kmp_itt_suppress_mark_range_ptr__3_0) ? (void)0 : __kmp_itt_suppress_mark_range_ptr__3_0(__itt_suppress_range,
4558 __itt_suppress_threading_errors0x000000ff,
4559 CCAST(kmp_uint64 *, &new_thr->th.th_bar[1].bb.b_go)const_cast<kmp_uint64 *>(&new_thr->th.th_bar[1].
bb.b_go)
,
4560 sizeof(new_thr->th.th_bar[1].bb.b_go));
4561 __itt_suppress_mark_range(!__kmp_itt_suppress_mark_range_ptr__3_0) ? (void)0 : __kmp_itt_suppress_mark_range_ptr__3_0(__itt_suppress_range,
4562 __itt_suppress_threading_errors0x000000ff,
4563 CCAST(kmp_uint64 *, &new_thr->th.th_bar[2].bb.b_go)const_cast<kmp_uint64 *>(&new_thr->th.th_bar[2].
bb.b_go)
,
4564 sizeof(new_thr->th.th_bar[2].bb.b_go));
4565#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG */
4566 if (__kmp_storage_map) {
4567 __kmp_print_thread_storage_map(new_thr, new_gtid);
4568 }
4569
4570 // add the reserve serialized team, initialized from the team's primary thread
4571 {
4572 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs(team);
4573 KF_TRACE(10, ("__kmp_allocate_thread: before th_serial/serial_team\n"))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_allocate_thread: before th_serial/serial_team\n"
); }
;
4574 new_thr->th.th_serial_team = serial_team =
4575 (kmp_team_t *)__kmp_allocate_team(root, 1, 1,
4576#if OMPT_SUPPORT1
4577 ompt_data_none{0}, // root parallel id
4578#endif
4579 proc_bind_default, &r_icvs,
4580 0 USE_NESTED_HOT_ARG(NULL), __null);
4581 }
4582 KMP_ASSERT(serial_team)if (!(serial_team)) { __kmp_debug_assert("serial_team", "openmp/runtime/src/kmp_runtime.cpp"
, 4582); }
;
4583 serial_team->t.t_serialized = 0; // AC: the team created in reserve, not for
4584 // execution (it is unused for now).
4585 serial_team->t.t_threads[0] = new_thr;
4586 KF_TRACE(10,if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n"
, new_thr); }
4587 ("__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n"
, new_thr); }
4588 new_thr))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n"
, new_thr); }
;
4589
4590 /* setup the thread structures */
4591 __kmp_initialize_info(new_thr, team, new_tid, new_gtid);
4592
4593#if USE_FAST_MEMORY3
4594 __kmp_initialize_fast_memory(new_thr);
4595#endif /* USE_FAST_MEMORY */
4596
4597#if KMP_USE_BGET1
4598 KMP_DEBUG_ASSERT(new_thr->th.th_local.bget_data == NULL)if (!(new_thr->th.th_local.bget_data == __null)) { __kmp_debug_assert
("new_thr->th.th_local.bget_data == __null", "openmp/runtime/src/kmp_runtime.cpp"
, 4598); }
;
4599 __kmp_initialize_bget(new_thr);
4600#endif
4601
4602 __kmp_init_random(new_thr); // Initialize random number generator
4603
4604 /* Initialize these only once when thread is grabbed for a team allocation */
4605 KA_TRACE(20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n"
, __kmp_get_global_thread_id(), 0, 0); }
4606 ("__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n"
, __kmp_get_global_thread_id(), 0, 0); }
4607 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n"
, __kmp_get_global_thread_id(), 0, 0); }
;
4608
4609 int b;
4610 kmp_balign_t *balign = new_thr->th.th_bar;
4611 for (b = 0; b < bs_last_barrier; ++b) {
4612 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE0;
4613 balign[b].bb.team = NULL__null;
4614 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING0;
4615 balign[b].bb.use_oncore_barrier = 0;
4616 }
4617
4618 TCW_PTR(new_thr->th.th_sleep_loc, NULL)((new_thr->th.th_sleep_loc)) = ((__null));
4619 new_thr->th.th_sleep_loc_type = flag_unset;
4620
4621 new_thr->th.th_spin_here = FALSE0;
4622 new_thr->th.th_next_waiting = 0;
4623#if KMP_OS_UNIX1
4624 new_thr->th.th_blocking = false;
4625#endif
4626
4627#if KMP_AFFINITY_SUPPORTED1
4628 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED(-2);
4629 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED(-2);
4630 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED(-2);
4631 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED(-2);
4632#endif
4633 new_thr->th.th_def_allocator = __kmp_def_allocator;
4634 new_thr->th.th_prev_level = 0;
4635 new_thr->th.th_prev_num_threads = 1;
4636
4637 TCW_4(new_thr->th.th_in_pool, FALSE)(new_thr->th.th_in_pool) = (0);
4638 new_thr->th.th_active_in_pool = FALSE0;
4639 TCW_4(new_thr->th.th_active, TRUE)(new_thr->th.th_active) = ((!0));
4640
4641 /* adjust the global counters */
4642 __kmp_all_nth++;
4643 __kmp_nth++;
4644
4645 // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) for low
4646 // numbers of procs, and method #2 (keyed API call) for higher numbers.
4647 if (__kmp_adjust_gtid_mode) {
4648 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
4649 if (TCR_4(__kmp_gtid_mode) != 2) {
4650 TCW_4(__kmp_gtid_mode, 2);
4651 }
4652 } else {
4653 if (TCR_4(__kmp_gtid_mode) != 1) {
4654 TCW_4(__kmp_gtid_mode, 1);
4655 }
4656 }
4657 }
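The mode switch above can be restated compactly (threshold and mode names assumed, following the comment: method #1 is a stack-pointer search, method #2 a keyed TLS lookup):

enum GtidMode { GTID_STACK_SEARCH = 1, GTID_TLS_KEY = 2 };

// Few threads: scanning stacks is cheap. Many threads: switch to the keyed
// TLS API, whose cost does not grow with the number of threads.
GtidMode choose_gtid_mode(int all_nth, int tls_gtid_min) {
  return (all_nth >= tls_gtid_min) ? GTID_TLS_KEY : GTID_STACK_SEARCH;
}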
4658
4659#ifdef KMP_ADJUST_BLOCKTIME1
4660 /* Adjust blocktime back to zero if necessary */
4661 /* Middle initialization might not have occurred yet */
4662 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4663 if (__kmp_nth > __kmp_avail_proc) {
4664 __kmp_zero_bt = TRUE(!0);
4665 }
4666 }
4667#endif /* KMP_ADJUST_BLOCKTIME */
4668
4669 /* actually fork it and create the new worker thread */
4670 KF_TRACE(if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_allocate_thread: before __kmp_create_worker: %p\n"
, new_thr); }
4671 10, ("__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_allocate_thread: before __kmp_create_worker: %p\n"
, new_thr); }
;
4672 __kmp_create_worker(new_gtid, new_thr, __kmp_stksize);
4673 KF_TRACE(10,if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_allocate_thread: after __kmp_create_worker: %p\n"
, new_thr); }
4674 ("__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_allocate_thread: after __kmp_create_worker: %p\n"
, new_thr); }
;
4675
4676 KA_TRACE(20, ("__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(),if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_thread: T#%d forked T#%d\n"
, __kmp_get_global_thread_id(), new_gtid); }
4677 new_gtid))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_thread: T#%d forked T#%d\n"
, __kmp_get_global_thread_id(), new_gtid); }
;
4678 KMP_MB();
4679 return new_thr;
4680}
4681
4682/* Reinitialize team for reuse.
4683 The hot team code calls this at every fork barrier, so the EPCC barrier
4684 tests are extremely sensitive to changes in it, especially writes to the
4685 team struct, which cause a cache invalidation in all threads.
4686 IF YOU TOUCH THIS ROUTINE, RUN EPCC C SYNCBENCH ON A BIG-IRON MACHINE!!! */
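One concrete way this routine stays cheap is the check-before-write idiom used by KMP_CHECK_UPDATE below: a shared team field is written only if the value actually changes, so unchanged cache lines are not invalidated in every worker. A minimal sketch of the same idiom:

// Write a shared field only when the new value differs, to avoid needless
// cache-line invalidation on the hot fork/join path.
template <typename T>
inline void check_update(T &dst, const T &src) {
  if (dst != src)
    dst = src;
}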
4687static void __kmp_reinitialize_team(kmp_team_t *team,
4688 kmp_internal_control_t *new_icvs,
4689 ident_t *loc) {
4690 KF_TRACE(10, ("__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
4691 team->t.t_threads[0], team));
4692 KMP_DEBUG_ASSERT(team && new_icvs);
4693 KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
4694 KMP_CHECK_UPDATE(team->t.t_ident, loc);
4695
4696 KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
4697 // Copy ICVs to the primary thread's implicit taskdata
4698 __kmp_init_implicit_task(loc, team->t.t_threads[0], team, 0, FALSE0);
4699 copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);
4700
4701 KF_TRACE(10, ("__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
4702 team->t.t_threads[0], team));
4703}
4704
4705/* Initialize the team data structure.
4706 This assumes the t_threads and t_max_nproc are already set.
4707 Also, we don't touch the arguments */
4708static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
4709 kmp_internal_control_t *new_icvs,
4710 ident_t *loc) {
4711 KF_TRACE(10, ("__kmp_initialize_team: enter: team=%p\n", team))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_initialize_team: enter: team=%p\n"
, team); }
;
4712
4713 /* verify */
4714 KMP_DEBUG_ASSERT(team)if (!(team)) { __kmp_debug_assert("team", "openmp/runtime/src/kmp_runtime.cpp"
, 4714); }
;
4715 KMP_DEBUG_ASSERT(new_nproc <= team->t.t_max_nproc)if (!(new_nproc <= team->t.t_max_nproc)) { __kmp_debug_assert
("new_nproc <= team->t.t_max_nproc", "openmp/runtime/src/kmp_runtime.cpp"
, 4715); }
;
4716 KMP_DEBUG_ASSERT(team->t.t_threads)if (!(team->t.t_threads)) { __kmp_debug_assert("team->t.t_threads"
, "openmp/runtime/src/kmp_runtime.cpp", 4716); }
;
4717 KMP_MB();
4718
4719 team->t.t_master_tid = 0; /* not needed */
4720 /* team->t.t_master_bar; not needed */
4721 team->t.t_serialized = new_nproc > 1 ? 0 : 1;
4722 team->t.t_nproc = new_nproc;
4723
4724 /* team->t.t_parent = NULL; TODO not needed & would mess up hot team */
4725 team->t.t_next_pool = NULL__null;
4726 /* memset( team->t.t_threads, 0, sizeof(kmp_info_t*)*new_nproc ); would mess
4727 * up hot team */
4728
4729 TCW_SYNC_PTR(team->t.t_pkfn, NULL)((team->t.t_pkfn)) = ((__null)); /* not needed */
4730 team->t.t_invoke = NULL__null; /* not needed */
4731
4732 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
4733 team->t.t_sched.sched = new_icvs->sched.sched;
4734
4735#if KMP_ARCH_X860 || KMP_ARCH_X86_641
4736 team->t.t_fp_control_saved = FALSE0; /* not needed */
4737 team->t.t_x87_fpu_control_word = 0; /* not needed */
4738 team->t.t_mxcsr = 0; /* not needed */
4739#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
4740
4741 team->t.t_construct = 0;
4742
4743 team->t.t_ordered.dt.t_value = 0;
4744 team->t.t_master_active = FALSE0;
4745
4746#ifdef KMP_DEBUG1
4747 team->t.t_copypriv_data = NULL__null; /* not necessary, but nice for debugging */
4748#endif
4749#if KMP_OS_WINDOWS0
4750 team->t.t_copyin_counter = 0; /* for barrier-free copyin implementation */
4751#endif
4752
4753 team->t.t_control_stack_top = NULL__null;
4754
4755 __kmp_reinitialize_team(team, new_icvs, loc);
4756
4757 KMP_MB();
4758 KF_TRACE(10, ("__kmp_initialize_team: exit: team=%p\n", team))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_initialize_team: exit: team=%p\n"
, team); }
;
4759}
4760
4761#if (KMP_OS_LINUX1 || KMP_OS_FREEBSD0) && KMP_AFFINITY_SUPPORTED1
4762/* Sets full mask for thread and returns old mask, no changes to structures. */
4763static void
4764__kmp_set_thread_affinity_mask_full_tmp(kmp_affin_mask_t *old_mask) {
4765 if (KMP_AFFINITY_CAPABLE()) {
4766 int status;
4767 if (old_mask != NULL) {
4768 status = __kmp_get_system_affinity(old_mask, TRUE);
4769 int error = errno;
4770 if (status != 0) {
4771 __kmp_fatal(KMP_MSG(ChangeThreadAffMaskError), KMP_ERR(error),
4772 __kmp_msg_null);
4773 }
4774 }
4775 __kmp_set_system_affinity(__kmp_affin_fullMask, TRUE);
4776 }
4777}
4778#endif
4779
4780#if KMP_AFFINITY_SUPPORTED1
4781
4782// __kmp_partition_places() is the heart of the OpenMP 4.0 affinity mechanism.
4783// It calculates the worker + primary thread's partition based upon the parent
4784// thread's partition, and binds each worker to a thread in their partition.
4785// The primary thread's partition should already include its current binding.
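All of the policy cases below share the same circular walk over the primary thread's place partition; a reduced sketch of the proc_bind_close assignment when the team fits inside the partition (plain indices, no kmp structures):

#include <vector>

// Assign each worker the next place after the previous one, wrapping both at
// the end of the [first_place, last_place] window and at the end of the
// global place list (the window itself may wrap modulo num_masks).
std::vector<int> close_assignment(int n_th, int masters_place, int first_place,
                                  int last_place, int num_masks) {
  std::vector<int> place_of(n_th);
  int place = masters_place;
  place_of[0] = masters_place;      // thread 0 is the primary; it is not rebound
  for (int f = 1; f < n_th; ++f) {
    if (place == last_place)
      place = first_place;          // reached the end of the partition
    else if (place == num_masks - 1)
      place = 0;                    // reached the end of the place list
    else
      ++place;
    place_of[f] = place;
  }
  return place_of;
}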
4786static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
4787 // Do not partition places for the hidden helper team
4788 if (KMP_HIDDEN_HELPER_TEAM(team))
4789 return;
4790 // Copy the primary thread's place partition to the team struct
4791 kmp_info_t *master_th = team->t.t_threads[0];
4792 KMP_DEBUG_ASSERT(master_th != NULL)if (!(master_th != __null)) { __kmp_debug_assert("master_th != __null"
, "openmp/runtime/src/kmp_runtime.cpp", 4792); }
;
4793 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
4794 int first_place = master_th->th.th_first_place;
4795 int last_place = master_th->th.th_last_place;
4796 int masters_place = master_th->th.th_current_place;
4797 int num_masks = __kmp_affinity.num_masks;
4798 team->t.t_first_place = first_place;
4799 team->t.t_last_place = last_place;
4800
4801 KA_TRACE(20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) "
4802 "bound to place %d partition = [%d,%d]\n",
4803 proc_bind, __kmp_gtid_from_thread(team->t.t_threads[0]),
4804 team->t.t_id, masters_place, first_place, last_place));
4805
4806 switch (proc_bind) {
4807
4808 case proc_bind_default:
4809 // Serial teams might have the proc_bind policy set to proc_bind_default.
4810 // Not an issue -- we don't rebind primary thread for any proc_bind policy.
4811 KMP_DEBUG_ASSERT(team->t.t_nproc == 1)if (!(team->t.t_nproc == 1)) { __kmp_debug_assert("team->t.t_nproc == 1"
, "openmp/runtime/src/kmp_runtime.cpp", 4811); }
;
4812 break;
4813
4814 case proc_bind_primary: {
4815 int f;
4816 int n_th = team->t.t_nproc;
4817 for (f = 1; f < n_th; f++) {
4818 kmp_info_t *th = team->t.t_threads[f];
4819 KMP_DEBUG_ASSERT(th != NULL)if (!(th != __null)) { __kmp_debug_assert("th != __null", "openmp/runtime/src/kmp_runtime.cpp"
, 4819); }
;
4820 th->th.th_first_place = first_place;
4821 th->th.th_last_place = last_place;
4822 th->th.th_new_place = masters_place;
4823 if (__kmp_display_affinity && masters_place != th->th.th_current_place &&
4824 team->t.t_display_affinity != 1) {
4825 team->t.t_display_affinity = 1;
4826 }
4827
4828 KA_TRACE(100, ("__kmp_partition_places: primary: T#%d(%d:%d) place %d "if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: primary: T#%d(%d:%d) place %d "
"partition = [%d,%d]\n", __kmp_gtid_from_thread(team->t.t_threads
[f]), team->t.t_id, f, masters_place, first_place, last_place
); }
4829 "partition = [%d,%d]\n",if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: primary: T#%d(%d:%d) place %d "
"partition = [%d,%d]\n", __kmp_gtid_from_thread(team->t.t_threads
[f]), team->t.t_id, f, masters_place, first_place, last_place
); }
4830 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: primary: T#%d(%d:%d) place %d "
"partition = [%d,%d]\n", __kmp_gtid_from_thread(team->t.t_threads
[f]), team->t.t_id, f, masters_place, first_place, last_place
); }
4831 f, masters_place, first_place, last_place))if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: primary: T#%d(%d:%d) place %d "
"partition = [%d,%d]\n", __kmp_gtid_from_thread(team->t.t_threads
[f]), team->t.t_id, f, masters_place, first_place, last_place
); }
;
4832 }
4833 } break;
4834
4835 case proc_bind_close: {
4836 int f;
4837 int n_th = team->t.t_nproc;
4838 int n_places;
4839 if (first_place <= last_place) {
4840 n_places = last_place - first_place + 1;
4841 } else {
4842 n_places = num_masks - first_place + last_place + 1;
4843 }
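The two-armed computation above is just the size of a possibly wrapped circular window of places; for instance num_masks = 8, first_place = 6, last_place = 1 gives 8 - 6 + 1 + 1 = 4 places {6, 7, 0, 1}. As a helper:

// Number of places in the circular window [first_place, last_place].
int window_size(int first_place, int last_place, int num_masks) {
  return (first_place <= last_place)
             ? last_place - first_place + 1              // contiguous window
             : num_masks - first_place + last_place + 1; // window wraps around
}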
4844 if (n_th <= n_places) {
4845 int place = masters_place;
4846 for (f = 1; f < n_th; f++) {
4847 kmp_info_t *th = team->t.t_threads[f];
4848 KMP_DEBUG_ASSERT(th != NULL)if (!(th != __null)) { __kmp_debug_assert("th != __null", "openmp/runtime/src/kmp_runtime.cpp"
, 4848); }
;
4849
4850 if (place == last_place) {
4851 place = first_place;
4852 } else if (place == (num_masks - 1)) {
4853 place = 0;
4854 } else {
4855 place++;
4856 }
4857 th->th.th_first_place = first_place;
4858 th->th.th_last_place = last_place;
4859 th->th.th_new_place = place;
4860 if (__kmp_display_affinity && place != th->th.th_current_place &&
4861 team->t.t_display_affinity != 1) {
4862 team->t.t_display_affinity = 1;
4863 }
4864
4865 KA_TRACE(100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d "if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
"partition = [%d,%d]\n", __kmp_gtid_from_thread(team->t.t_threads
[f]), team->t.t_id, f, place, first_place, last_place); }
4866 "partition = [%d,%d]\n",if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
"partition = [%d,%d]\n", __kmp_gtid_from_thread(team->t.t_threads
[f]), team->t.t_id, f, place, first_place, last_place); }
4867 __kmp_gtid_from_thread(team->t.t_threads[f]),if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
"partition = [%d,%d]\n", __kmp_gtid_from_thread(team->t.t_threads
[f]), team->t.t_id, f, place, first_place, last_place); }
4868 team->t.t_id, f, place, first_place, last_place))if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
"partition = [%d,%d]\n", __kmp_gtid_from_thread(team->t.t_threads
[f]), team->t.t_id, f, place, first_place, last_place); }
;
4869 }
4870 } else {
4871 int S, rem, gap, s_count;
4872 S = n_th / n_places;
4873 s_count = 0;
4874 rem = n_th - (S * n_places);
4875 gap = rem > 0 ? n_places / rem : n_places;
4876 int place = masters_place;
4877 int gap_ct = gap;
4878 for (f = 0; f < n_th; f++) {
4879 kmp_info_t *th = team->t.t_threads[f];
4880 KMP_DEBUG_ASSERT(th != NULL)if (!(th != __null)) { __kmp_debug_assert("th != __null", "openmp/runtime/src/kmp_runtime.cpp"
, 4880); }
;
4881
4882 th->th.th_first_place = first_place;
4883 th->th.th_last_place = last_place;
4884 th->th.th_new_place = place;
4885 if (__kmp_display_affinity && place != th->th.th_current_place &&
4886 team->t.t_display_affinity != 1) {
4887 team->t.t_display_affinity = 1;
4888 }
4889 s_count++;
4890
4891 if ((s_count == S) && rem && (gap_ct == gap)) {
4892 // do nothing, add an extra thread to place on next iteration
4893 } else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
4894 // we added an extra thread to this place; move to next place
4895 if (place == last_place) {
4896 place = first_place;
4897 } else if (place == (num_masks - 1)) {
4898 place = 0;
4899 } else {
4900 place++;
4901 }
4902 s_count = 0;
4903 gap_ct = 1;
4904 rem--;
4905 } else if (s_count == S) { // place full; don't add extra
4906 if (place == last_place) {
4907 place = first_place;
4908 } else if (place == (num_masks - 1)) {
4909 place = 0;
4910 } else {
4911 place++;
4912 }
4913 gap_ct++;
4914 s_count = 0;
4915 }
4916
4917 KA_TRACE(100,if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
"partition = [%d,%d]\n", __kmp_gtid_from_thread(team->t.t_threads
[f]), team->t.t_id, f, th->th.th_new_place, first_place
, last_place); }
4918 ("__kmp_partition_places: close: T#%d(%d:%d) place %d "if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
"partition = [%d,%d]\n", __kmp_gtid_from_thread(team->t.t_threads
[f]), team->t.t_id, f, th->th.th_new_place, first_place
, last_place); }
4919 "partition = [%d,%d]\n",if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
"partition = [%d,%d]\n", __kmp_gtid_from_thread(team->t.t_threads
[f]), team->t.t_id, f, th->th.th_new_place, first_place
, last_place); }
4920 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f,if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
"partition = [%d,%d]\n", __kmp_gtid_from_thread(team->t.t_threads
[f]), team->t.t_id, f, th->th.th_new_place, first_place
, last_place); }
4921 th->th.th_new_place, first_place, last_place))if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
"partition = [%d,%d]\n", __kmp_gtid_from_thread(team->t.t_threads
[f]), team->t.t_id, f, th->th.th_new_place, first_place
, last_place); }
;
4922 }
4923 KMP_DEBUG_ASSERT(place == masters_place)if (!(place == masters_place)) { __kmp_debug_assert("place == masters_place"
, "openmp/runtime/src/kmp_runtime.cpp", 4923); }
;
4924 }
4925 } break;
4926
4927 case proc_bind_spread: {
4928 int f;
4929 int n_th = team->t.t_nproc;
4930 int n_places;
4931 int thidx;
4932 if (first_place <= last_place) {
4933 n_places = last_place - first_place + 1;
4934 } else {
4935 n_places = num_masks - first_place + last_place + 1;
4936 }
4937 if (n_th <= n_places) {
4938 int place = -1;
4939
4940 if (n_places != num_masks) {
4941 int S = n_places / n_th;
4942 int s_count, rem, gap, gap_ct;
4943
4944 place = masters_place;
4945 rem = n_places - n_th * S;
4946 gap = rem ? n_th / rem : 1;
4947 gap_ct = gap;
4948 thidx = n_th;
4949 if (update_master_only == 1)
4950 thidx = 1;
4951 for (f = 0; f < thidx; f++) {
4952 kmp_info_t *th = team->t.t_threads[f];
4953 KMP_DEBUG_ASSERT(th != NULL)if (!(th != __null)) { __kmp_debug_assert("th != __null", "openmp/runtime/src/kmp_runtime.cpp"
, 4953); }
;
4954
4955 th->th.th_first_place = place;
4956 th->th.th_new_place = place;
4957 if (__kmp_display_affinity && place != th->th.th_current_place &&
4958 team->t.t_display_affinity != 1) {
4959 team->t.t_display_affinity = 1;
4960 }
4961 s_count = 1;
4962 while (s_count < S) {
4963 if (place == last_place) {
4964 place = first_place;
4965 } else if (place == (num_masks - 1)) {
4966 place = 0;
4967 } else {
4968 place++;
4969 }
4970 s_count++;
4971 }
4972 if (rem && (gap_ct == gap)) {
4973 if (place == last_place) {
4974 place = first_place;
4975 } else if (place == (num_masks - 1)) {
4976 place = 0;
4977 } else {
4978 place++;
4979 }
4980 rem--;
4981 gap_ct = 0;
4982 }
4983 th->th.th_last_place = place;
4984 gap_ct++;
4985
4986 if (place == last_place) {
4987 place = first_place;
4988 } else if (place == (num_masks - 1)) {
4989 place = 0;
4990 } else {
4991 place++;
4992 }
4993
4994 KA_TRACE(100,if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
"partition = [%d,%d], num_masks: %u\n", __kmp_gtid_from_thread
(team->t.t_threads[f]), team->t.t_id, f, th->th.th_new_place
, th->th.th_first_place, th->th.th_last_place, num_masks
); }
4995 ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
"partition = [%d,%d], num_masks: %u\n", __kmp_gtid_from_thread
(team->t.t_threads[f]), team->t.t_id, f, th->th.th_new_place
, th->th.th_first_place, th->th.th_last_place, num_masks
); }
4996 "partition = [%d,%d], num_masks: %u\n",if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
"partition = [%d,%d], num_masks: %u\n", __kmp_gtid_from_thread
(team->t.t_threads[f]), team->t.t_id, f, th->th.th_new_place
, th->th.th_first_place, th->th.th_last_place, num_masks
); }
4997 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
"partition = [%d,%d], num_masks: %u\n", __kmp_gtid_from_thread
(team->t.t_threads[f]), team->t.t_id, f, th->th.th_new_place
, th->th.th_first_place, th->th.th_last_place, num_masks
); }
4998 f, th->th.th_new_place, th->th.th_first_place,if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
"partition = [%d,%d], num_masks: %u\n", __kmp_gtid_from_thread
(team->t.t_threads[f]), team->t.t_id, f, th->th.th_new_place
, th->th.th_first_place, th->th.th_last_place, num_masks
); }
4999 th->th.th_last_place, num_masks))if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
"partition = [%d,%d], num_masks: %u\n", __kmp_gtid_from_thread
(team->t.t_threads[f]), team->t.t_id, f, th->th.th_new_place
, th->th.th_first_place, th->th.th_last_place, num_masks
); }
;
5000 }
5001 } else {
5002 /* With a uniform space of available computation places we can create
5003 T partitions of roughly round(P/T) places each and put each thread into
5004 the first place of its partition. */
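The spacing arithmetic in the branch below carves the place range into n_th roughly equal sub-partitions starting at the primary's place; a sketch of just the index computation (the real loop additionally handles wrap-around past n_places and clamps the last index):

#include <vector>

struct place_range { int first, last; };

std::vector<place_range> spread_partitions(int n_th, int n_places,
                                           int masters_place) {
  std::vector<place_range> out;
  double spacing =
      static_cast<double>(n_places + 1) / static_cast<double>(n_th);
  double current = static_cast<double>(masters_place);
  for (int f = 0; f < n_th; ++f) {
    int first = static_cast<int>(current);
    int last = static_cast<int>(current + spacing) - 1;
    out.push_back({first, last});
    current += spacing;
  }
  return out;
}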
5005 double current = static_cast<double>(masters_place);
5006 double spacing =
5007 (static_cast<double>(n_places + 1) / static_cast<double>(n_th));
5008 int first, last;
5009 kmp_info_t *th;
5010
5011 thidx = n_th + 1;
5012 if (update_master_only == 1)
5013 thidx = 1;
5014 for (f = 0; f < thidx; f++) {
5015 first = static_cast<int>(current);
5016 last = static_cast<int>(current + spacing) - 1;
5017 KMP_DEBUG_ASSERT(last >= first)if (!(last >= first)) { __kmp_debug_assert("last >= first"
, "openmp/runtime/src/kmp_runtime.cpp", 5017); }
;
5018 if (first >= n_places) {
5019 if (masters_place) {
5020 first -= n_places;
5021 last -= n_places;
5022 if (first == (masters_place + 1)) {
5023 KMP_DEBUG_ASSERT(f == n_th)if (!(f == n_th)) { __kmp_debug_assert("f == n_th", "openmp/runtime/src/kmp_runtime.cpp"
, 5023); }
;
5024 first--;
5025 }
5026 if (last == masters_place) {
5027 KMP_DEBUG_ASSERT(f == (n_th - 1))if (!(f == (n_th - 1))) { __kmp_debug_assert("f == (n_th - 1)"
, "openmp/runtime/src/kmp_runtime.cpp", 5027); }
;
5028 last--;
5029 }
5030 } else {
5031 KMP_DEBUG_ASSERT(f == n_th)if (!(f == n_th)) { __kmp_debug_assert("f == n_th", "openmp/runtime/src/kmp_runtime.cpp"
, 5031); }
;
5032 first = 0;
5033 last = 0;
5034 }
5035 }
5036 if (last >= n_places) {
5037 last = (n_places - 1);
5038 }
5039 place = first;
5040 current += spacing;
5041 if (f < n_th) {
5042 KMP_DEBUG_ASSERT(0 <= first)if (!(0 <= first)) { __kmp_debug_assert("0 <= first", "openmp/runtime/src/kmp_runtime.cpp"
, 5042); }
;
5043 KMP_DEBUG_ASSERT(n_places > first)if (!(n_places > first)) { __kmp_debug_assert("n_places > first"
, "openmp/runtime/src/kmp_runtime.cpp", 5043); }
;
5044 KMP_DEBUG_ASSERT(0 <= last)if (!(0 <= last)) { __kmp_debug_assert("0 <= last", "openmp/runtime/src/kmp_runtime.cpp"
, 5044); }
;
5045 KMP_DEBUG_ASSERT(n_places > last)if (!(n_places > last)) { __kmp_debug_assert("n_places > last"
, "openmp/runtime/src/kmp_runtime.cpp", 5045); }
;
5046 KMP_DEBUG_ASSERT(last_place >= first_place)if (!(last_place >= first_place)) { __kmp_debug_assert("last_place >= first_place"
, "openmp/runtime/src/kmp_runtime.cpp", 5046); }
;
5047 th = team->t.t_threads[f];
5048 KMP_DEBUG_ASSERT(th)if (!(th)) { __kmp_debug_assert("th", "openmp/runtime/src/kmp_runtime.cpp"
, 5048); }
;
5049 th->th.th_first_place = first;
5050 th->th.th_new_place = place;
5051 th->th.th_last_place = last;
5052 if (__kmp_display_affinity && place != th->th.th_current_place &&
5053 team->t.t_display_affinity != 1) {
5054 team->t.t_display_affinity = 1;
5055 }
5056 KA_TRACE(100,if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
"partition = [%d,%d], spacing = %.4f\n", __kmp_gtid_from_thread
(team->t.t_threads[f]), team->t.t_id, f, th->th.th_new_place
, th->th.th_first_place, th->th.th_last_place, spacing)
; }
5057 ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
"partition = [%d,%d], spacing = %.4f\n", __kmp_gtid_from_thread
(team->t.t_threads[f]), team->t.t_id, f, th->th.th_new_place
, th->th.th_first_place, th->th.th_last_place, spacing)
; }
5058 "partition = [%d,%d], spacing = %.4f\n",if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
"partition = [%d,%d], spacing = %.4f\n", __kmp_gtid_from_thread
(team->t.t_threads[f]), team->t.t_id, f, th->th.th_new_place
, th->th.th_first_place, th->th.th_last_place, spacing)
; }
5059 __kmp_gtid_from_thread(team->t.t_threads[f]),if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
"partition = [%d,%d], spacing = %.4f\n", __kmp_gtid_from_thread
(team->t.t_threads[f]), team->t.t_id, f, th->th.th_new_place
, th->th.th_first_place, th->th.th_last_place, spacing)
; }
5060 team->t.t_id, f, th->th.th_new_place,if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
"partition = [%d,%d], spacing = %.4f\n", __kmp_gtid_from_thread
(team->t.t_threads[f]), team->t.t_id, f, th->th.th_new_place
, th->th.th_first_place, th->th.th_last_place, spacing)
; }
5061 th->th.th_first_place, th->th.th_last_place, spacing))if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
"partition = [%d,%d], spacing = %.4f\n", __kmp_gtid_from_thread
(team->t.t_threads[f]), team->t.t_id, f, th->th.th_new_place
, th->th.th_first_place, th->th.th_last_place, spacing)
; }
;
5062 }
5063 }
5064 }
5065 KMP_DEBUG_ASSERT(update_master_only || place == masters_place)if (!(update_master_only || place == masters_place)) { __kmp_debug_assert
("update_master_only || place == masters_place", "openmp/runtime/src/kmp_runtime.cpp"
, 5065); }
;
5066 } else {
5067 int S, rem, gap, s_count;
5068 S = n_th / n_places;
5069 s_count = 0;
5070 rem = n_th - (S * n_places);
5071 gap = rem > 0 ? n_places / rem : n_places;
5072 int place = masters_place;
5073 int gap_ct = gap;
5074 thidx = n_th;
5075 if (update_master_only == 1)
5076 thidx = 1;
5077 for (f = 0; f < thidx; f++) {
5078 kmp_info_t *th = team->t.t_threads[f];
5079 KMP_DEBUG_ASSERT(th != NULL)if (!(th != __null)) { __kmp_debug_assert("th != __null", "openmp/runtime/src/kmp_runtime.cpp"
, 5079); }
;
5080
5081 th->th.th_first_place = place;
5082 th->th.th_last_place = place;
5083 th->th.th_new_place = place;
5084 if (__kmp_display_affinity && place != th->th.th_current_place &&
5085 team->t.t_display_affinity != 1) {
5086 team->t.t_display_affinity = 1;
5087 }
5088 s_count++;
5089
5090 if ((s_count == S) && rem && (gap_ct == gap)) {
5091 // do nothing, add an extra thread to place on next iteration
5092 } else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
5093 // we added an extra thread to this place; move on to next place
5094 if (place == last_place) {
5095 place = first_place;
5096 } else if (place == (num_masks - 1)) {
5097 place = 0;
5098 } else {
5099 place++;
5100 }
5101 s_count = 0;
5102 gap_ct = 1;
5103 rem--;
5104 } else if (s_count == S) { // place is full; don't add extra thread
5105 if (place == last_place) {
5106 place = first_place;
5107 } else if (place == (num_masks - 1)) {
5108 place = 0;
5109 } else {
5110 place++;
5111 }
5112 gap_ct++;
5113 s_count = 0;
5114 }
5115
5116 KA_TRACE(100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
5117 "partition = [%d,%d]\n",
5118 __kmp_gtid_from_thread(team->t.t_threads[f]),
5119 team->t.t_id, f, th->th.th_new_place,
5120 th->th.th_first_place, th->th.th_last_place));
5121 }
5122 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
5123 }
5124 } break;
5125
5126 default:
5127 break;
5128 }
5129
5130 KA_TRACE(20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id));
5131}
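
The else branch just above (source lines 5067-5123) distributes n_th threads over n_places places: S = n_th / n_places threads go to every place, and the rem = n_th % n_places leftover threads are handed out one extra at a time to places spaced roughly gap places apart. The following standalone sketch of that arithmetic is an editorial illustration only (it is not part of kmp_runtime.cpp and the names are made up); it wraps the place index with a simple modulo instead of the first_place/last_place logic used above.

#include <cstdio>
#include <vector>

// Return the place index assigned to each of n_th threads.
static std::vector<int> spread_over_places(int n_th, int n_places) {
  std::vector<int> place_of(n_th);
  int S = n_th / n_places;              // base threads per place
  int rem = n_th - S * n_places;        // places that get one extra thread
  int gap = rem > 0 ? n_places / rem : n_places;
  int place = 0, s_count = 0, gap_ct = gap;
  for (int f = 0; f < n_th; ++f) {
    place_of[f] = place;
    ++s_count;
    if (s_count == S && rem && gap_ct == gap) {
      // keep this place; it receives one extra thread on the next iteration
    } else if (s_count == S + 1 && rem && gap_ct == gap) {
      place = (place + 1) % n_places;   // extra thread placed; move on
      s_count = 0;
      gap_ct = 1;
      --rem;
    } else if (s_count == S) {
      place = (place + 1) % n_places;   // place is full
      ++gap_ct;
      s_count = 0;
    }
  }
  return place_of;
}

int main() {
  for (int p : spread_over_places(10, 4)) // e.g. 10 threads over 4 places
    std::printf("%d ", p);               // prints: 0 0 0 1 1 2 2 2 3 3
  std::printf("\n");
  return 0;
}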
5132
5133#endif // KMP_AFFINITY_SUPPORTED
5134
5135/* allocate a new team data structure to use. take one off of the free pool if
5136 available */
5137kmp_team_t *
5138__kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
5139#if OMPT_SUPPORT1
5140 ompt_data_t ompt_parallel_data,
5141#endif
5142 kmp_proc_bind_t new_proc_bind,
5143 kmp_internal_control_t *new_icvs,
5144 int argc USE_NESTED_HOT_ARG(kmp_info_t *master)) {
5145 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);
5146 int f;
5147 kmp_team_t *team;
5148 int use_hot_team = !root->r.r_active;
5149 int level = 0;
5150 int do_place_partition = 1;
5151
5152 KA_TRACE(20, ("__kmp_allocate_team: called\n"));
5153 KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0);
5154 KMP_DEBUG_ASSERT(max_nproc >= new_nproc);
5155 KMP_MB();
5156
5157#if KMP_NESTED_HOT_TEAMS1
5158 kmp_hot_team_ptr_t *hot_teams;
5159 if (master) {
5160 team = master->th.th_team;
5161 level = team->t.t_active_level;
5162 if (master->th.th_teams_microtask) { // in teams construct?
5163 if (master->th.th_teams_size.nteams > 1 &&
5164 ( // #teams > 1
5165 team->t.t_pkfn ==
5166 (microtask_t)__kmp_teams_master || // inner fork of the teams
5167 master->th.th_teams_level <
5168 team->t.t_level)) { // or nested parallel inside the teams
5169 ++level; // not increment if #teams==1, or for outer fork of the teams;
5170 // increment otherwise
5171 }
5172 // Do not perform the place partition if inner fork of the teams
5173 // Wait until nested parallel region encountered inside teams construct
5174 if ((master->th.th_teams_size.nteams == 1 &&
5175 master->th.th_teams_level >= team->t.t_level) ||
5176 (team->t.t_pkfn == (microtask_t)__kmp_teams_master))
5177 do_place_partition = 0;
5178 }
5179 hot_teams = master->th.th_hot_teams;
5180 if (level < __kmp_hot_teams_max_level && hot_teams &&
5181 hot_teams[level].hot_team) {
5182 // hot team has already been allocated for given level
5183 use_hot_team = 1;
5184 } else {
5185 use_hot_team = 0;
5186 }
5187 } else {
5188 // check we won't access uninitialized hot_teams, just in case
5189 KMP_DEBUG_ASSERT(new_nproc == 1);
5190 }
5191#endif
5192 // Optimization to use a "hot" team
5193 if (use_hot_team && new_nproc > 1) {
5194 KMP_DEBUG_ASSERT(new_nproc <= max_nproc);
5195#if KMP_NESTED_HOT_TEAMS1
5196 team = hot_teams[level].hot_team;
5197#else
5198 team = root->r.r_hot_team;
5199#endif
5200#if KMP_DEBUG1
5201 if (__kmp_tasking_mode != tskm_immediate_exec) {
5202 KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "
5203 "task_team[1] = %p before reinit\n",
5204 team->t.t_task_team[0], team->t.t_task_team[1]));
5205 }
5206#endif
5207
5208 if (team->t.t_nproc != new_nproc &&
5209 __kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5210 // Distributed barrier may need a resize
5211 int old_nthr = team->t.t_nproc;
5212 __kmp_resize_dist_barrier(team, old_nthr, new_nproc);
5213 }
5214
5215 // If not doing the place partition, then reset the team's proc bind
5216 // to indicate that partitioning of all threads still needs to take place
5217 if (do_place_partition == 0)
5218 team->t.t_proc_bind = proc_bind_default;
5219 // Has the number of threads changed?
5220 /* Let's assume the most common case is that the number of threads is
5221 unchanged, and put that case first. */
5222 if (team->t.t_nproc == new_nproc) { // Check changes in number of threads
5223 KA_TRACE(20, ("__kmp_allocate_team: reusing hot team\n"));
5224 // This case can mean that omp_set_num_threads() was called and the hot
5225 // team size was already reduced, so we check the special flag
5226 if (team->t.t_size_changed == -1) {
5227 team->t.t_size_changed = 1;
5228 } else {
5229 KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
5230 }
5231
5232 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
5233 kmp_r_sched_t new_sched = new_icvs->sched;
5234 // set primary thread's schedule as new run-time schedule
5235 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);
5236
5237 __kmp_reinitialize_team(team, new_icvs,
5238 root->r.r_uber_thread->th.th_ident);
5239
5240 KF_TRACE(10, ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", 0,
5241 team->t.t_threads[0], team));
5242 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
5243
5244#if KMP_AFFINITY_SUPPORTED1
5245 if ((team->t.t_size_changed == 0) &&
5246 (team->t.t_proc_bind == new_proc_bind)) {
5247 if (new_proc_bind == proc_bind_spread) {
5248 if (do_place_partition) {
5249 // add flag to update only master for spread
5250 __kmp_partition_places(team, 1);
5251 }
5252 }
5253 KA_TRACE(200, ("__kmp_allocate_team: reusing hot team #%d bindings: "
5254 "proc_bind = %d, partition = [%d,%d]\n",
5255 team->t.t_id, new_proc_bind, team->t.t_first_place,
5256 team->t.t_last_place));
5257 } else {
5258 if (do_place_partition) {
5259 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5260 __kmp_partition_places(team);
5261 }
5262 }
5263#else
5264 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5265#endif /* KMP_AFFINITY_SUPPORTED */
5266 } else if (team->t.t_nproc > new_nproc) {
5267 KA_TRACE(20,
5268 ("__kmp_allocate_team: decreasing hot team thread count to %d\n",
5269 new_nproc));
5270
5271 team->t.t_size_changed = 1;
5272 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5273 // Barrier size already reduced earlier in this function
5274 // Activate team threads via th_used_in_team
5275 __kmp_add_threads_to_team(team, new_nproc);
5276 }
5277#if KMP_NESTED_HOT_TEAMS1
5278 if (__kmp_hot_teams_mode == 0) {
5279 // AC: saved number of threads should correspond to team's value in this
5280 // mode, can be bigger in mode 1, when hot team has threads in reserve
5281 KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
5282 hot_teams[level].hot_team_nth = new_nproc;
5283#endif // KMP_NESTED_HOT_TEAMS
5284 /* release the extra threads we don't need any more */
5285 for (f = new_nproc; f < team->t.t_nproc; f++) {
5286 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5287 if (__kmp_tasking_mode != tskm_immediate_exec) {
5288 // When decreasing team size, threads no longer in the team should
5289 // unref task team.
5290 team->t.t_threads[f]->th.th_task_team = NULL__null;
5291 }
5292 __kmp_free_thread(team->t.t_threads[f]);
5293 team->t.t_threads[f] = NULL__null;
5294 }
5295#if KMP_NESTED_HOT_TEAMS1
5296 } // (__kmp_hot_teams_mode == 0)
5297 else {
5298 // When keeping extra threads in team, switch threads to wait on own
5299 // b_go flag
5300 for (f = new_nproc; f < team->t.t_nproc; ++f) {
5301 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5302 kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar;
5303 for (int b = 0; b < bs_last_barrier; ++b) {
5304 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG2) {
5305 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG3;
5306 }
5307 KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0);
5308 }
5309 }
5310 }
5311#endif // KMP_NESTED_HOT_TEAMS
5312 team->t.t_nproc = new_nproc;
5313 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
5314 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_icvs->sched.sched);
5315 __kmp_reinitialize_team(team, new_icvs,
5316 root->r.r_uber_thread->th.th_ident);
5317
5318 // Update remaining threads
5319 for (f = 0; f < new_nproc; ++f) {
5320 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
5321 }
5322
5323 // restore the current task state of the primary thread: should be the
5324 // implicit task
5325 KF_TRACE(10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 0,
5326 team->t.t_threads[0], team));
5327
5328 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
5329
5330#ifdef KMP_DEBUG1
5331 for (f = 0; f < team->t.t_nproc; f++) {
5332 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5333 team->t.t_threads[f]->th.th_team_nproc ==
5334 team->t.t_nproc);
5335 }
5336#endif
5337
5338 if (do_place_partition) {
5339 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5340#if KMP_AFFINITY_SUPPORTED1
5341 __kmp_partition_places(team);
5342#endif
5343 }
5344 } else { // team->t.t_nproc < new_nproc
5345#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
5346 kmp_affin_mask_t *old_mask;
5347 if (KMP_AFFINITY_CAPABLE()) {
5348 KMP_CPU_ALLOC(old_mask);
5349 }
5350#endif
5351
5352 KA_TRACE(20,
5353 ("__kmp_allocate_team: increasing hot team thread count to %d\n",
5354 new_nproc));
5355 int old_nproc = team->t.t_nproc; // save old value and use to update only
5356 team->t.t_size_changed = 1;
5357
5358#if KMP_NESTED_HOT_TEAMS1
5359 int avail_threads = hot_teams[level].hot_team_nth;
5360 if (new_nproc < avail_threads)
5361 avail_threads = new_nproc;
5362 kmp_info_t **other_threads = team->t.t_threads;
5363 for (f = team->t.t_nproc; f < avail_threads; ++f) {
5364 // Adjust barrier data of reserved threads (if any) of the team
5365 // Other data will be set in __kmp_initialize_info() below.
5366 int b;
5367 kmp_balign_t *balign = other_threads[f]->th.th_bar;
5368 for (b = 0; b < bs_last_barrier; ++b) {
5369 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5370 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5371#if USE_DEBUGGER0
5372 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5373#endif
5374 }
5375 }
5376 if (hot_teams[level].hot_team_nth >= new_nproc) {
5377 // we have all needed threads in reserve, no need to allocate any
5378 // this only possible in mode 1, cannot have reserved threads in mode 0
5379 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
5380 team->t.t_nproc = new_nproc; // just get reserved threads involved
5381 } else {
5382 // We may have some threads in reserve, but not enough;
5383 // get reserved threads involved if any.
5384 team->t.t_nproc = hot_teams[level].hot_team_nth;
5385 hot_teams[level].hot_team_nth = new_nproc; // adjust hot team max size
5386#endif // KMP_NESTED_HOT_TEAMS
5387 if (team->t.t_max_nproc < new_nproc) {
5388 /* reallocate larger arrays */
5389 __kmp_reallocate_team_arrays(team, new_nproc);
5390 __kmp_reinitialize_team(team, new_icvs, NULL__null);
5391 }
5392
5393#if (KMP_OS_LINUX1 || KMP_OS_FREEBSD0) && KMP_AFFINITY_SUPPORTED1
5394 /* Temporarily set full mask for primary thread before creation of
5395 workers. The reason is that workers inherit the affinity from the
5396 primary thread, so if a lot of workers are created on the single
5397 core quickly, they don't get a chance to set their own affinity for
5398 a long time. */
5399 __kmp_set_thread_affinity_mask_full_tmp(old_mask);
5400#endif
5401
5402 /* allocate new threads for the hot team */
5403 for (f = team->t.t_nproc; f < new_nproc; f++) {
5404 kmp_info_t *new_worker = __kmp_allocate_thread(root, team, f);
5405 KMP_DEBUG_ASSERT(new_worker);
5406 team->t.t_threads[f] = new_worker;
5407
5408 KA_TRACE(20,
5409 ("__kmp_allocate_team: team %d init T#%d arrived: "
5410 "join=%llu, plain=%llu\n",
5411 team->t.t_id, __kmp_gtid_from_tid(f, team), team->t.t_id, f,
5412 team->t.t_bar[bs_forkjoin_barrier].b_arrived,
5413 team->t.t_bar[bs_plain_barrier].b_arrived));
5414
5415 { // Initialize barrier data for new threads.
5416 int b;
5417 kmp_balign_t *balign = new_worker->th.th_bar;
5418 for (b = 0; b < bs_last_barrier; ++b) {
5419 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5420 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag !=
5421 KMP_BARRIER_PARENT_FLAG);
5422#if USE_DEBUGGER0
5423 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5424#endif
5425 }
5426 }
5427 }
5428
5429#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
5430 if (KMP_AFFINITY_CAPABLE()) {
5431 /* Restore initial primary thread's affinity mask */
5432 __kmp_set_system_affinity(old_mask, TRUE);
5433 KMP_CPU_FREE(old_mask);
5434 }
5435#endif
5436#if KMP_NESTED_HOT_TEAMS1
5437 } // end of check of t_nproc vs. new_nproc vs. hot_team_nth
5438#endif // KMP_NESTED_HOT_TEAMS
5439 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5440 // Barrier size already increased earlier in this function
5441 // Activate team threads via th_used_in_team
5442 __kmp_add_threads_to_team(team, new_nproc);
5443 }
5444 /* make sure everyone is synchronized */
5445 // new threads below
5446 __kmp_initialize_team(team, new_nproc, new_icvs,
5447 root->r.r_uber_thread->th.th_ident);
5448
5449 /* reinitialize the threads */
5450 KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
5451 for (f = 0; f < team->t.t_nproc; ++f)
5452 __kmp_initialize_info(team->t.t_threads[f], team, f,
5453 __kmp_gtid_from_tid(f, team));
5454
5455 // set th_task_state for new threads in hot team with older thread's state
5456 kmp_uint8 old_state = team->t.t_threads[old_nproc - 1]->th.th_task_state;
5457 for (f = old_nproc; f < team->t.t_nproc; ++f)
5458 team->t.t_threads[f]->th.th_task_state = old_state;
5459
5460#ifdef KMP_DEBUG1
5461 for (f = 0; f < team->t.t_nproc; ++f) {
5462 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5463 team->t.t_threads[f]->th.th_team_nproc ==
5464 team->t.t_nproc);
5465 }
5466#endif
5467
5468 if (do_place_partition) {
5469 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5470#if KMP_AFFINITY_SUPPORTED1
5471 __kmp_partition_places(team);
5472#endif
5473 }
5474 } // Check changes in number of threads
5475
5476 kmp_info_t *master = team->t.t_threads[0];
5477 if (master->th.th_teams_microtask) {
5478 for (f = 1; f < new_nproc; ++f) {
5479 // propagate teams construct specific info to workers
5480 kmp_info_t *thr = team->t.t_threads[f];
5481 thr->th.th_teams_microtask = master->th.th_teams_microtask;
5482 thr->th.th_teams_level = master->th.th_teams_level;
5483 thr->th.th_teams_size = master->th.th_teams_size;
5484 }
5485 }
5486#if KMP_NESTED_HOT_TEAMS1
5487 if (level) {
5488 // Sync barrier state for nested hot teams, not needed for outermost hot
5489 // team.
5490 for (f = 1; f < new_nproc; ++f) {
5491 kmp_info_t *thr = team->t.t_threads[f];
5492 int b;
5493 kmp_balign_t *balign = thr->th.th_bar;
5494 for (b = 0; b < bs_last_barrier; ++b) {
5495 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5496 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5497#if USE_DEBUGGER0
5498 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5499#endif
5500 }
5501 }
5502 }
5503#endif // KMP_NESTED_HOT_TEAMS
5504
5505 /* reallocate space for arguments if necessary */
5506 __kmp_alloc_argv_entries(argc, team, TRUE);
5507 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5508 // The hot team re-uses the previous task team,
5509 // if untouched during the previous release->gather phase.
5510
5511 KF_TRACE(10, (" hot_team = %p\n", team));
5512
5513#if KMP_DEBUG1
5514 if (__kmp_tasking_mode != tskm_immediate_exec) {
5515 KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "
5516 "task_team[1] = %p after reinit\n",
5517 team->t.t_task_team[0], team->t.t_task_team[1]));
5518 }
5519#endif
5520
5521#if OMPT_SUPPORT1
5522 __ompt_team_assign_id(team, ompt_parallel_data);
5523#endif
5524
5525 KMP_MB();
5526
5527 return team;
5528 }
5529
5530 /* next, let's try to take one from the team pool */
5531 KMP_MB();
5532 for (team = CCAST(kmp_team_t *, __kmp_team_pool); (team);) {
5533 /* TODO: consider resizing undersized teams instead of reaping them, now
5534 that we have a resizing mechanism */
5535 if (team->t.t_max_nproc >= max_nproc) {
5536 /* take this team from the team pool */
5537 __kmp_team_pool = team->t.t_next_pool;
5538
5539 if (max_nproc > 1 &&
5540 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5541 if (!team->t.b) { // Allocate barrier structure
5542 team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
5543 }
5544 }
5545
5546 /* setup the team for fresh use */
5547 __kmp_initialize_team(team, new_nproc, new_icvs, NULL__null);
5548
5549 KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and "
5550 "task_team[1] %p to NULL\n",
5551 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5552 team->t.t_task_team[0] = NULL;
5553 team->t.t_task_team[1] = NULL;
5554
5555 /* reallocate space for arguments if necessary */
5556 __kmp_alloc_argv_entries(argc, team, TRUE);
5557 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5558
5559 KA_TRACE(
5560 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5561 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5562 { // Initialize barrier data.
5563 int b;
5564 for (b = 0; b < bs_last_barrier; ++b) {
5565 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE0;
5566#if USE_DEBUGGER0
5567 team->t.t_bar[b].b_master_arrived = 0;
5568 team->t.t_bar[b].b_team_arrived = 0;
5569#endif
5570 }
5571 }
5572
5573 team->t.t_proc_bind = new_proc_bind;
5574
5575 KA_TRACE(20, ("__kmp_allocate_team: using team from pool %d.\n",
5576 team->t.t_id));
5577
5578#if OMPT_SUPPORT1
5579 __ompt_team_assign_id(team, ompt_parallel_data);
5580#endif
5581
5582 KMP_MB();
5583
5584 return team;
5585 }
5586
5587 /* reap team if it is too small, then loop back and check the next one */
5588 // not sure if this is wise, but, will be redone during the hot-teams
5589 // rewrite.
5590 /* TODO: Use technique to find the right size hot-team, don't reap them */
5591 team = __kmp_reap_team(team);
5592 __kmp_team_pool = team;
5593 }
5594
5595 /* nothing available in the pool, no matter, make a new team! */
5596 KMP_MB();
5597 team = (kmp_team_t *)__kmp_allocate(sizeof(kmp_team_t));
5598
5599 /* and set it up */
5600 team->t.t_max_nproc = max_nproc;
5601 if (max_nproc > 1 &&
5602 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5603 // Allocate barrier structure
5604 team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
5605 }
5606
5607 /* NOTE well, for some reason allocating one big buffer and dividing it up
5608 seems to really hurt performance a lot on the P4, so, let's not use this */
5609 __kmp_allocate_team_arrays(team, max_nproc);
5610
5611 KA_TRACE(20, ("__kmp_allocate_team: making a new team\n"));
5612 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5613
5614 KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and task_team[1] "
5615 "%p to NULL\n",
5616 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5617 team->t.t_task_team[0] = NULL; // to be removed, as __kmp_allocate zeroes
5618 // memory, no need to duplicate
5619 team->t.t_task_team[1] = NULL; // to be removed, as __kmp_allocate zeroes
5620 // memory, no need to duplicate
5621
5622 if (__kmp_storage_map) {
5623 __kmp_print_team_storage_map("team", team, team->t.t_id, new_nproc);
5624 }
5625
5626 /* allocate space for arguments */
5627 __kmp_alloc_argv_entries(argc, team, FALSE0);
5628 team->t.t_argc = argc;
5629
5630 KA_TRACE(20,
5631 ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5632 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5633 { // Initialize barrier data.
5634 int b;
5635 for (b = 0; b < bs_last_barrier; ++b) {
5636 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE0;
5637#if USE_DEBUGGER0
5638 team->t.t_bar[b].b_master_arrived = 0;
5639 team->t.t_bar[b].b_team_arrived = 0;
5640#endif
5641 }
5642 }
5643
5644 team->t.t_proc_bind = new_proc_bind;
5645
5646#if OMPT_SUPPORT1
5647 __ompt_team_assign_id(team, ompt_parallel_data);
5648 team->t.ompt_serialized_team_info = NULL__null;
5649#endif
5650
5651 KMP_MB();
5652
5653 KA_TRACE(20, ("__kmp_allocate_team: done creating a new team %d.\n",
5654 team->t.t_id));
5655
5656 return team;
5657}
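
__kmp_allocate_team above reuses a hot team by reconciling its current size with the requested one: an unchanged size only needs its ICVs reinitialized, a larger team releases (or parks) the surplus workers, and a smaller team wakes reserved workers or allocates new ones. A much-simplified standalone sketch of that decision, with illustrative types that are not the runtime's own, is:

#include <cstdio>
#include <vector>

struct Worker { int id; };

static void resize_hot_team(std::vector<Worker> &team, int new_nproc) {
  if ((int)team.size() == new_nproc)
    return;                      // most common case: just reinitialize state
  if ((int)team.size() > new_nproc) {
    team.resize(new_nproc);      // decreasing: drop the surplus workers
    return;
  }
  // increasing: create the missing workers (the runtime first reuses any
  // reserved threads, then allocates new ones)
  for (int f = (int)team.size(); f < new_nproc; ++f)
    team.push_back(Worker{f});
}

int main() {
  std::vector<Worker> team{{0}, {1}, {2}, {3}};
  resize_hot_team(team, 6);      // grow to 6
  resize_hot_team(team, 2);      // shrink to 2
  std::printf("final team size = %zu\n", team.size()); // prints 2
  return 0;
}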
5658
5659/* TODO implement hot-teams at all levels */
5660/* TODO implement lazy thread release on demand (disband request) */
5661
5662/* free the team. return it to the team pool. release all the threads
5663 * associated with it */
5664void __kmp_free_team(kmp_root_t *root,
5665 kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master)) {
5666 int f;
5667 KA_TRACE(20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(),
5668 team->t.t_id));
5669
5670 /* verify state */
5671 KMP_DEBUG_ASSERT(root);
5672 KMP_DEBUG_ASSERT(team);
5673 KMP_DEBUG_ASSERT(team->t.t_nproc <= team->t.t_max_nproc);
5674 KMP_DEBUG_ASSERT(team->t.t_threads);
5675
5676 int use_hot_team = team == root->r.r_hot_team;
5677#if KMP_NESTED_HOT_TEAMS1
5678 int level;
5679 if (master) {
5680 level = team->t.t_active_level - 1;
5681 if (master->th.th_teams_microtask) { // in teams construct?
5682 if (master->th.th_teams_size.nteams > 1) {
5683 ++level; // level was not increased in teams construct for
5684 // team_of_masters
5685 }
5686 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
5687 master->th.th_teams_level == team->t.t_level) {
5688 ++level; // level was not increased in teams construct for
5689 // team_of_workers before the parallel
5690 } // team->t.t_level will be increased inside parallel
5691 }
5692#if KMP_DEBUG1
5693 kmp_hot_team_ptr_t *hot_teams = master->th.th_hot_teams;
5694#endif
5695 if (level < __kmp_hot_teams_max_level) {
5696 KMP_DEBUG_ASSERT(team == hot_teams[level].hot_team);
5697 use_hot_team = 1;
5698 }
5699 }
5700#endif // KMP_NESTED_HOT_TEAMS
5701
5702 /* team is done working */
5703 TCW_SYNC_PTR(team->t.t_pkfn,
5704 NULL); // Important for Debugging Support Library.
5705#if KMP_OS_WINDOWS0
5706 team->t.t_copyin_counter = 0; // init counter for possible reuse
5707#endif
5708 // Do not reset pointer to parent team to NULL for hot teams.
5709
5710 /* if we are non-hot team, release our threads */
5711 if (!use_hot_team) {
5712 if (__kmp_tasking_mode != tskm_immediate_exec) {
5713 // Wait for threads to reach reapable state
5714 for (f = 1; f < team->t.t_nproc; ++f) {
5715 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5716 kmp_info_t *th = team->t.t_threads[f];
5717 volatile kmp_uint32 *state = &th->th.th_reap_state;
5718 while (*state != KMP_SAFE_TO_REAP1) {
5719#if KMP_OS_WINDOWS0
5720 // On Windows a thread can be killed at any time, check this
5721 DWORD ecode;
5722 if (!__kmp_is_thread_alive(th, &ecode)) {
5723 *state = KMP_SAFE_TO_REAP1; // reset the flag for dead thread
5724 break;
5725 }
5726#endif
5727 // first check if thread is sleeping
5728 kmp_flag_64<> fl(&th->th.th_bar[bs_forkjoin_barrier].bb.b_go, th);
5729 if (fl.is_sleeping())
5730 fl.resume(__kmp_gtid_from_thread(th));
5731 KMP_CPU_PAUSE();
5732 }
5733 }
5734
5735 // Delete task teams
5736 int tt_idx;
5737 for (tt_idx = 0; tt_idx < 2; ++tt_idx) {
5738 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
5739 if (task_team != NULL__null) {
5740 for (f = 0; f < team->t.t_nproc; ++f) { // threads unref task teams
5741 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5742 team->t.t_threads[f]->th.th_task_team = NULL__null;
5743 }
5744 KA_TRACE(
5745 20,
5746 ("__kmp_free_team: T#%d deactivating task_team %p on team %d\n",
5747 __kmp_get_gtid(), task_team, team->t.t_id));
5748#if KMP_NESTED_HOT_TEAMS1
5749 __kmp_free_task_team(master, task_team);
5750#endif
5751 team->t.t_task_team[tt_idx] = NULL__null;
5752 }
5753 }
5754 }
5755
5756 // Reset pointer to parent team only for non-hot teams.
5757 team->t.t_parent = NULL__null;
5758 team->t.t_level = 0;
5759 team->t.t_active_level = 0;
5760
5761 /* free the worker threads */
5762 for (f = 1; f < team->t.t_nproc; ++f) {
5763 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5764 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5765 KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team),
5766 1, 2);
5767 }
5768 __kmp_free_thread(team->t.t_threads[f]);
5769 }
5770
5771 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5772 if (team->t.b) {
5773 // wake up thread at old location
5774 team->t.b->go_release();
5775 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME(2147483647)) {
5776 for (f = 1; f < team->t.t_nproc; ++f) {
5777 if (team->t.b->sleep[f].sleep) {
5778 __kmp_atomic_resume_64(
5779 team->t.t_threads[f]->th.th_info.ds.ds_gtid,
5780 (kmp_atomic_flag_64<> *)NULL__null);
5781 }
5782 }
5783 }
5784 // Wait for threads to be removed from team
5785 for (int f = 1; f < team->t.t_nproc; ++f) {
5786 while (team->t.t_threads[f]->th.th_used_in_team.load() != 0)
5787 KMP_CPU_PAUSE();
5788 }
5789 }
5790 }
5791
5792 for (f = 1; f < team->t.t_nproc; ++f) {
5793 team->t.t_threads[f] = NULL__null;
5794 }
5795
5796 if (team->t.t_max_nproc > 1 &&
5797 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5798 distributedBarrier::deallocate(team->t.b);
5799 team->t.b = NULL__null;
5800 }
5801 /* put the team back in the team pool */
5802 /* TODO limit size of team pool, call reap_team if pool too large */
5803 team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool);
5804 __kmp_team_pool = (volatile kmp_team_t *)team;
5805 } else { // Check if team was created for primary threads in teams construct
5806 // See if first worker is a CG root
5807 KMP_DEBUG_ASSERT(team->t.t_threads[1] &&
5808 team->t.t_threads[1]->th.th_cg_roots);
5809 if (team->t.t_threads[1]->th.th_cg_roots->cg_root == team->t.t_threads[1]) {
5810 // Clean up the CG root nodes on workers so that this team can be re-used
5811 for (f = 1; f < team->t.t_nproc; ++f) {
5812 kmp_info_t *thr = team->t.t_threads[f];
5813 KMP_DEBUG_ASSERT(thr && thr->th.th_cg_roots &&
5814 thr->th.th_cg_roots->cg_root == thr);
5815 // Pop current CG root off list
5816 kmp_cg_root_t *tmp = thr->th.th_cg_roots;
5817 thr->th.th_cg_roots = tmp->up;
5818 KA_TRACE(100, ("__kmp_free_team: Thread %p popping node %p and moving"
5819 " up to node %p. cg_nthreads was %d\n",
5820 thr, tmp, thr->th.th_cg_roots, tmp->cg_nthreads));
5821 int i = tmp->cg_nthreads--;
5822 if (i == 1) {
5823 __kmp_free(tmp); // free CG if we are the last thread in it
5824 }
5825 // Restore current task's thread_limit from CG root
5826 if (thr->th.th_cg_roots)
5827 thr->th.th_current_task->td_icvs.thread_limit =
5828 thr->th.th_cg_roots->cg_thread_limit;
5829 }
5830 }
5831 }
5832
5833 KMP_MB();
5834}
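
The non-hot branch of __kmp_free_team above pops each worker's contention-group (CG) root node off a linked stack, frees the node once the last referencing thread lets go, and restores the thread_limit of the node it pops back to. A standalone sketch of that pattern, using hypothetical types rather than the real kmp_cg_root_t, is:

#include <cassert>
#include <cstdio>

struct CgRoot {
  int cg_nthreads;   // threads still referencing this node
  int thread_limit;  // limit restored from the node we pop back to
  CgRoot *up;        // enclosing contention group
};

// Pop `node` and return its parent; free the node if no thread still uses it.
static CgRoot *cg_pop(CgRoot *node) {
  CgRoot *up = node->up;
  if (--node->cg_nthreads == 0)
    delete node;
  return up;
}

int main() {
  CgRoot *outer = new CgRoot{1, 64, nullptr};
  CgRoot *inner = new CgRoot{2, 4, outer};  // two threads share `inner`
  CgRoot *a = inner, *b = inner;
  a = cg_pop(a);   // first thread leaves: node is kept alive
  assert(a == outer);
  b = cg_pop(b);   // second thread leaves: `inner` is freed
  std::printf("thread_limit back to %d\n", a->thread_limit); // prints 64
  cg_pop(a);       // finally release the outer node too
  return 0;
}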
5835
5836/* reap the team. destroy it, reclaim all its resources and free its memory */
5837kmp_team_t *__kmp_reap_team(kmp_team_t *team) {
5838 kmp_team_t *next_pool = team->t.t_next_pool;
5839
5840 KMP_DEBUG_ASSERT(team);
5841 KMP_DEBUG_ASSERT(team->t.t_dispatch);
5842 KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
5843 KMP_DEBUG_ASSERT(team->t.t_threads);
5844 KMP_DEBUG_ASSERT(team->t.t_argv);
5845
5846 /* TODO clean the threads that are a part of this? */
5847
5848 /* free stuff */
5849 __kmp_free_team_arrays(team);
5850 if (team->t.t_argv != &team->t.t_inline_argv[0])
5851 __kmp_free((void *)team->t.t_argv);
5852 __kmp_free(team);
5853
5854 KMP_MB();
5855 return next_pool;
5856}
5857
5858// Free the thread. Don't reap it, just place it on the pool of available
5859// threads.
5860//
5861// Changes for Quad issue 527845: We need a predictable OMP tid <-> gtid
5862// binding for the affinity mechanism to be useful.
5863//
5864// Now, we always keep the free list (__kmp_thread_pool) sorted by gtid.
5865// However, we want to avoid a potential performance problem by always
5866// scanning through the list to find the correct point at which to insert
5867// the thread (potential N**2 behavior). To do this we keep track of the
5868// last place a thread struct was inserted (__kmp_thread_pool_insert_pt).
5869// With single-level parallelism, threads will always be added to the tail
5870// of the list, kept track of by __kmp_thread_pool_insert_pt. With nested
5871// parallelism, all bets are off and we may need to scan through the entire
5872// free list.
5873//
5874// This change also has a potentially large performance benefit, for some
5875// applications. Previously, as threads were freed from the hot team, they
5876// would be placed back on the free list in inverse order. If the hot team
5877 // grew back to its original size, then the freed thread would be placed
5878// back on the hot team in reverse order. This could cause bad cache
5879// locality problems on programs where the size of the hot team regularly
5880// grew and shrunk.
5881//
5882// Now, for single-level parallelism, the OMP tid is always == gtid.
5883void __kmp_free_thread(kmp_info_t *this_th) {
5884 int gtid;
5885 kmp_info_t **scan;
5886
5887 KA_TRACE(20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
5888 __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid));
5889
5890 KMP_DEBUG_ASSERT(this_th);
5891
5892 // When moving thread to pool, switch thread to wait on own b_go flag, and
5893 // uninitialized (NULL team).
5894 int b;
5895 kmp_balign_t *balign = this_th->th.th_bar;
5896 for (b = 0; b < bs_last_barrier; ++b) {
5897 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG2)
5898 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG3;
5899 balign[b].bb.team = NULL__null;
5900 balign[b].bb.leaf_kids = 0;
5901 }
5902 this_th->th.th_task_state = 0;
5903 this_th->th.th_reap_state = KMP_SAFE_TO_REAP1;
5904
5905 /* put thread back on the free pool */
5906 TCW_PTR(this_th->th.th_team, NULL);
5907 TCW_PTR(this_th->th.th_root, NULL);
5908 TCW_PTR(this_th->th.th_dispatch, NULL); /* NOT NEEDED */
5909
5910 while (this_th->th.th_cg_roots) {
5911 this_th->th.th_cg_roots->cg_nthreads--;
5912 KA_TRACE(100, ("__kmp_free_thread: Thread %p decrement cg_nthreads on node"
5913 " %p of thread %p to %d\n",
5914 this_th, this_th->th.th_cg_roots,
5915 this_th->th.th_cg_roots->cg_root,
5916 this_th->th.th_cg_roots->cg_nthreads));
5917 kmp_cg_root_t *tmp = this_th->th.th_cg_roots;
5918 if (tmp->cg_root == this_th) { // Thread is a cg_root
5919 KMP_DEBUG_ASSERT(tmp->cg_nthreads == 0);
5920 KA_TRACE(
5921 5, ("__kmp_free_thread: Thread %p freeing node %p\n", this_th, tmp));
5922 this_th->th.th_cg_roots = tmp->up;
5923 __kmp_free(tmp);
5924 } else { // Worker thread
5925 if (tmp->cg_nthreads == 0) { // last thread leaves contention group
5926 __kmp_free(tmp);
5927 }
5928 this_th->th.th_cg_roots = NULL__null;
5929 break;
5930 }
5931 }
5932
5933 /* If the implicit task assigned to this thread can be used by other threads
5934 * -> multiple threads can share the data and try to free the task at
5935 * __kmp_reap_thread at exit. This duplicate use of the task data can happen
5936 * with higher probability when hot team is disabled but can occur even when
5937 * the hot team is enabled */
5938 __kmp_free_implicit_task(this_th);
5939 this_th->th.th_current_task = NULL__null;
5940
5941 // If the __kmp_thread_pool_insert_pt is already past the new insert
5942 // point, then we need to re-scan the entire list.
5943 gtid = this_th->th.th_info.ds.ds_gtid;
5944 if (__kmp_thread_pool_insert_pt != NULL) {
5945 KMP_DEBUG_ASSERT(__kmp_thread_pool != NULL);
5946 if (__kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid) {
5947 __kmp_thread_pool_insert_pt = NULL__null;
5948 }
5949 }
5950
5951 // Scan down the list to find the place to insert the thread.
5952 // scan is the address of a link in the list, possibly the address of
5953 // __kmp_thread_pool itself.
5954 //
5955 // In the absence of nested parallelism, the for loop will have 0 iterations.
5956 if (__kmp_thread_pool_insert_pt != NULL__null) {
5957 scan = &(__kmp_thread_pool_insert_pt->th.th_next_pool);
5958 } else {
5959 scan = CCAST(kmp_info_t **, &__kmp_thread_pool);
5960 }
5961 for (; (*scan != NULL__null) && ((*scan)->th.th_info.ds.ds_gtid < gtid);
5962 scan = &((*scan)->th.th_next_pool))
5963 ;
5964
5965 // Insert the new element on the list, and set __kmp_thread_pool_insert_pt
5966 // to its address.
5967 TCW_PTR(this_th->th.th_next_pool, *scan);
5968 __kmp_thread_pool_insert_pt = *scan = this_th;
5969 KMP_DEBUG_ASSERT((this_th->th.th_next_pool == NULL) ||
5970 (this_th->th.th_info.ds.ds_gtid <
5971 this_th->th.th_next_pool->th.th_info.ds.ds_gtid));
5972 TCW_4(this_th->th.th_in_pool, TRUE);
5973 __kmp_suspend_initialize_thread(this_th);
5974 __kmp_lock_suspend_mx(this_th);
5975 if (this_th->th.th_active == TRUE) {
5976 KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
5977 this_th->th.th_active_in_pool = TRUE;
5978 }
5979#if KMP_DEBUG1
5980 else {
5981 KMP_DEBUG_ASSERT(this_th->th.th_active_in_pool == FALSE);
5982 }
5983#endif
5984 __kmp_unlock_suspend_mx(this_th);
5985
5986 TCW_4(__kmp_nth, __kmp_nth - 1);
5987
5988#ifdef KMP_ADJUST_BLOCKTIME1
5989 /* Adjust blocktime back to user setting or default if necessary */
5990 /* Middle initialization might never have occurred */
5991 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
5992 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
5993 if (__kmp_nth <= __kmp_avail_proc) {
5994 __kmp_zero_bt = FALSE0;
5995 }
5996 }
5997#endif /* KMP_ADJUST_BLOCKTIME */
5998
5999 KMP_MB();
6000}
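
The comment block above __kmp_free_thread describes the free-pool policy: keep the pool as a singly linked list sorted by gtid, and cache the last insertion point so the common single-level case appends without rescanning the list. A standalone sketch of exactly that insertion, with illustrative names in place of kmp_info_t and the real globals, is:

#include <cassert>
#include <cstdio>

struct PoolThread {
  int gtid;
  PoolThread *next = nullptr;
};

static PoolThread *pool = nullptr;       // head of the gtid-sorted free list
static PoolThread *insert_pt = nullptr;  // cached insertion point (may be null)

static void pool_insert(PoolThread *th) {
  // If the cached point is already past the new gtid, rescan from the head.
  if (insert_pt && insert_pt->gtid > th->gtid)
    insert_pt = nullptr;
  PoolThread **scan = insert_pt ? &insert_pt->next : &pool;
  while (*scan && (*scan)->gtid < th->gtid)
    scan = &(*scan)->next;
  th->next = *scan;   // splice in front of the first larger gtid
  *scan = th;
  insert_pt = th;     // remember where we inserted
  assert(!th->next || th->gtid < th->next->gtid);
}

int main() {
  PoolThread a{3}, b{1}, c{2};
  pool_insert(&a);
  pool_insert(&b);    // forces the cache reset and a rescan from the head
  pool_insert(&c);
  for (PoolThread *p = pool; p; p = p->next)
    std::printf("%d ", p->gtid);  // prints: 1 2 3
  std::printf("\n");
  return 0;
}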
6001
6002/* ------------------------------------------------------------------------ */
6003
6004void *__kmp_launch_thread(kmp_info_t *this_thr) {
6005#if OMP_PROFILING_SUPPORT0
6006 ProfileTraceFile = getenv("LIBOMPTARGET_PROFILE");
6007 // TODO: add a configuration option for time granularity
6008 if (ProfileTraceFile)
6009 llvm::timeTraceProfilerInitialize(500 /* us */, "libomptarget");
6010#endif
6011
6012 int gtid = this_thr->th.th_info.ds.ds_gtid;
6013 /* void *stack_data;*/
6014 kmp_team_t **volatile pteam;
6015
6016 KMP_MB();
6017 KA_TRACE(10, ("__kmp_launch_thread: T#%d start\n", gtid));
6018
6019 if (__kmp_env_consistency_check) {
6020 this_thr->th.th_cons = __kmp_allocate_cons_stack(gtid); // ATT: Memory leak?
6021 }
6022
6023#if OMPD_SUPPORT1
6024 if (ompd_state & OMPD_ENABLE_BP0x1)
6025 ompd_bp_thread_begin();
6026#endif
6027
6028#if OMPT_SUPPORT1
6029 ompt_data_t *thread_data = nullptr;
6030 if (ompt_enabled.enabled) {
6031 thread_data = &(this_thr->th.ompt_thread_info.thread_data);
6032 *thread_data = ompt_data_none{0};
6033
6034 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6035 this_thr->th.ompt_thread_info.wait_id = 0;
6036 this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0)__builtin_frame_address(0);
6037 this_thr->th.ompt_thread_info.parallel_flags = 0;
6038 if (ompt_enabled.ompt_callback_thread_begin) {
6039 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
6040 ompt_thread_worker, thread_data);
6041 }
6042 this_thr->th.ompt_thread_info.state = ompt_state_idle;
6043 }
6044#endif
6045
6046 /* This is the place where threads wait for work */
6047 while (!TCR_4(__kmp_global.g.g_done)) {
6048 KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]);
6049 KMP_MB();
6050
6051 /* wait for work to do */
6052 KA_TRACE(20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid));
6053
6054 /* No tid yet since not part of a team */
6055 __kmp_fork_barrier(gtid, KMP_GTID_DNE(-2));
6056
6057#if OMPT_SUPPORT1
6058 if (ompt_enabled.enabled) {
6059 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6060 }
6061#endif
6062
6063 pteam = &this_thr->th.th_team;
6064
6065 /* have we been allocated? */
6066 if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) {
6067 /* we were just woken up, so run our new task */
6068 if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) {
6069 int rc;
6070 KA_TRACE(20,
6071 ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
6072 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
6073 (*pteam)->t.t_pkfn));
6074
6075 updateHWFPControl(*pteam);
6076
6077#if OMPT_SUPPORT1
6078 if (ompt_enabled.enabled) {
6079 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
6080 }
6081#endif
6082
6083 rc = (*pteam)->t.t_invoke(gtid);
6084 KMP_ASSERT(rc)if (!(rc)) { __kmp_debug_assert("rc", "openmp/runtime/src/kmp_runtime.cpp"
, 6084); }
;
6085
6086 KMP_MB();
6087 KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n"
, gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam
)->t.t_pkfn); }
6088 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n"
, gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam
)->t.t_pkfn); }
6089 (*pteam)->t.t_pkfn))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n"
, gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam
)->t.t_pkfn); }
;
6090 }
6091#if OMPT_SUPPORT1
6092 if (ompt_enabled.enabled) {
6093 /* no frame set while outside task */
6094 __ompt_get_task_info_object(0)->frame.exit_frame = ompt_data_none{0};
6095
6096 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6097 }
6098#endif
6099 /* join barrier after parallel region */
6100 __kmp_join_barrier(gtid);
6101 }
6102 }
6103 TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done)((void *)((intptr_t)__kmp_global.g.g_done));
6104
6105#if OMPD_SUPPORT1
6106 if (ompd_state & OMPD_ENABLE_BP0x1)
6107 ompd_bp_thread_end();
6108#endif
6109
6110#if OMPT_SUPPORT1
6111 if (ompt_enabled.ompt_callback_thread_end) {
6112 ompt_callbacks.ompt_callback(ompt_callback_thread_end)ompt_callback_thread_end_callback(thread_data);
6113 }
6114#endif
6115
6116 this_thr->th.th_task_team = NULL__null;
6117 /* run the destructors for the threadprivate data for this thread */
6118 __kmp_common_destroy_gtid(gtid);
6119
6120 KA_TRACE(10, ("__kmp_launch_thread: T#%d done\n", gtid))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_launch_thread: T#%d done\n"
, gtid); }
;
6121 KMP_MB();
6122
6123#if OMP_PROFILING_SUPPORT0
6124 llvm::timeTraceProfilerFinishThread();
6125#endif
6126 return this_thr;
6127}
6128
6129/* ------------------------------------------------------------------------ */
6130
6131void __kmp_internal_end_dest(void *specific_gtid) {
6132 // Make sure no significant bits are lost
6133 int gtid;
6134 __kmp_type_convert((kmp_intptr_t)specific_gtid - 1, &gtid);
6135
6136 KA_TRACE(30, ("__kmp_internal_end_dest: T#%d\n", gtid))if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_internal_end_dest: T#%d\n"
, gtid); }
;
6137 /* NOTE: the gtid is stored as gtid+1 in the thread-local-storage
6138 * this is because 0 is reserved for the nothing-stored case */
6139
6140 __kmp_internal_end_thread(gtid);
6141}
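/* A minimal standalone sketch (helper names are hypothetical, not from this
   file) of the gtid+1 convention described in the NOTE above: the value kept
   in thread-local storage is biased by one so that 0 can keep meaning
   "nothing stored". */
#include <cstdint>
static inline void *example_gtid_to_specific(int gtid) {
  return (void *)(intptr_t)(gtid + 1); // 0 stays reserved for the empty slot
}
static inline int example_specific_to_gtid(void *specific) {
  return (int)((intptr_t)specific - 1); // yields -1 if nothing was stored
}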
6142
6143#if KMP_OS_UNIX1 && KMP_DYNAMIC_LIB1
6144
6145__attribute__((destructor)) void __kmp_internal_end_dtor(void) {
6146 __kmp_internal_end_atexit();
6147}
6148
6149#endif
6150
6151/* [Windows] josh: when the atexit handler is called, there may still be more
6152 than one thread alive */
6153void __kmp_internal_end_atexit(void) {
6154 KA_TRACE(30, ("__kmp_internal_end_atexit\n"))if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_internal_end_atexit\n"
); }
;
6155 /* [Windows]
6156 josh: ideally, we want to completely shutdown the library in this atexit
6157 handler, but stat code that depends on thread specific data for gtid fails
6158 because that data becomes unavailable at some point during the shutdown, so
6159 we call __kmp_internal_end_thread instead. We should eventually remove the
6160 dependency on __kmp_get_specific_gtid in the stat code and use
6161 __kmp_internal_end_library to cleanly shutdown the library.
6162
6163 // TODO: Can some of this comment about GVS be removed?
6164 I suspect that the offending stat code is executed when the calling thread
6165 tries to clean up a dead root thread's data structures, resulting in GVS
6166 code trying to close the GVS structures for that thread, but since the stat
6167 code uses __kmp_get_specific_gtid to get the gtid with the assumption that
6168 the calling thread is cleaning up itself instead of another thread, it gets
6169 confused. This happens because allowing a thread to unregister and cleanup
6170 another thread is a recent modification for addressing an issue.
6171 Based on the current design (20050722), a thread may end up
6172 trying to unregister another thread only if thread death does not trigger
6173 the calling of __kmp_internal_end_thread. For Linux* OS, there is the
6174 thread specific data destructor function to detect thread death. For
6175 Windows dynamic, there is DllMain(THREAD_DETACH). For Windows static, there
6176 is nothing. Thus, the workaround is applicable only to the Windows static
6177 stat library. */
6178 __kmp_internal_end_library(-1);
6179#if KMP_OS_WINDOWS0
6180 __kmp_close_console();
6181#endif
6182}
6183
6184static void __kmp_reap_thread(kmp_info_t *thread, int is_root) {
6185 // It is assumed __kmp_forkjoin_lock is acquired.
6186
6187 int gtid;
6188
6189 KMP_DEBUG_ASSERT(thread != NULL)if (!(thread != __null)) { __kmp_debug_assert("thread != __null"
, "openmp/runtime/src/kmp_runtime.cpp", 6189); }
;
6190
6191 gtid = thread->th.th_info.ds.ds_gtid;
6192
6193 if (!is_root) {
6194 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME(2147483647)) {
6195 /* Assume the threads are at the fork barrier here */
6196 KA_TRACE(if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n"
, gtid); }
6197 20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n"
, gtid); }
6198 gtid))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n"
, gtid); }
;
6199 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
6200 while (
6201 !KMP_COMPARE_AND_STORE_ACQ32(&(thread->th.th_used_in_team), 0, 3)__sync_bool_compare_and_swap((volatile kmp_uint32 *)(&(thread
->th.th_used_in_team)), (kmp_uint32)(0), (kmp_uint32)(3))
)
6202 KMP_CPU_PAUSE()__kmp_x86_pause();
6203 __kmp_resume_32(gtid, (kmp_flag_32<false, false> *)NULL__null);
6204 } else {
6205 /* Need release fence here to prevent seg faults for tree forkjoin
6206 barrier (GEH) */
6207 kmp_flag_64<> flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
6208 thread);
6209 __kmp_release_64(&flag);
6210 }
6211 }
6212
6213 // Terminate OS thread.
6214 __kmp_reap_worker(thread);
6215
6216 // The thread was killed asynchronously. If it was actively
6217 // spinning in the thread pool, decrement the global count.
6218 //
6219 // There is a small timing hole here - if the worker thread was just waking
6220 // up after sleeping in the pool, had reset its th_active_in_pool flag but
6221 // not decremented the global counter __kmp_thread_pool_active_nth yet, then
6222 // the global counter might not get updated.
6223 //
6224 // Currently, this can only happen as the library is unloaded,
6225 // so there are no harmful side effects.
6226 if (thread->th.th_active_in_pool) {
6227 thread->th.th_active_in_pool = FALSE0;
6228 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth)(&__kmp_thread_pool_active_nth)->fetch_sub(1, std::memory_order_acq_rel
)
;
6229 KMP_DEBUG_ASSERT(__kmp_thread_pool_active_nth >= 0)if (!(__kmp_thread_pool_active_nth >= 0)) { __kmp_debug_assert
("__kmp_thread_pool_active_nth >= 0", "openmp/runtime/src/kmp_runtime.cpp"
, 6229); }
;
6230 }
6231 }
6232
6233 __kmp_free_implicit_task(thread);
6234
6235// Free the fast memory for tasking
6236#if USE_FAST_MEMORY3
6237 __kmp_free_fast_memory(thread);
6238#endif /* USE_FAST_MEMORY */
6239
6240 __kmp_suspend_uninitialize_thread(thread);
6241
6242 KMP_DEBUG_ASSERT(__kmp_threads[gtid] == thread)if (!(__kmp_threads[gtid] == thread)) { __kmp_debug_assert("__kmp_threads[gtid] == thread"
, "openmp/runtime/src/kmp_runtime.cpp", 6242); }
;
6243 TCW_SYNC_PTR(__kmp_threads[gtid], NULL)((__kmp_threads[gtid])) = ((__null));
6244
6245 --__kmp_all_nth;
6246 // __kmp_nth was decremented when thread is added to the pool.
6247
6248#ifdef KMP_ADJUST_BLOCKTIME1
6249 /* Adjust blocktime back to user setting or default if necessary */
6250 /* Middle initialization might never have occurred */
6251 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
6252 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0)if (!(__kmp_avail_proc > 0)) { __kmp_debug_assert("__kmp_avail_proc > 0"
, "openmp/runtime/src/kmp_runtime.cpp", 6252); }
;
6253 if (__kmp_nth <= __kmp_avail_proc) {
6254 __kmp_zero_bt = FALSE0;
6255 }
6256 }
6257#endif /* KMP_ADJUST_BLOCKTIME */
6258
6259 /* free the memory being used */
6260 if (__kmp_env_consistency_check) {
6261 if (thread->th.th_cons) {
6262 __kmp_free_cons_stack(thread->th.th_cons);
6263 thread->th.th_cons = NULL__null;
6264 }
6265 }
6266
6267 if (thread->th.th_pri_common != NULL__null) {
6268 __kmp_free(thread->th.th_pri_common)___kmp_free((thread->th.th_pri_common), "openmp/runtime/src/kmp_runtime.cpp"
, 6268)
;
6269 thread->th.th_pri_common = NULL__null;
6270 }
6271
6272 if (thread->th.th_task_state_memo_stack != NULL__null) {
6273 __kmp_free(thread->th.th_task_state_memo_stack)___kmp_free((thread->th.th_task_state_memo_stack), "openmp/runtime/src/kmp_runtime.cpp"
, 6273)
;
6274 thread->th.th_task_state_memo_stack = NULL__null;
6275 }
6276
6277#if KMP_USE_BGET1
6278 if (thread->th.th_local.bget_data != NULL__null) {
6279 __kmp_finalize_bget(thread);
6280 }
6281#endif
6282
6283#if KMP_AFFINITY_SUPPORTED1
6284 if (thread->th.th_affin_mask != NULL__null) {
6285 KMP_CPU_FREE(thread->th.th_affin_mask)__kmp_affinity_dispatch->deallocate_mask(thread->th.th_affin_mask
)
;
6286 thread->th.th_affin_mask = NULL__null;
6287 }
6288#endif /* KMP_AFFINITY_SUPPORTED */
6289
6290#if KMP_USE_HIER_SCHED0
6291 if (thread->th.th_hier_bar_data != NULL__null) {
6292 __kmp_free(thread->th.th_hier_bar_data)___kmp_free((thread->th.th_hier_bar_data), "openmp/runtime/src/kmp_runtime.cpp"
, 6292)
;
6293 thread->th.th_hier_bar_data = NULL__null;
6294 }
6295#endif
6296
6297 __kmp_reap_team(thread->th.th_serial_team);
6298 thread->th.th_serial_team = NULL__null;
6299 __kmp_free(thread)___kmp_free((thread), "openmp/runtime/src/kmp_runtime.cpp", 6299
)
;
6300
6301 KMP_MB();
6302
6303} // __kmp_reap_thread
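/* A compact sketch of the wake-for-reap handshake used above when the
   distributed (dist) fork/join barrier is active: spin until the worker's
   "used in team" word can be moved from 0 (idle) to 3 (being reaped), then
   wake the worker. std::atomic stands in here for the runtime's
   KMP_COMPARE_AND_STORE_ACQ32 / __kmp_resume_32 machinery; this is an
   illustration of the pattern, not the runtime's own code. */
#include <atomic>

static void example_mark_worker_for_reap(std::atomic<unsigned> &used_in_team) {
  unsigned expected = 0;
  // The CAS may fail spuriously or observe a transient state; retry 0 -> 3.
  while (!used_in_team.compare_exchange_weak(expected, 3u,
                                             std::memory_order_acquire))
    expected = 0;
  // ...then resume the sleeping worker so it can observe the new state.
}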
6304
6305static void __kmp_itthash_clean(kmp_info_t *th) {
6306#if USE_ITT_NOTIFY1
6307 if (__kmp_itt_region_domains.count > 0) {
6308 for (int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
6309 kmp_itthash_entry_t *bucket = __kmp_itt_region_domains.buckets[i];
6310 while (bucket) {
6311 kmp_itthash_entry_t *next = bucket->next_in_bucket;
6312 __kmp_thread_free(th, bucket)___kmp_thread_free((th), (bucket), "openmp/runtime/src/kmp_runtime.cpp"
, 6312)
;
6313 bucket = next;
6314 }
6315 }
6316 }
6317 if (__kmp_itt_barrier_domains.count > 0) {
6318 for (int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
6319 kmp_itthash_entry_t *bucket = __kmp_itt_barrier_domains.buckets[i];
6320 while (bucket) {
6321 kmp_itthash_entry_t *next = bucket->next_in_bucket;
6322 __kmp_thread_free(th, bucket)___kmp_thread_free((th), (bucket), "openmp/runtime/src/kmp_runtime.cpp"
, 6322)
;
6323 bucket = next;
6324 }
6325 }
6326 }
6327#endif
6328}
6329
6330static void __kmp_internal_end(void) {
6331 int i;
6332
6333 /* First, unregister the library */
6334 __kmp_unregister_library();
6335
6336#if KMP_OS_WINDOWS0
6337 /* In Win static library, we can't tell when a root actually dies, so we
6338 reclaim the data structures for any root threads that have died but not
6339 unregistered themselves, in order to shut down cleanly.
6340 In Win dynamic library we also can't tell when a thread dies. */
6341 __kmp_reclaim_dead_roots(); // AC: moved here to always clean resources of
6342// dead roots
6343#endif
6344
6345 for (i = 0; i < __kmp_threads_capacity; i++)
6346 if (__kmp_root[i])
6347 if (__kmp_root[i]->r.r_active)
6348 break;
6349 KMP_MB(); /* Flush all pending memory write invalidates. */
6350 TCW_SYNC_4(__kmp_global.g.g_done, TRUE)(__kmp_global.g.g_done) = ((!0));
6351
6352 if (i < __kmp_threads_capacity) {
6353#if KMP_USE_MONITOR
6354 // 2009-09-08 (lev): Other alive roots found. Why do we kill the monitor??
6355 KMP_MB(); /* Flush all pending memory write invalidates. */
6356
6357 // Need to check that monitor was initialized before reaping it. If we are
6358 // called from __kmp_atfork_child (which sets __kmp_init_parallel = 0), then
6359 // __kmp_monitor will appear to contain valid data, but it is only valid in
6360 // the parent process, not the child.
6361 // New behavior (201008): instead of keying off of the flag
6362 // __kmp_init_parallel, the monitor thread creation is keyed off
6363 // of the new flag __kmp_init_monitor.
6364 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6365 if (TCR_4(__kmp_init_monitor)(__kmp_init_monitor)) {
6366 __kmp_reap_monitor(&__kmp_monitor);
6367 TCW_4(__kmp_init_monitor, 0)(__kmp_init_monitor) = (0);
6368 }
6369 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
6370 KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end: monitor reaped\n"
); }
;
6371#endif // KMP_USE_MONITOR
6372 } else {
6373/* TODO move this to cleanup code */
6374#ifdef KMP_DEBUG1
6375 /* make sure that everything has properly ended */
6376 for (i = 0; i < __kmp_threads_capacity; i++) {
6377 if (__kmp_root[i]) {
6378 // KMP_ASSERT( ! KMP_UBER_GTID( i ) ); // AC:
6379 // there can be uber threads alive here
6380 KMP_ASSERT(!__kmp_root[i]->r.r_active)if (!(!__kmp_root[i]->r.r_active)) { __kmp_debug_assert("!__kmp_root[i]->r.r_active"
, "openmp/runtime/src/kmp_runtime.cpp", 6380); }
; // TODO: can they be active?
6381 }
6382 }
6383#endif
6384
6385 KMP_MB();
6386
6387 // Reap the worker threads.
6388 // This is valid for now, but be careful if threads are reaped sooner.
6389 while (__kmp_thread_pool != NULL__null) { // Loop thru all the thread in the pool.
6390 // Get the next thread from the pool.
6391 kmp_info_t *thread = CCAST(kmp_info_t *, __kmp_thread_pool)const_cast<kmp_info_t *>(__kmp_thread_pool);
6392 __kmp_thread_pool = thread->th.th_next_pool;
6393 // Reap it.
6394 KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP)if (!(thread->th.th_reap_state == 1)) { __kmp_debug_assert
("thread->th.th_reap_state == 1", "openmp/runtime/src/kmp_runtime.cpp"
, 6394); }
;
6395 thread->th.th_next_pool = NULL__null;
6396 thread->th.th_in_pool = FALSE0;
6397 __kmp_reap_thread(thread, 0);
6398 }
6399 __kmp_thread_pool_insert_pt = NULL__null;
6400
6401 // Reap teams.
6402 while (__kmp_team_pool != NULL__null) { // Loop thru all the teams in the pool.
6403 // Get the next team from the pool.
6404 kmp_team_t *team = CCAST(kmp_team_t *, __kmp_team_pool)const_cast<kmp_team_t *>(__kmp_team_pool);
6405 __kmp_team_pool = team->t.t_next_pool;
6406 // Reap it.
6407 team->t.t_next_pool = NULL__null;
6408 __kmp_reap_team(team);
6409 }
6410
6411 __kmp_reap_task_teams();
6412
6413#if KMP_OS_UNIX1
6414 // Threads that are not reaped should not access any resources since they
6415 // are going to be deallocated soon, so the shutdown sequence should wait
6416 // until all threads either exit the final spin-waiting loop or begin
6417 // sleeping after the given blocktime.
6418 for (i = 0; i < __kmp_threads_capacity; i++) {
6419 kmp_info_t *thr = __kmp_threads[i];
6420 while (thr && KMP_ATOMIC_LD_ACQ(&thr->th.th_blocking)(&thr->th.th_blocking)->load(std::memory_order_acquire
)
)
6421 KMP_CPU_PAUSE()__kmp_x86_pause();
6422 }
6423#endif
6424
6425 for (i = 0; i < __kmp_threads_capacity; ++i) {
6426 // TBD: Add some checking...
6427 // Something like KMP_DEBUG_ASSERT( __kmp_thread[ i ] == NULL );
6428 }
6429
6430 /* Make sure all threadprivate destructors get run by joining with all
6431 worker threads before resetting this flag */
6432 TCW_SYNC_4(__kmp_init_common, FALSE)(__kmp_init_common) = (0);
6433
6434 KA_TRACE(10, ("__kmp_internal_end: all workers reaped\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end: all workers reaped\n"
); }
;
6435 KMP_MB();
6436
6437#if KMP_USE_MONITOR
6438 // See note above: One of the possible fixes for CQ138434 / CQ140126
6439 //
6440 // FIXME: push both code fragments down and CSE them?
6441 // push them into __kmp_cleanup() ?
6442 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6443 if (TCR_4(__kmp_init_monitor)(__kmp_init_monitor)) {
6444 __kmp_reap_monitor(&__kmp_monitor);
6445 TCW_4(__kmp_init_monitor, 0)(__kmp_init_monitor) = (0);
6446 }
6447 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
6448 KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end: monitor reaped\n"
); }
;
6449#endif
6450 } /* else !__kmp_global.t_active */
6451 TCW_4(__kmp_init_gtid, FALSE)(__kmp_init_gtid) = (0);
6452 KMP_MB(); /* Flush all pending memory write invalidates. */
6453
6454 __kmp_cleanup();
6455#if OMPT_SUPPORT1
6456 ompt_fini();
6457#endif
6458}
6459
6460void __kmp_internal_end_library(int gtid_req) {
6461 /* if we have already cleaned up, don't try again, it wouldn't be pretty */
6462 /* this shouldn't be a race condition because __kmp_internal_end() is the
6463 only place to clear __kmp_serial_init */
6464 /* we'll check this later too, after we get the lock */
6465 // 2009-09-06: We do not set g_abort without setting g_done. This check looks
6466 // redundant, because the next check will work in any case.
6467 if (__kmp_global.g.g_abort) {
6468 KA_TRACE(11, ("__kmp_internal_end_library: abort, exiting\n"))if (kmp_a_debug >= 11) { __kmp_debug_printf ("__kmp_internal_end_library: abort, exiting\n"
); }
;
6469 /* TODO abort? */
6470 return;
6471 }
6472 if (TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done) || !__kmp_init_serial) {
6473 KA_TRACE(10, ("__kmp_internal_end_library: already finished\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: already finished\n"
); }
;
6474 return;
6475 }
6476
6477 // If hidden helper team has been initialized, we need to deinit it
6478 if (TCR_4(__kmp_init_hidden_helper)(__kmp_init_hidden_helper) &&
6479 !TCR_4(__kmp_hidden_helper_team_done)(__kmp_hidden_helper_team_done)) {
6480 TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE)(__kmp_hidden_helper_team_done) = ((!0));
6481 // First release the main thread to let it continue its work
6482 __kmp_hidden_helper_main_thread_release();
6483 // Wait until the hidden helper team has been destroyed
6484 __kmp_hidden_helper_threads_deinitz_wait();
6485 }
6486
6487 KMP_MB(); /* Flush all pending memory write invalidates. */
6488 /* find out who we are and what we should do */
6489 {
6490 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6491 KA_TRACE(if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: enter T#%d (%d)\n"
, gtid, gtid_req); }
6492 10, ("__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: enter T#%d (%d)\n"
, gtid, gtid_req); }
;
6493 if (gtid == KMP_GTID_SHUTDOWN(-3)) {
6494 KA_TRACE(10, ("__kmp_internal_end_library: !__kmp_init_runtime, system "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: !__kmp_init_runtime, system "
"already shutdown\n"); }
6495 "already shutdown\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: !__kmp_init_runtime, system "
"already shutdown\n"); }
;
6496 return;
6497 } else if (gtid == KMP_GTID_MONITOR(-4)) {
6498 KA_TRACE(10, ("__kmp_internal_end_library: monitor thread, gtid not "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: monitor thread, gtid not "
"registered, or system shutdown\n"); }
6499 "registered, or system shutdown\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: monitor thread, gtid not "
"registered, or system shutdown\n"); }
;
6500 return;
6501 } else if (gtid == KMP_GTID_DNE(-2)) {
6502 KA_TRACE(10, ("__kmp_internal_end_library: gtid not registered or system "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: gtid not registered or system "
"shutdown\n"); }
6503 "shutdown\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: gtid not registered or system "
"shutdown\n"); }
;
6504 /* we don't know who we are, but we may still shut down the library */
6505 } else if (KMP_UBER_GTID(gtid)) {
6506 /* unregister ourselves as an uber thread. gtid is no longer valid */
6507 if (__kmp_root[gtid]->r.r_active) {
6508 __kmp_global.g.g_abort = -1;
6509 TCW_SYNC_4(__kmp_global.g.g_done, TRUE)(__kmp_global.g.g_done) = ((!0));
6510 __kmp_unregister_library();
6511 KA_TRACE(10,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: root still active, abort T#%d\n"
, gtid); }
6512 ("__kmp_internal_end_library: root still active, abort T#%d\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: root still active, abort T#%d\n"
, gtid); }
6513 gtid))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: root still active, abort T#%d\n"
, gtid); }
;
6514 return;
6515 } else {
6516 __kmp_itthash_clean(__kmp_threads[gtid]);
6517 KA_TRACE(if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: unregistering sibling T#%d\n"
, gtid); }
6518 10,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: unregistering sibling T#%d\n"
, gtid); }
6519 ("__kmp_internal_end_library: unregistering sibling T#%d\n", gtid))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: unregistering sibling T#%d\n"
, gtid); }
;
6520 __kmp_unregister_root_current_thread(gtid);
6521 }
6522 } else {
6523/* worker threads may call this function through the atexit handler, if they
6524 * call exit() */
6525/* For now, skip the usual subsequent processing and just dump the debug buffer.
6526 TODO: do a thorough shutdown instead */
6527#ifdef DUMP_DEBUG_ON_EXIT
6528 if (__kmp_debug_buf)
6529 __kmp_dump_debug_buffer();
6530#endif
6531 // added unregister library call here when we switched to shm on Linux;
6532 // if we don't, it will leave lots of files in /dev/shm.
6533 // Clean up the shared memory file before exiting.
6534 __kmp_unregister_library();
6535 return;
6536 }
6537 }
6538 /* synchronize the termination process */
6539 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6540
6541 /* have we already finished */
6542 if (__kmp_global.g.g_abort) {
6543 KA_TRACE(10, ("__kmp_internal_end_library: abort, exiting\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: abort, exiting\n"
); }
;
6544 /* TODO abort? */
6545 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6546 return;
6547 }
6548 if (TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done) || !__kmp_init_serial) {
6549 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6550 return;
6551 }
6552
6553 /* We need this lock to enforce mutex between this reading of
6554 __kmp_threads_capacity and the writing by __kmp_register_root.
6555 Alternatively, we can use a counter of roots that is atomically updated by
6556 __kmp_get_global_thread_id_reg, __kmp_do_serial_initialize and
6557 __kmp_internal_end_*. */
6558 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
6559
6560 /* now we can safely conduct the actual termination */
6561 __kmp_internal_end();
6562
6563 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6564 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6565
6566 KA_TRACE(10, ("__kmp_internal_end_library: exit\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: exit\n"
); }
;
6567
6568#ifdef DUMP_DEBUG_ON_EXIT
6569 if (__kmp_debug_buf)
6570 __kmp_dump_debug_buffer();
6571#endif
6572
6573#if KMP_OS_WINDOWS0
6574 __kmp_close_console();
6575#endif
6576
6577 __kmp_fini_allocator();
6578
6579} // __kmp_internal_end_library
6580
6581void __kmp_internal_end_thread(int gtid_req) {
6582 int i;
6583
6584 /* if we have already cleaned up, don't try again, it wouldn't be pretty */
6585 /* this shouldn't be a race condition because __kmp_internal_end() is the
6586 * only place to clear __kmp_serial_init */
6587 /* we'll check this later too, after we get the lock */
6588 // 2009-09-06: We do not set g_abort without setting g_done. This check looks
6589 // redundant, because the next check will work in any case.
6590 if (__kmp_global.g.g_abort) {
6591 KA_TRACE(11, ("__kmp_internal_end_thread: abort, exiting\n"))if (kmp_a_debug >= 11) { __kmp_debug_printf ("__kmp_internal_end_thread: abort, exiting\n"
); }
;
6592 /* TODO abort? */
6593 return;
6594 }
6595 if (TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done) || !__kmp_init_serial) {
6596 KA_TRACE(10, ("__kmp_internal_end_thread: already finished\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: already finished\n"
); }
;
6597 return;
6598 }
6599
6600 // If hidden helper team has been initialized, we need to deinit it
6601 if (TCR_4(__kmp_init_hidden_helper)(__kmp_init_hidden_helper) &&
6602 !TCR_4(__kmp_hidden_helper_team_done)(__kmp_hidden_helper_team_done)) {
6603 TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE)(__kmp_hidden_helper_team_done) = ((!0));
6604 // First release the main thread to let it continue its work
6605 __kmp_hidden_helper_main_thread_release();
6606 // Wait until the hidden helper team has been destroyed
6607 __kmp_hidden_helper_threads_deinitz_wait();
6608 }
6609
6610 KMP_MB(); /* Flush all pending memory write invalidates. */
6611
6612 /* find out who we are and what we should do */
6613 {
6614 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6615 KA_TRACE(10,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: enter T#%d (%d)\n"
, gtid, gtid_req); }
6616 ("__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: enter T#%d (%d)\n"
, gtid, gtid_req); }
;
6617 if (gtid == KMP_GTID_SHUTDOWN(-3)) {
6618 KA_TRACE(10, ("__kmp_internal_end_thread: !__kmp_init_runtime, system "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: !__kmp_init_runtime, system "
"already shutdown\n"); }
6619 "already shutdown\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: !__kmp_init_runtime, system "
"already shutdown\n"); }
;
6620 return;
6621 } else if (gtid == KMP_GTID_MONITOR(-4)) {
6622 KA_TRACE(10, ("__kmp_internal_end_thread: monitor thread, gtid not "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: monitor thread, gtid not "
"registered, or system shutdown\n"); }
6623 "registered, or system shutdown\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: monitor thread, gtid not "
"registered, or system shutdown\n"); }
;
6624 return;
6625 } else if (gtid == KMP_GTID_DNE(-2)) {
6626 KA_TRACE(10, ("__kmp_internal_end_thread: gtid not registered or system "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: gtid not registered or system "
"shutdown\n"); }
6627 "shutdown\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: gtid not registered or system "
"shutdown\n"); }
;
6628 return;
6629 /* we don't know who we are */
6630 } else if (KMP_UBER_GTID(gtid)) {
6631 /* unregister ourselves as an uber thread. gtid is no longer valid */
6632 if (__kmp_root[gtid]->r.r_active) {
6633 __kmp_global.g.g_abort = -1;
6634 TCW_SYNC_4(__kmp_global.g.g_done, TRUE)(__kmp_global.g.g_done) = ((!0));
6635 KA_TRACE(10,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: root still active, abort T#%d\n"
, gtid); }
6636 ("__kmp_internal_end_thread: root still active, abort T#%d\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: root still active, abort T#%d\n"
, gtid); }
6637 gtid))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: root still active, abort T#%d\n"
, gtid); }
;
6638 return;
6639 } else {
6640 KA_TRACE(10, ("__kmp_internal_end_thread: unregistering sibling T#%d\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: unregistering sibling T#%d\n"
, gtid); }
6641 gtid))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: unregistering sibling T#%d\n"
, gtid); }
;
6642 __kmp_unregister_root_current_thread(gtid);
6643 }
6644 } else {
6645 /* just a worker thread, let's leave */
6646 KA_TRACE(10, ("__kmp_internal_end_thread: worker thread T#%d\n", gtid))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: worker thread T#%d\n"
, gtid); }
;
6647
6648 if (gtid >= 0) {
6649 __kmp_threads[gtid]->th.th_task_team = NULL__null;
6650 }
6651
6652 KA_TRACE(10,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n"
, gtid); }
6653 ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n"
, gtid); }
6654 gtid))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n"
, gtid); }
;
6655 return;
6656 }
6657 }
6658#if KMP_DYNAMIC_LIB1
6659 if (__kmp_pause_status != kmp_hard_paused)
6660 // AC: let's not shut down the dynamic library at the exit of the uber thread,
6661 // because it is better to shut down later in the library destructor.
6662 {
6663 KA_TRACE(10, ("__kmp_internal_end_thread: exiting T#%d\n", gtid_req))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: exiting T#%d\n"
, gtid_req); }
;
6664 return;
6665 }
6666#endif
6667 /* synchronize the termination process */
6668 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6669
6670 /* have we already finished */
6671 if (__kmp_global.g.g_abort) {
6672 KA_TRACE(10, ("__kmp_internal_end_thread: abort, exiting\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: abort, exiting\n"
); }
;
6673 /* TODO abort? */
6674 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6675 return;
6676 }
6677 if (TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done) || !__kmp_init_serial) {
6678 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6679 return;
6680 }
6681
6682 /* We need this lock to enforce mutex between this reading of
6683 __kmp_threads_capacity and the writing by __kmp_register_root.
6684 Alternatively, we can use a counter of roots that is atomically updated by
6685 __kmp_get_global_thread_id_reg, __kmp_do_serial_initialize and
6686 __kmp_internal_end_*. */
6687
6688 /* should we finish the run-time? are all siblings done? */
6689 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
6690
6691 for (i = 0; i < __kmp_threads_capacity; ++i) {
6692 if (KMP_UBER_GTID(i)) {
6693 KA_TRACE(if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n"
, i); }
6694 10,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n"
, i); }
6695 ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n"
, i); }
;
6696 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6697 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6698 return;
6699 }
6700 }
6701
6702 /* now we can safely conduct the actual termination */
6703
6704 __kmp_internal_end();
6705
6706 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6707 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6708
6709 KA_TRACE(10, ("__kmp_internal_end_thread: exit T#%d\n", gtid_req))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: exit T#%d\n"
, gtid_req); }
;
6710
6711#ifdef DUMP_DEBUG_ON_EXIT
6712 if (__kmp_debug_buf)
6713 __kmp_dump_debug_buffer();
6714#endif
6715} // __kmp_internal_end_thread
6716
6717// -----------------------------------------------------------------------------
6718// Library registration stuff.
6719
6720static long __kmp_registration_flag = 0;
6721// Random value used to indicate library initialization.
6722static char *__kmp_registration_str = NULL__null;
6723// Value to be saved in env var __KMP_REGISTERED_LIB_<pid>.
6724
6725static inline char *__kmp_reg_status_name() {
6726/* On RHEL 3u5 if linked statically, getpid() returns different values in
6727 each thread. If registration and unregistration go in different threads
6728 (omp_misc_other_root_exit.cpp test case), the name of registered_lib_env
6729 env var cannot be found, because the name will contain a different pid. */
6730// macOS* complains about name being too long with additional getuid()
6731#if KMP_OS_UNIX1 && !KMP_OS_DARWIN0 && KMP_DYNAMIC_LIB1
6732 return __kmp_str_format("__KMP_REGISTERED_LIB_%d_%d", (int)getpid(),
6733 (int)getuid());
6734#else
6735 return __kmp_str_format("__KMP_REGISTERED_LIB_%d", (int)getpid());
6736#endif
6737} // __kmp_reg_status_get
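// For illustration only (the pid/uid values below are made up): with the
// dynamic library on Linux, a process with pid 4242 owned by uid 1000 would
// use the name
//     __KMP_REGISTERED_LIB_4242_1000
// while the Windows/macOS/static path above drops the uid suffix and uses
//     __KMP_REGISTERED_LIB_4242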
6738
6739#if defined(KMP_USE_SHM)
6740// If /dev/shm is not accessible, we will create a temporary file under /tmp.
6741char *temp_reg_status_file_name = nullptr;
6742#endif
6743
6744void __kmp_register_library_startup(void) {
6745
6746 char *name = __kmp_reg_status_name(); // Name of the environment variable.
6747 int done = 0;
6748 union {
6749 double dtime;
6750 long ltime;
6751 } time;
6752#if KMP_ARCH_X860 || KMP_ARCH_X86_641
6753 __kmp_initialize_system_tick();
6754#endif
6755 __kmp_read_system_time(&time.dtime);
6756 __kmp_registration_flag = 0xCAFE0000L | (time.ltime & 0x0000FFFFL);
6757 __kmp_registration_str =
6758 __kmp_str_format("%p-%lx-%s", &__kmp_registration_flag,
6759 __kmp_registration_flag, KMP_LIBRARY_FILE"libomp.so.5");
6760
6761 KA_TRACE(50, ("__kmp_register_library_startup: %s=\"%s\"\n", name,if (kmp_a_debug >= 50) { __kmp_debug_printf ("__kmp_register_library_startup: %s=\"%s\"\n"
, name, __kmp_registration_str); }
6762 __kmp_registration_str))if (kmp_a_debug >= 50) { __kmp_debug_printf ("__kmp_register_library_startup: %s=\"%s\"\n"
, name, __kmp_registration_str); }
;
6763
6764 while (!done) {
6765
6766 char *value = NULL__null; // Actual value of the environment variable.
6767
6768#if defined(KMP_USE_SHM)
6769 char *shm_name = __kmp_str_format("/%s", name);
6770 int shm_preexist = 0;
6771 char *data1;
6772 int fd1 = shm_open(shm_name, O_CREAT0100 | O_EXCL0200 | O_RDWR02, 0666);
6773 if ((fd1 == -1) && (errno(*__errno_location ()) == EEXIST17)) {
6774 // file didn't open because it already exists.
6775 // try opening existing file
6776 fd1 = shm_open(shm_name, O_RDWR02, 0666);
6777 if (fd1 == -1) { // file didn't open
6778 // error out here
6779 __kmp_fatal(KMP_MSG(FunctionError, "Can't open SHM")__kmp_msg_format(kmp_i18n_msg_FunctionError, "Can't open SHM"
)
, KMP_ERR(0)__kmp_msg_error_code(0),
6780 __kmp_msg_null);
6781 } else {
6782 // able to open existing file
6783 shm_preexist = 1;
6784 }
6785 } else if (fd1 == -1) {
6786 // SHM didn't open due to an error other than "already exists". Try to
6787 // create a temp file under /tmp.
6788 // TODO: /tmp might not always be the temporary directory. For now we will
6789 // not consider TMPDIR. If /tmp is not accessible, we simply error out.
6790 char *temp_file_name = __kmp_str_format("/tmp/%sXXXXXX", name);
6791 fd1 = mkstemp(temp_file_name);
6792 if (fd1 == -1) {
6793 // error out here.
6794 __kmp_fatal(KMP_MSG(FunctionError, "Can't open TEMP")__kmp_msg_format(kmp_i18n_msg_FunctionError, "Can't open TEMP"
)
, KMP_ERR(errno)__kmp_msg_error_code((*__errno_location ())),
6795 __kmp_msg_null);
6796 }
6797 temp_reg_status_file_name = temp_file_name;
6798 }
6799 if (shm_preexist == 0) {
6800 // we created the SHM; now set its size
6801 if (ftruncate(fd1, SHM_SIZE1024) == -1) {
6802 // error occurred setting size.
6803 __kmp_fatal(KMP_MSG(FunctionError, "Can't set size of SHM")__kmp_msg_format(kmp_i18n_msg_FunctionError, "Can't set size of SHM"
)
,
6804 KMP_ERR(errno)__kmp_msg_error_code((*__errno_location ())), __kmp_msg_null);
6805 }
6806 }
6807 data1 =
6808 (char *)mmap(0, SHM_SIZE1024, PROT_READ0x1 | PROT_WRITE0x2, MAP_SHARED0x01, fd1, 0);
6809 if (data1 == MAP_FAILED((void *) -1)) {
6810 // failed to map shared memory
6811 __kmp_fatal(KMP_MSG(FunctionError, "Can't map SHM")__kmp_msg_format(kmp_i18n_msg_FunctionError, "Can't map SHM"), KMP_ERR(errno)__kmp_msg_error_code((*__errno_location ())),
6812 __kmp_msg_null);
6813 }
6814 if (shm_preexist == 0) { // set data to SHM, set value
6815 KMP_STRCPY_S(data1, SHM_SIZE, __kmp_registration_str)strcpy(data1, __kmp_registration_str);
6816 }
6817 // Read the value from either what we just wrote or the existing file.
6818 value = __kmp_str_format("%s", data1); // read value from SHM
6819 munmap(data1, SHM_SIZE1024);
6820 close(fd1);
6821#else // Windows and unix with static library
6822 // Set the environment variable, but do not overwrite it if it already exists.
6823 __kmp_env_set(name, __kmp_registration_str, 0);
6824 // read value to see if it got set
6825 value = __kmp_env_get(name);
6826#endif
6827
6828 if (value != NULL__null && strcmp(value, __kmp_registration_str) == 0) {
6829 done = 1; // Ok, environment variable set successfully, exit the loop.
6830 } else {
6831 // Oops. Write failed. Another copy of the OpenMP RTL is in memory.
6832 // Check whether it is alive or dead.
6833 int neighbor = 0; // 0 -- unknown status, 1 -- alive, 2 -- dead.
6834 char *tail = value;
6835 char *flag_addr_str = NULL__null;
6836 char *flag_val_str = NULL__null;
6837 char const *file_name = NULL__null;
6838 __kmp_str_split(tail, '-', &flag_addr_str, &tail);
6839 __kmp_str_split(tail, '-', &flag_val_str, &tail);
6840 file_name = tail;
6841 if (tail != NULL__null) {
6842 unsigned long *flag_addr = 0;
6843 unsigned long flag_val = 0;
6844 KMP_SSCANFsscanf(flag_addr_str, "%p", RCAST(void **, &flag_addr)reinterpret_cast<void **>(&flag_addr));
6845 KMP_SSCANFsscanf(flag_val_str, "%lx", &flag_val);
6846 if (flag_addr != 0 && flag_val != 0 && strcmp(file_name, "") != 0) {
6847 // First, check whether environment-encoded address is mapped into
6848 // addr space.
6849 // If so, dereference it to see if it still has the right value.
6850 if (__kmp_is_address_mapped(flag_addr) && *flag_addr == flag_val) {
6851 neighbor = 1;
6852 } else {
6853 // If not, then we know the other copy of the library is no longer
6854 // running.
6855 neighbor = 2;
6856 }
6857 }
6858 }
6859 switch (neighbor) {
6860 case 0: // Cannot parse environment variable -- neighbor status unknown.
6861 // Assume it is an incompatible format from a future version of the
6862 // library. Assume the other library is alive.
6863 // WARN( ... ); // TODO: Issue a warning.
6864 file_name = "unknown library";
6865 KMP_FALLTHROUGH()[[fallthrough]];
6866 // Attention! Falling through to the next case. That's intentional.
6867 case 1: { // Neighbor is alive.
6868 // Check it is allowed.
6869 char *duplicate_ok = __kmp_env_get("KMP_DUPLICATE_LIB_OK");
6870 if (!__kmp_str_match_true(duplicate_ok)) {
6871 // That's not allowed. Issue fatal error.
6872 __kmp_fatal(KMP_MSG(DuplicateLibrary, KMP_LIBRARY_FILE, file_name)__kmp_msg_format(kmp_i18n_msg_DuplicateLibrary, "libomp.so.5"
, file_name)
,
6873 KMP_HNT(DuplicateLibrary)__kmp_msg_format(kmp_i18n_hnt_DuplicateLibrary), __kmp_msg_null);
6874 }
6875 KMP_INTERNAL_FREE(duplicate_ok)free(duplicate_ok);
6876 __kmp_duplicate_library_ok = 1;
6877 done = 1; // Exit the loop.
6878 } break;
6879 case 2: { // Neighbor is dead.
6880
6881#if defined(KMP_USE_SHM)
6882 // close shared memory.
6883 shm_unlink(shm_name); // this removes file in /dev/shm
6884#else
6885 // Clear the variable and try to register library again.
6886 __kmp_env_unset(name);
6887#endif
6888 } break;
6889 default: {
6890 KMP_DEBUG_ASSERT(0)if (!(0)) { __kmp_debug_assert("0", "openmp/runtime/src/kmp_runtime.cpp"
, 6890); }
;
6891 } break;
6892 }
6893 }
6894 KMP_INTERNAL_FREE((void *)value)free((void *)value);
6895#if defined(KMP_USE_SHM)
6896 KMP_INTERNAL_FREE((void *)shm_name)free((void *)shm_name);
6897#endif
6898 } // while
6899 KMP_INTERNAL_FREE((void *)name)free((void *)name);
6900
6901} // func __kmp_register_library_startup
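/* A self-contained sketch of the "<flag address>-<flag value>-<library file>"
   registration string that the loop above writes and later parses back when
   it finds another copy already registered. Plain snprintf/sscanf stand in
   for the __kmp_str_format/__kmp_str_split/KMP_SSCANF wrappers, the helper
   names are invented for illustration, and the real liveness check also
   calls __kmp_is_address_mapped() before dereferencing the address. */
#include <cstddef>
#include <cstdio>

static long example_registration_flag = 0xCAFE0000L | 0x1234L;

static void example_make_reg_value(char *buf, size_t n) {
  snprintf(buf, n, "%p-%lx-%s", (void *)&example_registration_flag,
           (unsigned long)example_registration_flag, "libomp.so.5");
}

// Returns true if the encoded address still holds the encoded value, i.e.
// the copy of the runtime that wrote the string still appears to be alive.
static bool example_neighbor_alive(const char *value) {
  void *addr = nullptr;
  unsigned long val = 0;
  char file[64] = "";
  if (sscanf(value, "%p-%lx-%63s", &addr, &val, file) != 3)
    return true; // unknown/incompatible format: conservatively assume alive
  return addr != nullptr && *(volatile unsigned long *)addr == val;
}
// In-process round trip: example_make_reg_value(buf, sizeof buf) followed by
// example_neighbor_alive(buf) returns true, since the flag is still mapped
// and unchanged.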
6902
6903void __kmp_unregister_library(void) {
6904
6905 char *name = __kmp_reg_status_name();
6906 char *value = NULL__null;
6907
6908#if defined(KMP_USE_SHM)
6909 bool use_shm = true;
6910 char *shm_name = __kmp_str_format("/%s", name);
6911 int fd1 = shm_open(shm_name, O_RDONLY00, 0666);
6912 if (fd1 == -1) {
6913 // File did not open. Try the temporary file.
6914 use_shm = false;
6915 KMP_DEBUG_ASSERT(temp_reg_status_file_name)if (!(temp_reg_status_file_name)) { __kmp_debug_assert("temp_reg_status_file_name"
, "openmp/runtime/src/kmp_runtime.cpp", 6915); }
;
6916 fd1 = open(temp_reg_status_file_name, O_RDONLY00);
6917 if (fd1 == -1) {
6918 // give up now.
6919 return;
6920 }
6921 }
6922 char *data1 = (char *)mmap(0, SHM_SIZE1024, PROT_READ0x1, MAP_SHARED0x01, fd1, 0);
6923 if (data1 != MAP_FAILED((void *) -1)) {
6924 value = __kmp_str_format("%s", data1); // read value from SHM
6925 munmap(data1, SHM_SIZE1024);
6926 }
6927 close(fd1);
6928#else
6929 value = __kmp_env_get(name);
6930#endif
6931
6932 KMP_DEBUG_ASSERT(__kmp_registration_flag != 0)if (!(__kmp_registration_flag != 0)) { __kmp_debug_assert("__kmp_registration_flag != 0"
, "openmp/runtime/src/kmp_runtime.cpp", 6932); }
;
6933 KMP_DEBUG_ASSERT(__kmp_registration_str != NULL)if (!(__kmp_registration_str != __null)) { __kmp_debug_assert
("__kmp_registration_str != __null", "openmp/runtime/src/kmp_runtime.cpp"
, 6933); }
;
6934 if (value != NULL__null && strcmp(value, __kmp_registration_str) == 0) {
6935// Ok, this is our variable. Delete it.
6936#if defined(KMP_USE_SHM)
6937 if (use_shm) {
6938 shm_unlink(shm_name); // this removes file in /dev/shm
6939 } else {
6940 KMP_DEBUG_ASSERT(temp_reg_status_file_name)if (!(temp_reg_status_file_name)) { __kmp_debug_assert("temp_reg_status_file_name"
, "openmp/runtime/src/kmp_runtime.cpp", 6940); }
;
6941 unlink(temp_reg_status_file_name); // this removes the temp file
6942 }
6943#else
6944 __kmp_env_unset(name);
6945#endif
6946 }
6947
6948#if defined(KMP_USE_SHM)
6949 KMP_INTERNAL_FREE(shm_name)free(shm_name);
6950 if (!use_shm) {
6951 KMP_DEBUG_ASSERT(temp_reg_status_file_name)if (!(temp_reg_status_file_name)) { __kmp_debug_assert("temp_reg_status_file_name"
, "openmp/runtime/src/kmp_runtime.cpp", 6951); }
;
6952 KMP_INTERNAL_FREE(temp_reg_status_file_name)free(temp_reg_status_file_name);
6953 }
6954#endif
6955
6956 KMP_INTERNAL_FREE(__kmp_registration_str)free(__kmp_registration_str);
6957 KMP_INTERNAL_FREE(value)free(value);
6958 KMP_INTERNAL_FREE(name)free(name);
6959
6960 __kmp_registration_flag = 0;
6961 __kmp_registration_str = NULL__null;
6962
6963} // __kmp_unregister_library
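/* A self-contained sketch of the shared-memory read-back pattern that both
   __kmp_register_library_startup and __kmp_unregister_library use under
   KMP_USE_SHM: open the named segment read-only, map it, and copy out the
   NUL-terminated registration string. The 1024-byte size mirrors SHM_SIZE
   above; the helper name is invented for illustration (link with -lrt on
   older glibc). */
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <string>

static bool example_read_reg_shm(const char *shm_name, std::string *out) {
  int fd = shm_open(shm_name, O_RDONLY, 0666);
  if (fd == -1)
    return false; // segment does not exist (or is not accessible)
  void *data = mmap(0, 1024, PROT_READ, MAP_SHARED, fd, 0);
  bool ok = (data != MAP_FAILED);
  if (ok) {
    out->assign((const char *)data); // the string was stored NUL-terminated
    munmap(data, 1024);
  }
  close(fd);
  return ok;
}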
6964
6965// End of Library registration stuff.
6966// -----------------------------------------------------------------------------
6967
6968#if KMP_MIC_SUPPORTED((0 || 1) && (1 || 0))
6969
6970static void __kmp_check_mic_type() {
6971 kmp_cpuid_t cpuid_state = {0};
6972 kmp_cpuid_t *cs_p = &cpuid_state;
6973 __kmp_x86_cpuid(1, 0, cs_p);
6974 // We don't support mic1 at the moment
6975 if ((cs_p->eax & 0xff0) == 0xB10) {
6976 __kmp_mic_type = mic2;
6977 } else if ((cs_p->eax & 0xf0ff0) == 0x50670) {
6978 __kmp_mic_type = mic3;
6979 } else {
6980 __kmp_mic_type = non_mic;
6981 }
6982}
6983
6984#endif /* KMP_MIC_SUPPORTED */
6985
6986#if KMP_HAVE_UMWAIT((0 || 1) && (1 || 0) && !0)
6987static void __kmp_user_level_mwait_init() {
6988 struct kmp_cpuid buf;
6989 __kmp_x86_cpuid(7, 0, &buf);
6990 __kmp_waitpkg_enabled = ((buf.ecx >> 5) & 1);
6991 __kmp_umwait_enabled = __kmp_waitpkg_enabled && __kmp_user_level_mwait;
6992 __kmp_tpause_enabled = __kmp_waitpkg_enabled && (__kmp_tpause_state > 0);
6993 KF_TRACE(30, ("__kmp_user_level_mwait_init: __kmp_umwait_enabled = %d\n",if (kmp_f_debug >= 30) { __kmp_debug_printf ("__kmp_user_level_mwait_init: __kmp_umwait_enabled = %d\n"
, __kmp_umwait_enabled); }
6994 __kmp_umwait_enabled))if (kmp_f_debug >= 30) { __kmp_debug_printf ("__kmp_user_level_mwait_init: __kmp_umwait_enabled = %d\n"
, __kmp_umwait_enabled); }
;
6995}
6996#elif KMP_HAVE_MWAIT((0 || 1) && (1 || 0) && !0)
6997#ifndef AT_INTELPHIUSERMWAIT
6998// Spurious, non-existent value that should always fail to return anything.
6999// Will be replaced with the correct value when we know that.
7000#define AT_INTELPHIUSERMWAIT 10000
7001#endif
7002// getauxval() function is available in RHEL7 and SLES12. If a system with an
7003// earlier OS is used to build the RTL, we'll use the following internal
7004// function when the entry is not found.
7005unsigned long getauxval(unsigned long) KMP_WEAK_ATTRIBUTE_EXTERNAL;
7006unsigned long getauxval(unsigned long) { return 0; }
7007
7008static void __kmp_user_level_mwait_init() {
7009 // When getauxval() and correct value of AT_INTELPHIUSERMWAIT are available
7010 // use them to find if the user-level mwait is enabled. Otherwise, forcibly
7011 // set __kmp_mwait_enabled=TRUE on Intel MIC if the environment variable
7012 // KMP_USER_LEVEL_MWAIT was set to TRUE.
7013 if (__kmp_mic_type == mic3) {
7014 unsigned long res = getauxval(AT_INTELPHIUSERMWAIT);
7015 if ((res & 0x1) || __kmp_user_level_mwait) {
7016 __kmp_mwait_enabled = TRUE(!0);
7017 if (__kmp_user_level_mwait) {
7018 KMP_INFORM(EnvMwaitWarn)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_EnvMwaitWarn
), __kmp_msg_null)
;
7019 }
7020 } else {
7021 __kmp_mwait_enabled = FALSE0;
7022 }
7023 }
7024 KF_TRACE(30, ("__kmp_user_level_mwait_init: __kmp_mic_type = %d, "if (kmp_f_debug >= 30) { __kmp_debug_printf ("__kmp_user_level_mwait_init: __kmp_mic_type = %d, "
"__kmp_mwait_enabled = %d\n", __kmp_mic_type, __kmp_mwait_enabled
); }
7025 "__kmp_mwait_enabled = %d\n",if (kmp_f_debug >= 30) { __kmp_debug_printf ("__kmp_user_level_mwait_init: __kmp_mic_type = %d, "
"__kmp_mwait_enabled = %d\n", __kmp_mic_type, __kmp_mwait_enabled
); }
7026 __kmp_mic_type, __kmp_mwait_enabled))if (kmp_f_debug >= 30) { __kmp_debug_printf ("__kmp_user_level_mwait_init: __kmp_mic_type = %d, "
"__kmp_mwait_enabled = %d\n", __kmp_mic_type, __kmp_mwait_enabled
); }
;
7027}
7028#endif /* KMP_HAVE_UMWAIT */
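/* A minimal user-space probe for the same WAITPKG feature bit the UMWAIT
   branch above reads (CPUID leaf 7, subleaf 0, ECX bit 5), written against
   the GCC/Clang <cpuid.h> helper instead of the runtime's __kmp_x86_cpuid
   wrapper; shown only to illustrate where __kmp_waitpkg_enabled comes from. */
#include <cpuid.h>

static bool example_has_waitpkg(void) {
  unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;
  if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx))
    return false;        // CPUID leaf 7 not supported on this CPU
  return (ecx >> 5) & 1; // ECX bit 5 = WAITPKG (umonitor/umwait/tpause)
}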
7029
7030static void __kmp_do_serial_initialize(void) {
7031 int i, gtid;
7032 size_t size;
7033
7034 KA_TRACE(10, ("__kmp_do_serial_initialize: enter\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_do_serial_initialize: enter\n"
); }
;
7035
7036 KMP_DEBUG_ASSERT(sizeof(kmp_int32) == 4)if (!(sizeof(kmp_int32) == 4)) { __kmp_debug_assert("sizeof(kmp_int32) == 4"
, "openmp/runtime/src/kmp_runtime.cpp", 7036); }
;
7037 KMP_DEBUG_ASSERT(sizeof(kmp_uint32) == 4)if (!(sizeof(kmp_uint32) == 4)) { __kmp_debug_assert("sizeof(kmp_uint32) == 4"
, "openmp/runtime/src/kmp_runtime.cpp", 7037); }
;
7038 KMP_DEBUG_ASSERT(sizeof(kmp_int64) == 8)if (!(sizeof(kmp_int64) == 8)) { __kmp_debug_assert("sizeof(kmp_int64) == 8"
, "openmp/runtime/src/kmp_runtime.cpp", 7038); }
;
7039 KMP_DEBUG_ASSERT(sizeof(kmp_uint64) == 8)if (!(sizeof(kmp_uint64) == 8)) { __kmp_debug_assert("sizeof(kmp_uint64) == 8"
, "openmp/runtime/src/kmp_runtime.cpp", 7039); }
;
7040 KMP_DEBUG_ASSERT(sizeof(kmp_intptr_t) == sizeof(void *))if (!(sizeof(kmp_intptr_t) == sizeof(void *))) { __kmp_debug_assert
("sizeof(kmp_intptr_t) == sizeof(void *)", "openmp/runtime/src/kmp_runtime.cpp"
, 7040); }
;
7041
7042#if OMPT_SUPPORT1
7043 ompt_pre_init();
7044#endif
7045#if OMPD_SUPPORT1
7046 __kmp_env_dump();
7047 ompd_init();
7048#endif
7049
7050 __kmp_validate_locks();
7051
7052#if ENABLE_LIBOMPTARGET1
7053 /* Initialize functions from libomptarget */
7054 __kmp_init_omptarget();
7055#endif
7056
7057 /* Initialize internal memory allocator */
7058 __kmp_init_allocator();
7059
7060 /* Register the library startup via an environment variable or via mapped
7061 shared memory file and check to see whether another copy of the library is
7062 already registered. Since a forked child process is often terminated, we
7063 postpone the registration until middle initialization in the child. */
7064 if (__kmp_need_register_serial)
7065 __kmp_register_library_startup();
7066
7067 /* TODO reinitialization of library */
7068 if (TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done)) {
7069 KA_TRACE(10, ("__kmp_do_serial_initialize: reinitialization of library\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_do_serial_initialize: reinitialization of library\n"
); }
;
7070 }
7071
7072 __kmp_global.g.g_abort = 0;
7073 TCW_SYNC_4(__kmp_global.g.g_done, FALSE)(__kmp_global.g.g_done) = (0);
7074
7075/* initialize the locks */
7076#if KMP_USE_ADAPTIVE_LOCKS(0 || 1) && !0
7077#if KMP_DEBUG_ADAPTIVE_LOCKS0
7078 __kmp_init_speculative_stats();
7079#endif
7080#endif
7081#if KMP_STATS_ENABLED0
7082 __kmp_stats_init();
7083#endif
7084 __kmp_init_lock(&__kmp_global_lock);
7085 __kmp_init_queuing_lock(&__kmp_dispatch_lock);
7086 __kmp_init_lock(&__kmp_debug_lock);
7087 __kmp_init_atomic_lock(&__kmp_atomic_lock);
7088 __kmp_init_atomic_lock(&__kmp_atomic_lock_1i);
7089 __kmp_init_atomic_lock(&__kmp_atomic_lock_2i);
7090 __kmp_init_atomic_lock(&__kmp_atomic_lock_4i);
7091 __kmp_init_atomic_lock(&__kmp_atomic_lock_4r);
7092 __kmp_init_atomic_lock(&__kmp_atomic_lock_8i);
7093 __kmp_init_atomic_lock(&__kmp_atomic_lock_8r);
7094 __kmp_init_atomic_lock(&__kmp_atomic_lock_8c);
7095 __kmp_init_atomic_lock(&__kmp_atomic_lock_10r);
7096 __kmp_init_atomic_lock(&__kmp_atomic_lock_16r);
7097 __kmp_init_atomic_lock(&__kmp_atomic_lock_16c);
7098 __kmp_init_atomic_lock(&__kmp_atomic_lock_20c);
7099 __kmp_init_atomic_lock(&__kmp_atomic_lock_32c);
7100 __kmp_init_bootstrap_lock(&__kmp_forkjoin_lock);
7101 __kmp_init_bootstrap_lock(&__kmp_exit_lock);
7102#if KMP_USE_MONITOR
7103 __kmp_init_bootstrap_lock(&__kmp_monitor_lock);
7104#endif
7105 __kmp_init_bootstrap_lock(&__kmp_tp_cached_lock);
7106
7107 /* conduct initialization and initial setup of configuration */
7108
7109 __kmp_runtime_initialize();
7110
7111#if KMP_MIC_SUPPORTED((0 || 1) && (1 || 0))
7112 __kmp_check_mic_type();
7113#endif
7114
7115// Some global variable initialization moved here from kmp_env_initialize()
7116#ifdef KMP_DEBUG1
7117 kmp_diag = 0;
7118#endif
7119 __kmp_abort_delay = 0;
7120
7121 // From __kmp_init_dflt_team_nth()
7122 /* assume the entire machine will be used */
7123 __kmp_dflt_team_nth_ub = __kmp_xproc;
7124 if (__kmp_dflt_team_nth_ub < KMP_MIN_NTH1) {
7125 __kmp_dflt_team_nth_ub = KMP_MIN_NTH1;
7126 }
7127 if (__kmp_dflt_team_nth_ub > __kmp_sys_max_nth) {
7128 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
7129 }
7130 __kmp_max_nth = __kmp_sys_max_nth;
7131 __kmp_cg_max_nth = __kmp_sys_max_nth;
7132 __kmp_teams_max_nth = __kmp_xproc; // set a "reasonable" default
7133 if (__kmp_teams_max_nth > __kmp_sys_max_nth) {
7134 __kmp_teams_max_nth = __kmp_sys_max_nth;
7135 }
7136
7137 // Three vars below moved here from __kmp_env_initialize() "KMP_BLOCKTIME"
7138 // part
7139 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME(__kmp_is_hybrid_cpu() ? (0) : (200));
7140#if KMP_USE_MONITOR
7141 __kmp_monitor_wakeups =
7142 KMP_WAKEUPS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
7143 __kmp_bt_intervals =
7144 KMP_INTERVALS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
7145#endif
7146 // From "KMP_LIBRARY" part of __kmp_env_initialize()
7147 __kmp_library = library_throughput;
7148 // From KMP_SCHEDULE initialization
7149 __kmp_static = kmp_sch_static_balanced;
7150// AC: do not use analytical here, because it is non-monotonous
7151//__kmp_guided = kmp_sch_guided_iterative_chunked;
7152//__kmp_auto = kmp_sch_guided_analytical_chunked; // AC: it is the default, no
7153// need to repeat assignment
7154// Barrier initialization. Moved here from __kmp_env_initialize() Barrier branch
7155// bit control and barrier method control parts
7156#if KMP_FAST_REDUCTION_BARRIER1
7157#define kmp_reduction_barrier_gather_bb ((int)1)
7158#define kmp_reduction_barrier_release_bb ((int)1)
7159#define kmp_reduction_barrier_gather_pat __kmp_barrier_gather_pat_dflt
7160#define kmp_reduction_barrier_release_pat __kmp_barrier_release_pat_dflt
7161#endif // KMP_FAST_REDUCTION_BARRIER
7162 for (i = bs_plain_barrier; i < bs_last_barrier; i++) {
7163 __kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt;
7164 __kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt;
7165 __kmp_barrier_gather_pattern[i] = __kmp_barrier_gather_pat_dflt;
7166 __kmp_barrier_release_pattern[i] = __kmp_barrier_release_pat_dflt;
7167#if KMP_FAST_REDUCTION_BARRIER1
7168 if (i == bs_reduction_barrier) { // tested and confirmed on ALTIX only (
7169 // lin_64 ): hyper,1
7170 __kmp_barrier_gather_branch_bits[i] = kmp_reduction_barrier_gather_bb;
7171 __kmp_barrier_release_branch_bits[i] = kmp_reduction_barrier_release_bb;
7172 __kmp_barrier_gather_pattern[i] = kmp_reduction_barrier_gather_pat;
7173 __kmp_barrier_release_pattern[i] = kmp_reduction_barrier_release_pat;
7174 }
7175#endif // KMP_FAST_REDUCTION_BARRIER
7176 }
7177#if KMP_FAST_REDUCTION_BARRIER1
7178#undef kmp_reduction_barrier_release_pat
7179#undef kmp_reduction_barrier_gather_pat
7180#undef kmp_reduction_barrier_release_bb
7181#undef kmp_reduction_barrier_gather_bb
7182#endif // KMP_FAST_REDUCTION_BARRIER
7183#if KMP_MIC_SUPPORTED((0 || 1) && (1 || 0))
7184 if (__kmp_mic_type == mic2) { // KNC
7185 // AC: plane=3,2, forkjoin=2,1 are optimal for 240 threads on KNC
7186 __kmp_barrier_gather_branch_bits[bs_plain_barrier] = 3; // plain gather
7187 __kmp_barrier_release_branch_bits[bs_forkjoin_barrier] =
7188 1; // forkjoin release
7189 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
7190 __kmp_barrier_release_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
7191 }
7192#if KMP_FAST_REDUCTION_BARRIER1
7193 if (__kmp_mic_type == mic2) { // KNC
7194 __kmp_barrier_gather_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
7195 __kmp_barrier_release_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
7196 }
7197#endif // KMP_FAST_REDUCTION_BARRIER
7198#endif // KMP_MIC_SUPPORTED
7199
7200// From KMP_CHECKS initialization
7201#ifdef KMP_DEBUG1
7202 __kmp_env_checks = TRUE(!0); /* development versions have the extra checks */
7203#else
7204 __kmp_env_checks = FALSE0; /* port versions do not have the extra checks */
7205#endif
7206
7207 // From "KMP_FOREIGN_THREADS_THREADPRIVATE" initialization
7208 __kmp_foreign_tp = TRUE(!0);
7209
7210 __kmp_global.g.g_dynamic = FALSE0;
7211 __kmp_global.g.g_dynamic_mode = dynamic_default;
7212
7213 __kmp_init_nesting_mode();
7214
7215 __kmp_env_initialize(NULL__null);
7216
7217#if KMP_HAVE_MWAIT((0 || 1) && (1 || 0) && !0) || KMP_HAVE_UMWAIT((0 || 1) && (1 || 0) && !0)
7218 __kmp_user_level_mwait_init();
7219#endif
7220// Print all messages in message catalog for testing purposes.
7221#ifdef KMP_DEBUG1
7222 char const *val = __kmp_env_get("KMP_DUMP_CATALOG");
7223 if (__kmp_str_match_true(val)) {
7224 kmp_str_buf_t buffer;
7225 __kmp_str_buf_init(&buffer){ (&buffer)->str = (&buffer)->bulk; (&buffer
)->size = sizeof((&buffer)->bulk); (&buffer)->
used = 0; (&buffer)->bulk[0] = 0; }
;
7226 __kmp_i18n_dump_catalog(&buffer);
7227 __kmp_printf("%s", buffer.str);
7228 __kmp_str_buf_free(&buffer);
7229 }
7230 __kmp_env_free(&val);
7231#endif
7232
7233 __kmp_threads_capacity =
7234 __kmp_initial_threads_capacity(__kmp_dflt_team_nth_ub);
7235 // Moved here from __kmp_env_initialize() "KMP_ALL_THREADPRIVATE" part
7236 __kmp_tp_capacity = __kmp_default_tp_capacity(
7237 __kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
7238
7239 // If the library is shut down properly, both pools must be NULL. Just in
7240 // case, set them to NULL -- some memory may leak, but subsequent code will
7241 // work even if pools are not freed.
7242 KMP_DEBUG_ASSERT(__kmp_thread_pool == NULL)if (!(__kmp_thread_pool == __null)) { __kmp_debug_assert("__kmp_thread_pool == __null"
, "openmp/runtime/src/kmp_runtime.cpp", 7242); }
;
7243 KMP_DEBUG_ASSERT(__kmp_thread_pool_insert_pt == NULL)if (!(__kmp_thread_pool_insert_pt == __null)) { __kmp_debug_assert
("__kmp_thread_pool_insert_pt == __null", "openmp/runtime/src/kmp_runtime.cpp"
, 7243); }
;
7244 KMP_DEBUG_ASSERT(__kmp_team_pool == NULL)if (!(__kmp_team_pool == __null)) { __kmp_debug_assert("__kmp_team_pool == __null"
, "openmp/runtime/src/kmp_runtime.cpp", 7244); }
;
7245 __kmp_thread_pool = NULL__null;
7246 __kmp_thread_pool_insert_pt = NULL__null;
7247 __kmp_team_pool = NULL__null;
7248
7249 /* Allocate all of the variable sized records */
7250 /* NOTE: __kmp_threads_capacity entries are allocated, but the arrays are
7251 * expandable */
7252 /* Since allocation is cache-aligned, just add extra padding at the end */
7253 size =
7254 (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * __kmp_threads_capacity +
7255 CACHE_LINE64;
7256 __kmp_threads = (kmp_info_t **)__kmp_allocate(size)___kmp_allocate((size), "openmp/runtime/src/kmp_runtime.cpp",
7256)
;
7257 __kmp_root = (kmp_root_t **)((char *)__kmp_threads +
7258 sizeof(kmp_info_t *) * __kmp_threads_capacity);
7259
7260 /* init thread counts */
7261 KMP_DEBUG_ASSERT(__kmp_all_nth ==if (!(__kmp_all_nth == 0)) { __kmp_debug_assert("__kmp_all_nth == 0"
, "openmp/runtime/src/kmp_runtime.cpp", 7262); }
7262 0)if (!(__kmp_all_nth == 0)) { __kmp_debug_assert("__kmp_all_nth == 0"
, "openmp/runtime/src/kmp_runtime.cpp", 7262); }
; // Asserts fail if the library is reinitializing and
7263 KMP_DEBUG_ASSERT(__kmp_nth == 0)if (!(__kmp_nth == 0)) { __kmp_debug_assert("__kmp_nth == 0",
"openmp/runtime/src/kmp_runtime.cpp", 7263); }
; // something was wrong in termination.
7264 __kmp_all_nth = 0;
7265 __kmp_nth = 0;
7266
7267 /* setup the uber master thread and hierarchy */
7268 gtid = __kmp_register_root(TRUE(!0));
7269 KA_TRACE(10, ("__kmp_do_serial_initialize T#%d\n", gtid))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_do_serial_initialize T#%d\n"
, gtid); }
;
7270 KMP_ASSERT(KMP_UBER_GTID(gtid))if (!(KMP_UBER_GTID(gtid))) { __kmp_debug_assert("KMP_UBER_GTID(gtid)"
, "openmp/runtime/src/kmp_runtime.cpp", 7270); }
;
7271 KMP_ASSERT(KMP_INITIAL_GTID(gtid))if (!((0 == (gtid)))) { __kmp_debug_assert("KMP_INITIAL_GTID(gtid)"
, "openmp/runtime/src/kmp_runtime.cpp", 7271); }
;
7272
7273 KMP_MB(); /* Flush all pending memory write invalidates. */
7274
7275 __kmp_common_initialize();
7276
7277#if KMP_OS_UNIX1
7278  /* register the atfork handlers (prepare/parent/child) */
7279 __kmp_register_atfork();
7280#endif
7281
7282#if !KMP_DYNAMIC_LIB1 || \
7283 ((KMP_COMPILER_ICC0 || KMP_COMPILER_ICX0) && KMP_OS_DARWIN0)
7284 {
7285 /* Invoke the exit handler when the program finishes, only for static
7286 library and macOS* dynamic. For other dynamic libraries, we already
7287 have _fini and DllMain. */
7288 int rc = atexit(__kmp_internal_end_atexit);
7289 if (rc != 0) {
7290 __kmp_fatal(KMP_MSG(FunctionError, "atexit()")__kmp_msg_format(kmp_i18n_msg_FunctionError, "atexit()"), KMP_ERR(rc)__kmp_msg_error_code(rc),
7291 __kmp_msg_null);
7292 }
7293 }
7294#endif
7295
7296#if KMP_HANDLE_SIGNALS(1 || 0)
7297#if KMP_OS_UNIX1
7298 /* NOTE: make sure that this is called before the user installs their own
7299 signal handlers so that the user handlers are called first. this way they
7300 can return false, not call our handler, avoid terminating the library, and
7301 continue execution where they left off. */
7302 __kmp_install_signals(FALSE0);
7303#endif /* KMP_OS_UNIX */
7304#if KMP_OS_WINDOWS0
7305 __kmp_install_signals(TRUE(!0));
7306#endif /* KMP_OS_WINDOWS */
7307#endif
7308
7309 /* we have finished the serial initialization */
7310 __kmp_init_counter++;
7311
7312 __kmp_init_serial = TRUE(!0);
7313
7314 if (__kmp_settings) {
7315 __kmp_env_print();
7316 }
7317
7318 if (__kmp_display_env || __kmp_display_env_verbose) {
7319 __kmp_env_print_2();
7320 }
7321
7322#if OMPT_SUPPORT1
7323 ompt_post_init();
7324#endif
7325
7326 KMP_MB();
7327
7328 KA_TRACE(10, ("__kmp_do_serial_initialize: exit\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_do_serial_initialize: exit\n"
); }
;
7329}
7330
7331void __kmp_serial_initialize(void) {
7332 if (__kmp_init_serial) {
7333 return;
7334 }
7335 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7336 if (__kmp_init_serial) {
7337 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7338 return;
7339 }
7340 __kmp_do_serial_initialize();
7341 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7342}
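
__kmp_serial_initialize above (and __kmp_middle_initialize / __kmp_parallel_initialize further down) wrap their __kmp_do_* routines in the same check / acquire __kmp_initz_lock / re-check sequence. The following is a minimal standalone sketch of that double-checked initialization idiom, not the runtime's code; init_done, init_lock and do_initialize are illustrative names, and std::atomic / std::mutex stand in for the runtime's volatile flags and bootstrap lock.

#include <atomic>
#include <mutex>

static std::atomic<bool> init_done{false}; // ~ __kmp_init_serial (illustrative)
static std::mutex init_lock;               // ~ __kmp_initz_lock (illustrative)

static void do_initialize() { /* one-time setup would go here */ }

void ensure_initialized() {
  if (init_done.load(std::memory_order_acquire)) // fast path: no lock taken
    return;
  std::lock_guard<std::mutex> guard(init_lock);
  if (init_done.load(std::memory_order_relaxed)) // re-check: lost the race
    return;
  do_initialize();
  init_done.store(true, std::memory_order_release);
}
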
7343
7344static void __kmp_do_middle_initialize(void) {
7345 int i, j;
7346 int prev_dflt_team_nth;
7347
7348 if (!__kmp_init_serial) {
7349 __kmp_do_serial_initialize();
7350 }
7351
7352 KA_TRACE(10, ("__kmp_middle_initialize: enter\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_middle_initialize: enter\n"
); }
;
7353
7354 if (UNLIKELY(!__kmp_need_register_serial)__builtin_expect(!!(!__kmp_need_register_serial), 0)) {
7355 // We are in a forked child process. The registration was skipped during
7356 // serial initialization in __kmp_atfork_child handler. Do it here.
7357 __kmp_register_library_startup();
7358 }
7359
7360 // Save the previous value for the __kmp_dflt_team_nth so that
7361 // we can avoid some reinitialization if it hasn't changed.
7362 prev_dflt_team_nth = __kmp_dflt_team_nth;
7363
7364#if KMP_AFFINITY_SUPPORTED1
7365 // __kmp_affinity_initialize() will try to set __kmp_ncores to the
7366 // number of cores on the machine.
7367 __kmp_affinity_initialize(__kmp_affinity);
7368
7369#endif /* KMP_AFFINITY_SUPPORTED */
7370
7371 KMP_ASSERT(__kmp_xproc > 0)if (!(__kmp_xproc > 0)) { __kmp_debug_assert("__kmp_xproc > 0"
, "openmp/runtime/src/kmp_runtime.cpp", 7371); }
;
7372 if (__kmp_avail_proc == 0) {
7373 __kmp_avail_proc = __kmp_xproc;
7374 }
7375
7376 // If there were empty places in num_threads list (OMP_NUM_THREADS=,,2,3),
7377 // correct them now
7378 j = 0;
7379 while ((j < __kmp_nested_nth.used) && !__kmp_nested_nth.nth[j]) {
7380 __kmp_nested_nth.nth[j] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub =
7381 __kmp_avail_proc;
7382 j++;
7383 }
7384
7385 if (__kmp_dflt_team_nth == 0) {
7386#ifdef KMP_DFLT_NTH_CORES
7387 // Default #threads = #cores
7388 __kmp_dflt_team_nth = __kmp_ncores;
7389 KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
"__kmp_ncores (%d)\n", __kmp_dflt_team_nth); }
7390 "__kmp_ncores (%d)\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
"__kmp_ncores (%d)\n", __kmp_dflt_team_nth); }
7391 __kmp_dflt_team_nth))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
"__kmp_ncores (%d)\n", __kmp_dflt_team_nth); }
;
7392#else
7393 // Default #threads = #available OS procs
7394 __kmp_dflt_team_nth = __kmp_avail_proc;
7395 KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
"__kmp_avail_proc(%d)\n", __kmp_dflt_team_nth); }
7396 "__kmp_avail_proc(%d)\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
"__kmp_avail_proc(%d)\n", __kmp_dflt_team_nth); }
7397 __kmp_dflt_team_nth))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
"__kmp_avail_proc(%d)\n", __kmp_dflt_team_nth); }
;
7398#endif /* KMP_DFLT_NTH_CORES */
7399 }
7400
7401 if (__kmp_dflt_team_nth < KMP_MIN_NTH1) {
7402 __kmp_dflt_team_nth = KMP_MIN_NTH1;
7403 }
7404 if (__kmp_dflt_team_nth > __kmp_sys_max_nth) {
7405 __kmp_dflt_team_nth = __kmp_sys_max_nth;
7406 }
7407
7408 if (__kmp_nesting_mode > 0)
7409 __kmp_set_nesting_mode_threads();
7410
7411 // There's no harm in continuing if the following check fails,
7412 // but it indicates an error in the previous logic.
7413 KMP_DEBUG_ASSERT(__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub)if (!(__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub)) { __kmp_debug_assert
("__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub", "openmp/runtime/src/kmp_runtime.cpp"
, 7413); }
;
7414
7415 if (__kmp_dflt_team_nth != prev_dflt_team_nth) {
7416 // Run through the __kmp_threads array and set the num threads icv for each
7417 // root thread that is currently registered with the RTL (which has not
7418 // already explicitly set its nthreads-var with a call to
7419 // omp_set_num_threads()).
7420 for (i = 0; i < __kmp_threads_capacity; i++) {
7421 kmp_info_t *thread = __kmp_threads[i];
7422 if (thread == NULL__null)
7423 continue;
7424 if (thread->th.th_current_task->td_icvs.nproc != 0)
7425 continue;
7426
7427 set__nproc(__kmp_threads[i], __kmp_dflt_team_nth)(((__kmp_threads[i])->th.th_current_task->td_icvs.nproc
) = (__kmp_dflt_team_nth))
;
7428 }
7429 }
7430 KA_TRACE(if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n"
, __kmp_dflt_team_nth); }
7431 20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n"
, __kmp_dflt_team_nth); }
7432 ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n"
, __kmp_dflt_team_nth); }
7433 __kmp_dflt_team_nth))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n"
, __kmp_dflt_team_nth); }
;
7434
7435#ifdef KMP_ADJUST_BLOCKTIME1
7436 /* Adjust blocktime to zero if necessary now that __kmp_avail_proc is set */
7437 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
7438 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0)if (!(__kmp_avail_proc > 0)) { __kmp_debug_assert("__kmp_avail_proc > 0"
, "openmp/runtime/src/kmp_runtime.cpp", 7438); }
;
7439 if (__kmp_nth > __kmp_avail_proc) {
7440 __kmp_zero_bt = TRUE(!0);
7441 }
7442 }
7443#endif /* KMP_ADJUST_BLOCKTIME */
7444
7445 /* we have finished middle initialization */
7446 TCW_SYNC_4(__kmp_init_middle, TRUE)(__kmp_init_middle) = ((!0));
7447
7448 KA_TRACE(10, ("__kmp_do_middle_initialize: exit\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_do_middle_initialize: exit\n"
); }
;
7449}
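
For the OMP_NUM_THREADS fixup near the top of the function above: a nested list such as OMP_NUM_THREADS=",,2,3" leaves zero entries at the front, and the loop replaces each leading zero with the number of available processors. A self-contained sketch of just that fixup with invented values; nested and avail_proc are illustrative stand-ins for __kmp_nested_nth and __kmp_avail_proc.

#include <cassert>
#include <cstddef>
#include <vector>

int main() {
  // Illustrative result of parsing OMP_NUM_THREADS=",,2,3": empty slots are 0.
  std::vector<int> nested = {0, 0, 2, 3};
  const int avail_proc = 8; // assumed machine size, for the example only

  // Mirror of the fixup loop: leading zero entries take the default.
  for (std::size_t j = 0; j < nested.size() && nested[j] == 0; ++j)
    nested[j] = avail_proc;

  assert(nested[0] == 8 && nested[1] == 8 && nested[2] == 2 && nested[3] == 3);
  return 0;
}
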
7450
7451void __kmp_middle_initialize(void) {
7452 if (__kmp_init_middle) {
7453 return;
7454 }
7455 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7456 if (__kmp_init_middle) {
7457 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7458 return;
7459 }
7460 __kmp_do_middle_initialize();
7461 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7462}
7463
7464void __kmp_parallel_initialize(void) {
7465 int gtid = __kmp_entry_gtid()__kmp_get_global_thread_id_reg(); // this might be a new root
7466
7467 /* synchronize parallel initialization (for sibling) */
7468 if (TCR_4(__kmp_init_parallel)(__kmp_init_parallel))
7469 return;
7470 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7471 if (TCR_4(__kmp_init_parallel)(__kmp_init_parallel)) {
7472 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7473 return;
7474 }
7475
7476 /* TODO reinitialization after we have already shut down */
7477 if (TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done)) {
7478 KA_TRACE(if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_parallel_initialize: attempt to init while shutting down\n"
); }
7479 10,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_parallel_initialize: attempt to init while shutting down\n"
); }
7480 ("__kmp_parallel_initialize: attempt to init while shutting down\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_parallel_initialize: attempt to init while shutting down\n"
); }
;
7481 __kmp_infinite_loop();
7482 }
7483
7484 /* jc: The lock __kmp_initz_lock is already held, so calling
7485 __kmp_serial_initialize would cause a deadlock. So we call
7486 __kmp_do_serial_initialize directly. */
7487 if (!__kmp_init_middle) {
7488 __kmp_do_middle_initialize();
7489 }
7490 __kmp_assign_root_init_mask();
7491 __kmp_resume_if_hard_paused();
7492
7493 /* begin initialization */
7494 KA_TRACE(10, ("__kmp_parallel_initialize: enter\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_parallel_initialize: enter\n"
); }
;
7495 KMP_ASSERT(KMP_UBER_GTID(gtid))if (!(KMP_UBER_GTID(gtid))) { __kmp_debug_assert("KMP_UBER_GTID(gtid)"
, "openmp/runtime/src/kmp_runtime.cpp", 7495); }
;
7496
7497#if KMP_ARCH_X860 || KMP_ARCH_X86_641
7498 // Save the FP control regs.
7499 // Worker threads will set theirs to these values at thread startup.
7500 __kmp_store_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word);
7501 __kmp_store_mxcsr(&__kmp_init_mxcsr);
7502 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK0xffffffc0;
7503#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
7504
7505#if KMP_OS_UNIX1
7506#if KMP_HANDLE_SIGNALS(1 || 0)
7507 /* must be after __kmp_serial_initialize */
7508 __kmp_install_signals(TRUE(!0));
7509#endif
7510#endif
7511
7512 __kmp_suspend_initialize();
7513
7514#if defined(USE_LOAD_BALANCE1)
7515 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
7516 __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
7517 }
7518#else
7519 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
7520 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
7521 }
7522#endif
7523
7524 if (__kmp_version) {
7525 __kmp_print_version_2();
7526 }
7527
7528 /* we have finished parallel initialization */
7529 TCW_SYNC_4(__kmp_init_parallel, TRUE)(__kmp_init_parallel) = ((!0));
7530
7531 KMP_MB();
7532 KA_TRACE(10, ("__kmp_parallel_initialize: exit\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_parallel_initialize: exit\n"
); }
;
7533
7534 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7535}
7536
7537void __kmp_hidden_helper_initialize() {
7538 if (TCR_4(__kmp_init_hidden_helper)(__kmp_init_hidden_helper))
7539 return;
7540
7541 // __kmp_parallel_initialize is required before we initialize hidden helper
7542 if (!TCR_4(__kmp_init_parallel)(__kmp_init_parallel))
7543 __kmp_parallel_initialize();
7544
7545 // Double check. Note that this double check should not be placed before
7546  // __kmp_parallel_initialize, as doing so would cause a deadlock.
7547 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7548 if (TCR_4(__kmp_init_hidden_helper)(__kmp_init_hidden_helper)) {
7549 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7550 return;
7551 }
7552
7553#if KMP_AFFINITY_SUPPORTED1
7554 // Initialize hidden helper affinity settings.
7555 // The above __kmp_parallel_initialize() will initialize
7556 // regular affinity (and topology) if not already done.
7557 if (!__kmp_hh_affinity.flags.initialized)
7558 __kmp_affinity_initialize(__kmp_hh_affinity);
7559#endif
7560
7561 // Set the count of hidden helper tasks to be executed to zero
7562 KMP_ATOMIC_ST_REL(&__kmp_unexecuted_hidden_helper_tasks, 0)(&__kmp_unexecuted_hidden_helper_tasks)->store(0, std::
memory_order_release)
;
7563
7564 // Set the global variable indicating that we're initializing hidden helper
7565 // team/threads
7566 TCW_SYNC_4(__kmp_init_hidden_helper_threads, TRUE)(__kmp_init_hidden_helper_threads) = ((!0));
7567
7568 // Platform independent initialization
7569 __kmp_do_initialize_hidden_helper_threads();
7570
7571 // Wait here for the finish of initialization of hidden helper teams
7572 __kmp_hidden_helper_threads_initz_wait();
7573
7574 // We have finished hidden helper initialization
7575 TCW_SYNC_4(__kmp_init_hidden_helper, TRUE)(__kmp_init_hidden_helper) = ((!0));
7576
7577 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7578}
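
The comment above stresses that the double check may only happen after __kmp_parallel_initialize: that routine acquires __kmp_initz_lock itself, and the bootstrap lock is not reentrant, so taking the lock first and then calling into it would self-deadlock. A toy rendering of the safe ordering; the plain std::mutex and bool flags are illustrative, the runtime uses its bootstrap lock and TCR/TCW accessors.

#include <mutex>

std::mutex initz_lock;      // ~ __kmp_initz_lock (illustrative, non-reentrant)
bool parallel_done = false; // ~ __kmp_init_parallel
bool helper_done = false;   // ~ __kmp_init_hidden_helper

void parallel_initialize() {
  std::lock_guard<std::mutex> g(initz_lock); // locks the same mutex internally
  if (!parallel_done) { /* ... */ parallel_done = true; }
}

void hidden_helper_initialize() {
  if (helper_done)
    return;
  // Must run BEFORE we take initz_lock; the reverse order would deadlock
  // because parallel_initialize() re-acquires the same non-reentrant lock.
  parallel_initialize();
  std::lock_guard<std::mutex> g(initz_lock);
  if (helper_done) // double check under the lock
    return;
  /* ... hidden-helper setup ... */
  helper_done = true;
}
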
7579
7580/* ------------------------------------------------------------------------ */
7581
7582void __kmp_run_before_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
7583 kmp_team_t *team) {
7584 kmp_disp_t *dispatch;
7585
7586 KMP_MB();
7587
7588 /* none of the threads have encountered any constructs, yet. */
7589 this_thr->th.th_local.this_construct = 0;
7590#if KMP_CACHE_MANAGE
7591 KMP_CACHE_PREFETCH(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_arrived);
7592#endif /* KMP_CACHE_MANAGE */
7593 dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch)((void *)(this_thr->th.th_dispatch));
7594 KMP_DEBUG_ASSERT(dispatch)if (!(dispatch)) { __kmp_debug_assert("dispatch", "openmp/runtime/src/kmp_runtime.cpp"
, 7594); }
;
7595 KMP_DEBUG_ASSERT(team->t.t_dispatch)if (!(team->t.t_dispatch)) { __kmp_debug_assert("team->t.t_dispatch"
, "openmp/runtime/src/kmp_runtime.cpp", 7595); }
;
7596 // KMP_DEBUG_ASSERT( this_thr->th.th_dispatch == &team->t.t_dispatch[
7597 // this_thr->th.th_info.ds.ds_tid ] );
7598
7599 dispatch->th_disp_index = 0; /* reset the dispatch buffer counter */
7600 dispatch->th_doacross_buf_idx = 0; // reset doacross dispatch buffer counter
7601 if (__kmp_env_consistency_check)
7602 __kmp_push_parallel(gtid, team->t.t_ident);
7603
7604 KMP_MB(); /* Flush all pending memory write invalidates. */
7605}
7606
7607void __kmp_run_after_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
7608 kmp_team_t *team) {
7609 if (__kmp_env_consistency_check)
7610 __kmp_pop_parallel(gtid, team->t.t_ident);
7611
7612 __kmp_finish_implicit_task(this_thr);
7613}
7614
7615int __kmp_invoke_task_func(int gtid) {
7616 int rc;
7617 int tid = __kmp_tid_from_gtid(gtid);
7618 kmp_info_t *this_thr = __kmp_threads[gtid];
7619 kmp_team_t *team = this_thr->th.th_team;
7620
7621 __kmp_run_before_invoked_task(gtid, tid, this_thr, team);
7622#if USE_ITT_BUILD1
7623 if (__itt_stack_caller_create_ptr__kmp_itt_stack_caller_create_ptr__3_0) {
7624 // inform ittnotify about entering user's code
7625 if (team->t.t_stack_id != NULL__null) {
7626 __kmp_itt_stack_callee_enter((__itt_caller)team->t.t_stack_id);
7627 } else {
7628 KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL)if (!(team->t.t_parent->t.t_stack_id != __null)) { __kmp_debug_assert
("team->t.t_parent->t.t_stack_id != __null", "openmp/runtime/src/kmp_runtime.cpp"
, 7628); }
;
7629 __kmp_itt_stack_callee_enter(
7630 (__itt_caller)team->t.t_parent->t.t_stack_id);
7631 }
7632 }
7633#endif /* USE_ITT_BUILD */
7634#if INCLUDE_SSC_MARKS(1 && 1)
7635 SSC_MARK_INVOKING()__asm__ __volatile__("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 "
::"i"(0xd695) : "%ebx")
;
7636#endif
7637
7638#if OMPT_SUPPORT1
7639 void *dummy;
7640 void **exit_frame_p;
7641 ompt_data_t *my_task_data;
7642 ompt_data_t *my_parallel_data;
7643 int ompt_team_size;
7644
7645 if (ompt_enabled.enabled) {
7646 exit_frame_p = &(team->t.t_implicit_task_taskdata[tid]
7647 .ompt_task_info.frame.exit_frame.ptr);
7648 } else {
7649 exit_frame_p = &dummy;
7650 }
7651
7652 my_task_data =
7653 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data);
7654 my_parallel_data = &(team->t.ompt_team_info.parallel_data);
7655 if (ompt_enabled.ompt_callback_implicit_task) {
7656 ompt_team_size = team->t.t_nproc;
7657 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)ompt_callback_implicit_task_callback(
7658 ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size,
7659 __kmp_tid_from_gtid(gtid), ompt_task_implicit);
7660 OMPT_CUR_TASK_INFO(this_thr)(&(this_thr->th.th_current_task->ompt_task_info))->thread_num = __kmp_tid_from_gtid(gtid);
7661 }
7662#endif
7663
7664#if KMP_STATS_ENABLED0
7665 stats_state_e previous_state = KMP_GET_THREAD_STATE()((void)0);
7666 if (previous_state == stats_state_e::TEAMS_REGION) {
7667 KMP_PUSH_PARTITIONED_TIMER(OMP_teams)((void)0);
7668 } else {
7669 KMP_PUSH_PARTITIONED_TIMER(OMP_parallel)((void)0);
7670 }
7671 KMP_SET_THREAD_STATE(IMPLICIT_TASK)((void)0);
7672#endif
7673
7674 rc = __kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn)((void *)(team->t.t_pkfn)), gtid,
7675 tid, (int)team->t.t_argc, (void **)team->t.t_argv
7676#if OMPT_SUPPORT1
7677 ,
7678 exit_frame_p
7679#endif
7680 );
7681#if OMPT_SUPPORT1
7682 *exit_frame_p = NULL__null;
7683 this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_team;
7684#endif
7685
7686#if KMP_STATS_ENABLED0
7687 if (previous_state == stats_state_e::TEAMS_REGION) {
7688 KMP_SET_THREAD_STATE(previous_state)((void)0);
7689 }
7690 KMP_POP_PARTITIONED_TIMER()((void)0);
7691#endif
7692
7693#if USE_ITT_BUILD1
7694 if (__itt_stack_caller_create_ptr__kmp_itt_stack_caller_create_ptr__3_0) {
7695 // inform ittnotify about leaving user's code
7696 if (team->t.t_stack_id != NULL__null) {
7697 __kmp_itt_stack_callee_leave((__itt_caller)team->t.t_stack_id);
7698 } else {
7699 KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL)if (!(team->t.t_parent->t.t_stack_id != __null)) { __kmp_debug_assert
("team->t.t_parent->t.t_stack_id != __null", "openmp/runtime/src/kmp_runtime.cpp"
, 7699); }
;
7700 __kmp_itt_stack_callee_leave(
7701 (__itt_caller)team->t.t_parent->t.t_stack_id);
7702 }
7703 }
7704#endif /* USE_ITT_BUILD */
7705 __kmp_run_after_invoked_task(gtid, tid, this_thr, team);
7706
7707 return rc;
7708}
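
A detail worth calling out in __kmp_invoke_task_func: when OMPT is compiled in but disabled at run time, exit_frame_p is pointed at a local dummy so the store after the microtask can be unconditional instead of re-testing ompt_enabled. A minimal sketch of the same idiom; record_slot, enabled, run_task and invoke are invented names, not runtime symbols.

#include <cstdio>

void *record_slot = nullptr; // stand-in for the OMPT task frame field
bool enabled = false;        // stand-in for ompt_enabled.enabled

void run_task() { /* user code would run here */ }

void invoke() {
  void *dummy;
  // Choose the real slot or a throwaway local once, up front...
  void **exit_frame_p = enabled ? &record_slot : &dummy;
  run_task();
  // ...so the cleanup can write through the pointer with no second branch.
  *exit_frame_p = nullptr;
}

int main() {
  invoke();
  std::printf("record_slot=%p\n", record_slot);
  return 0;
}
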
7709
7710void __kmp_teams_master(int gtid) {
7711 // This routine is called by all primary threads in teams construct
7712 kmp_info_t *thr = __kmp_threads[gtid];
7713 kmp_team_t *team = thr->th.th_team;
7714 ident_t *loc = team->t.t_ident;
7715 thr->th.th_set_nproc = thr->th.th_teams_size.nth;
7716 KMP_DEBUG_ASSERT(thr->th.th_teams_microtask)if (!(thr->th.th_teams_microtask)) { __kmp_debug_assert("thr->th.th_teams_microtask"
, "openmp/runtime/src/kmp_runtime.cpp", 7716); }
;
7717 KMP_DEBUG_ASSERT(thr->th.th_set_nproc)if (!(thr->th.th_set_nproc)) { __kmp_debug_assert("thr->th.th_set_nproc"
, "openmp/runtime/src/kmp_runtime.cpp", 7717); }
;
7718 KA_TRACE(20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n", gtid,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n"
, gtid, __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask
); }
7719 __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n"
, gtid, __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask
); }
;
7720
7721 // This thread is a new CG root. Set up the proper variables.
7722 kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(sizeof(kmp_cg_root_t))___kmp_allocate((sizeof(kmp_cg_root_t)), "openmp/runtime/src/kmp_runtime.cpp"
, 7722)
;
7723 tmp->cg_root = thr; // Make thr the CG root
7724 // Init to thread limit stored when league primary threads were forked
7725 tmp->cg_thread_limit = thr->th.th_current_task->td_icvs.thread_limit;
7726 tmp->cg_nthreads = 1; // Init counter to one active thread, this one
7727 KA_TRACE(100, ("__kmp_teams_master: Thread %p created node %p and init"if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_teams_master: Thread %p created node %p and init"
" cg_nthreads to 1\n", thr, tmp); }
7728 " cg_nthreads to 1\n",if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_teams_master: Thread %p created node %p and init"
" cg_nthreads to 1\n", thr, tmp); }
7729 thr, tmp))if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_teams_master: Thread %p created node %p and init"
" cg_nthreads to 1\n", thr, tmp); }
;
7730 tmp->up = thr->th.th_cg_roots;
7731 thr->th.th_cg_roots = tmp;
7732
7733// Launch the league of teams now, but do not let workers execute
7734// (they hang on the fork barrier until the next parallel region)
7735#if INCLUDE_SSC_MARKS(1 && 1)
7736 SSC_MARK_FORKING()__asm__ __volatile__("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 "
::"i"(0xd693) : "%ebx")
;
7737#endif
7738 __kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc,
7739 (microtask_t)thr->th.th_teams_microtask, // "wrapped" task
7740 VOLATILE_CAST(launch_t)(launch_t) __kmp_invoke_task_func, NULL__null);
7741#if INCLUDE_SSC_MARKS(1 && 1)
7742 SSC_MARK_JOINING()__asm__ __volatile__("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 "
::"i"(0xd694) : "%ebx")
;
7743#endif
7744 // If the team size was reduced from the limit, set it to the new size
7745 if (thr->th.th_team_nproc < thr->th.th_teams_size.nth)
7746 thr->th.th_teams_size.nth = thr->th.th_team_nproc;
7747 // AC: last parameter "1" eliminates join barrier which won't work because
7748 // worker threads are in a fork barrier waiting for more parallel regions
7749 __kmp_join_call(loc, gtid
7750#if OMPT_SUPPORT1
7751 ,
7752 fork_context_intel
7753#endif
7754 ,
7755 1);
7756}
7757
7758int __kmp_invoke_teams_master(int gtid) {
7759 kmp_info_t *this_thr = __kmp_threads[gtid];
7760 kmp_team_t *team = this_thr->th.th_team;
7761#if KMP_DEBUG1
7762 if (!__kmp_threads[gtid]->th.th_team->t.t_serialized)
7763 KMP_DEBUG_ASSERT((void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn ==if (!((void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn
== (void *)__kmp_teams_master)) { __kmp_debug_assert("(void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn == (void *)__kmp_teams_master"
, "openmp/runtime/src/kmp_runtime.cpp", 7764); }
7764 (void *)__kmp_teams_master)if (!((void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn
== (void *)__kmp_teams_master)) { __kmp_debug_assert("(void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn == (void *)__kmp_teams_master"
, "openmp/runtime/src/kmp_runtime.cpp", 7764); }
;
7765#endif
7766 __kmp_run_before_invoked_task(gtid, 0, this_thr, team);
7767#if OMPT_SUPPORT1
7768 int tid = __kmp_tid_from_gtid(gtid);
7769 ompt_data_t *task_data =
7770 &team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data;
7771 ompt_data_t *parallel_data = &team->t.ompt_team_info.parallel_data;
7772 if (ompt_enabled.ompt_callback_implicit_task) {
7773 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)ompt_callback_implicit_task_callback(
7774 ompt_scope_begin, parallel_data, task_data, team->t.t_nproc, tid,
7775 ompt_task_initial);
7776 OMPT_CUR_TASK_INFO(this_thr)(&(this_thr->th.th_current_task->ompt_task_info))->thread_num = tid;
7777 }
7778#endif
7779 __kmp_teams_master(gtid);
7780#if OMPT_SUPPORT1
7781 this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_league;
7782#endif
7783 __kmp_run_after_invoked_task(gtid, 0, this_thr, team);
7784 return 1;
7785}
7786
7787/* this sets the requested number of threads for the next parallel region
7788 encountered by this team. since this should be enclosed in the forkjoin
7789 critical section it should avoid race conditions with asymmetrical nested
7790 parallelism */
7791
7792void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
7793 kmp_info_t *thr = __kmp_threads[gtid];
7794
7795 if (num_threads > 0)
7796 thr->th.th_set_nproc = num_threads;
7797}
7798
7799static void __kmp_push_thread_limit(kmp_info_t *thr, int num_teams,
7800 int num_threads) {
7801 KMP_DEBUG_ASSERT(thr)if (!(thr)) { __kmp_debug_assert("thr", "openmp/runtime/src/kmp_runtime.cpp"
, 7801); }
;
7802 // Remember the number of threads for inner parallel regions
7803 if (!TCR_4(__kmp_init_middle)(__kmp_init_middle))
7804 __kmp_middle_initialize(); // get internal globals calculated
7805 __kmp_assign_root_init_mask();
7806 KMP_DEBUG_ASSERT(__kmp_avail_proc)if (!(__kmp_avail_proc)) { __kmp_debug_assert("__kmp_avail_proc"
, "openmp/runtime/src/kmp_runtime.cpp", 7806); }
;
7807 KMP_DEBUG_ASSERT(__kmp_dflt_team_nth)if (!(__kmp_dflt_team_nth)) { __kmp_debug_assert("__kmp_dflt_team_nth"
, "openmp/runtime/src/kmp_runtime.cpp", 7807); }
;
7808
7809 if (num_threads == 0) {
7810 if (__kmp_teams_thread_limit > 0) {
7811 num_threads = __kmp_teams_thread_limit;
7812 } else {
7813 num_threads = __kmp_avail_proc / num_teams;
7814 }
7815    // adjust num_threads w/o warning as it is not a user setting
7816 // num_threads = min(num_threads, nthreads-var, thread-limit-var)
7817 // no thread_limit clause specified - do not change thread-limit-var ICV
7818 if (num_threads > __kmp_dflt_team_nth) {
7819 num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
7820 }
7821 if (num_threads > thr->th.th_current_task->td_icvs.thread_limit) {
7822 num_threads = thr->th.th_current_task->td_icvs.thread_limit;
7823    } // prevent team size from exceeding thread-limit-var
7824 if (num_teams * num_threads > __kmp_teams_max_nth) {
7825 num_threads = __kmp_teams_max_nth / num_teams;
7826 }
7827 if (num_threads == 0) {
7828 num_threads = 1;
7829 }
7830 } else {
7831 if (num_threads < 0) {
7832 __kmp_msg(kmp_ms_warning, KMP_MSG(CantFormThrTeam, num_threads, 1)__kmp_msg_format(kmp_i18n_msg_CantFormThrTeam, num_threads, 1
)
,
7833 __kmp_msg_null);
7834 num_threads = 1;
7835 }
7836 // This thread will be the primary thread of the league primary threads
7837 // Store new thread limit; old limit is saved in th_cg_roots list
7838 thr->th.th_current_task->td_icvs.thread_limit = num_threads;
7839 // num_threads = min(num_threads, nthreads-var)
7840 if (num_threads > __kmp_dflt_team_nth) {
7841 num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
7842 }
7843 if (num_teams * num_threads > __kmp_teams_max_nth) {
7844 int new_threads = __kmp_teams_max_nth / num_teams;
7845 if (new_threads == 0) {
7846 new_threads = 1;
7847 }
7848 if (new_threads != num_threads) {
7849 if (!__kmp_reserve_warn) { // user asked for too many threads
7850 __kmp_reserve_warn = 1; // conflicts with KMP_TEAMS_THREAD_LIMIT
7851 __kmp_msg(kmp_ms_warning,
7852 KMP_MSG(CantFormThrTeam, num_threads, new_threads)__kmp_msg_format(kmp_i18n_msg_CantFormThrTeam, num_threads, new_threads
)
,
7853 KMP_HNT(Unset_ALL_THREADS)__kmp_msg_format(kmp_i18n_hnt_Unset_ALL_THREADS), __kmp_msg_null);
7854 }
7855 }
7856 num_threads = new_threads;
7857 }
7858 }
7859 thr->th.th_teams_size.nth = num_threads;
7860}
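
The no-thread_limit branch of __kmp_push_thread_limit above is essentially a chain of min() clamps. A standalone rendering of that chain with made-up numbers; every variable below is an illustrative stand-in for the ICVs and globals referenced in the function.

#include <algorithm>
#include <cstdio>

int main() {
  // Assumed inputs, for the example only.
  int num_teams = 4;
  int avail_proc = 32;        // ~ __kmp_avail_proc
  int teams_thread_limit = 0; // ~ KMP_TEAMS_THREAD_LIMIT (unset here)
  int nthreads_var = 6;       // ~ __kmp_dflt_team_nth
  int thread_limit_var = 16;  // ~ td_icvs.thread_limit
  int teams_max_nth = 512;    // ~ __kmp_teams_max_nth

  int num_threads = teams_thread_limit > 0 ? teams_thread_limit
                                           : avail_proc / num_teams;   // 8
  num_threads = std::min(num_threads, nthreads_var);     // honor nthreads-var: 6
  num_threads = std::min(num_threads, thread_limit_var); // honor thread-limit-var
  if (num_teams * num_threads > teams_max_nth)           // cap the whole league
    num_threads = teams_max_nth / num_teams;
  num_threads = std::max(num_threads, 1);

  std::printf("threads per team = %d\n", num_threads); // prints 6
  return 0;
}
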
7861
7862/* this sets the requested number of teams for the teams region and/or
7863 the number of threads for the next parallel region encountered */
7864void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
7865 int num_threads) {
7866 kmp_info_t *thr = __kmp_threads[gtid];
7867 if (num_teams < 0) {
7868 // OpenMP specification requires requested values to be positive,
7869 // but people can send us any value, so we'd better check
7870 __kmp_msg(kmp_ms_warning, KMP_MSG(NumTeamsNotPositive, num_teams, 1)__kmp_msg_format(kmp_i18n_msg_NumTeamsNotPositive, num_teams,
1)
,
7871 __kmp_msg_null);
7872 num_teams = 1;
7873 }
7874 if (num_teams == 0) {
7875 if (__kmp_nteams > 0) {
7876 num_teams = __kmp_nteams;
7877 } else {
7878 num_teams = 1; // default number of teams is 1.
7879 }
7880 }
7881  if (num_teams > __kmp_teams_max_nth) { // were too many teams requested?
7882 if (!__kmp_reserve_warn) {
7883 __kmp_reserve_warn = 1;
7884 __kmp_msg(kmp_ms_warning,
7885 KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth)__kmp_msg_format(kmp_i18n_msg_CantFormThrTeam, num_teams, __kmp_teams_max_nth
)
,
7886 KMP_HNT(Unset_ALL_THREADS)__kmp_msg_format(kmp_i18n_hnt_Unset_ALL_THREADS), __kmp_msg_null);
7887 }
7888 num_teams = __kmp_teams_max_nth;
7889 }
7890 // Set number of teams (number of threads in the outer "parallel" of the
7891 // teams)
7892 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
7893
7894 __kmp_push_thread_limit(thr, num_teams, num_threads);
7895}
7896
7897/* This sets the requested number of teams for the teams region and/or
7898 the number of threads for the next parallel region encountered */
7899void __kmp_push_num_teams_51(ident_t *id, int gtid, int num_teams_lb,
7900 int num_teams_ub, int num_threads) {
7901 kmp_info_t *thr = __kmp_threads[gtid];
7902 KMP_DEBUG_ASSERT(num_teams_lb >= 0 && num_teams_ub >= 0)if (!(num_teams_lb >= 0 && num_teams_ub >= 0)) {
__kmp_debug_assert("num_teams_lb >= 0 && num_teams_ub >= 0"
, "openmp/runtime/src/kmp_runtime.cpp", 7902); }
;
7903 KMP_DEBUG_ASSERT(num_teams_ub >= num_teams_lb)if (!(num_teams_ub >= num_teams_lb)) { __kmp_debug_assert(
"num_teams_ub >= num_teams_lb", "openmp/runtime/src/kmp_runtime.cpp"
, 7903); }
;
7904 KMP_DEBUG_ASSERT(num_threads >= 0)if (!(num_threads >= 0)) { __kmp_debug_assert("num_threads >= 0"
, "openmp/runtime/src/kmp_runtime.cpp", 7904); }
;
7905
7906 if (num_teams_lb > num_teams_ub) {
7907 __kmp_fatal(KMP_MSG(FailedToCreateTeam, num_teams_lb, num_teams_ub)__kmp_msg_format(kmp_i18n_msg_FailedToCreateTeam, num_teams_lb
, num_teams_ub)
,
7908 KMP_HNT(SetNewBound, __kmp_teams_max_nth)__kmp_msg_format(kmp_i18n_hnt_SetNewBound, __kmp_teams_max_nth
)
, __kmp_msg_null);
7909 }
7910
7911  int num_teams = 1; // default number of teams is 1.
7912
7913 if (num_teams_lb == 0 && num_teams_ub > 0)
7914 num_teams_lb = num_teams_ub;
7915
7916 if (num_teams_lb == 0 && num_teams_ub == 0) { // no num_teams clause
7917 num_teams = (__kmp_nteams > 0) ? __kmp_nteams : num_teams;
7918 if (num_teams > __kmp_teams_max_nth) {
7919 if (!__kmp_reserve_warn) {
7920 __kmp_reserve_warn = 1;
7921 __kmp_msg(kmp_ms_warning,
7922 KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth)__kmp_msg_format(kmp_i18n_msg_CantFormThrTeam, num_teams, __kmp_teams_max_nth
)
,
7923 KMP_HNT(Unset_ALL_THREADS)__kmp_msg_format(kmp_i18n_hnt_Unset_ALL_THREADS), __kmp_msg_null);
7924 }
7925 num_teams = __kmp_teams_max_nth;
7926 }
7927 } else if (num_teams_lb == num_teams_ub) { // requires exact number of teams
7928 num_teams = num_teams_ub;
7929 } else { // num_teams_lb <= num_teams <= num_teams_ub
7930 if (num_threads <= 0) {
7931 if (num_teams_ub > __kmp_teams_max_nth) {
7932 num_teams = num_teams_lb;
7933 } else {
7934 num_teams = num_teams_ub;
7935 }
7936 } else {
7937 num_teams = (num_threads > __kmp_teams_max_nth)
7938 ? num_teams
7939 : __kmp_teams_max_nth / num_threads;
7940 if (num_teams < num_teams_lb) {
7941 num_teams = num_teams_lb;
7942 } else if (num_teams > num_teams_ub) {
7943 num_teams = num_teams_ub;
7944 }
7945 }
7946 }
7947 // Set number of teams (number of threads in the outer "parallel" of the
7948 // teams)
7949 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
7950
7951 __kmp_push_thread_limit(thr, num_teams, num_threads);
7952}
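
When a num_teams(lb : ub) range is given, the selection above aims for as many teams as fit under __kmp_teams_max_nth for the requested thread count and then clamps into [lb, ub]. A compact standalone mirror of that selection logic; choose_num_teams is an invented helper, and the KMP_NTEAMS handling and warnings are omitted.

#include <cstdio>

int choose_num_teams(int lb, int ub, int num_threads, int teams_max_nth) {
  int num_teams = 1; // default number of teams is 1
  if (lb == 0 && ub > 0)
    lb = ub;
  if (lb == 0 && ub == 0) {
    num_teams = 1;  // no num_teams clause (KMP_NTEAMS not modeled here)
  } else if (lb == ub) {
    num_teams = ub; // an exact number of teams was requested
  } else if (num_threads <= 0) {
    num_teams = (ub > teams_max_nth) ? lb : ub;
  } else {
    num_teams = (num_threads > teams_max_nth) ? num_teams
                                              : teams_max_nth / num_threads;
    if (num_teams < lb)
      num_teams = lb;
    else if (num_teams > ub)
      num_teams = ub;
  }
  return num_teams;
}

int main() {
  // e.g. num_teams(2 : 100) thread_limit(8) with a 512-thread overall cap.
  std::printf("%d\n", choose_num_teams(2, 100, 8, 512)); // prints 64
  return 0;
}
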
7953
7954// Set the proc_bind var to use in the following parallel region.
7955void __kmp_push_proc_bind(ident_t *id, int gtid, kmp_proc_bind_t proc_bind) {
7956 kmp_info_t *thr = __kmp_threads[gtid];
7957 thr->th.th_set_proc_bind = proc_bind;
7958}
7959
7960/* Launch the worker threads into the microtask. */
7961
7962void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team) {
7963 kmp_info_t *this_thr = __kmp_threads[gtid];
7964
7965#ifdef KMP_DEBUG1
7966 int f;
7967#endif /* KMP_DEBUG */
7968
7969 KMP_DEBUG_ASSERT(team)if (!(team)) { __kmp_debug_assert("team", "openmp/runtime/src/kmp_runtime.cpp"
, 7969); }
;
7970 KMP_DEBUG_ASSERT(this_thr->th.th_team == team)if (!(this_thr->th.th_team == team)) { __kmp_debug_assert(
"this_thr->th.th_team == team", "openmp/runtime/src/kmp_runtime.cpp"
, 7970); }
;
7971 KMP_ASSERT(KMP_MASTER_GTID(gtid))if (!((0 == __kmp_tid_from_gtid((gtid))))) { __kmp_debug_assert
("KMP_MASTER_GTID(gtid)", "openmp/runtime/src/kmp_runtime.cpp"
, 7971); }
;
7972 KMP_MB(); /* Flush all pending memory write invalidates. */
7973
7974 team->t.t_construct = 0; /* no single directives seen yet */
7975 team->t.t_ordered.dt.t_value =
7976 0; /* thread 0 enters the ordered section first */
7977
7978 /* Reset the identifiers on the dispatch buffer */
7979 KMP_DEBUG_ASSERT(team->t.t_disp_buffer)if (!(team->t.t_disp_buffer)) { __kmp_debug_assert("team->t.t_disp_buffer"
, "openmp/runtime/src/kmp_runtime.cpp", 7979); }
;
7980 if (team->t.t_max_nproc > 1) {
7981 int i;
7982 for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
7983 team->t.t_disp_buffer[i].buffer_index = i;
7984 team->t.t_disp_buffer[i].doacross_buf_idx = i;
7985 }
7986 } else {
7987 team->t.t_disp_buffer[0].buffer_index = 0;
7988 team->t.t_disp_buffer[0].doacross_buf_idx = 0;
7989 }
7990
7991 KMP_MB(); /* Flush all pending memory write invalidates. */
7992 KMP_ASSERT(this_thr->th.th_team == team)if (!(this_thr->th.th_team == team)) { __kmp_debug_assert(
"this_thr->th.th_team == team", "openmp/runtime/src/kmp_runtime.cpp"
, 7992); }
;
7993
7994#ifdef KMP_DEBUG1
7995 for (f = 0; f < team->t.t_nproc; f++) {
7996 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&if (!(team->t.t_threads[f] && team->t.t_threads
[f]->th.th_team_nproc == team->t.t_nproc)) { __kmp_debug_assert
("team->t.t_threads[f] && team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc"
, "openmp/runtime/src/kmp_runtime.cpp", 7997); }
7997 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc)if (!(team->t.t_threads[f] && team->t.t_threads
[f]->th.th_team_nproc == team->t.t_nproc)) { __kmp_debug_assert
("team->t.t_threads[f] && team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc"
, "openmp/runtime/src/kmp_runtime.cpp", 7997); }
;
7998 }
7999#endif /* KMP_DEBUG */
8000
8001 /* release the worker threads so they may begin working */
8002 __kmp_fork_barrier(gtid, 0);
8003}
8004
8005void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {
8006 kmp_info_t *this_thr = __kmp_threads[gtid];
8007
8008 KMP_DEBUG_ASSERT(team)if (!(team)) { __kmp_debug_assert("team", "openmp/runtime/src/kmp_runtime.cpp"
, 8008); }
;
8009 KMP_DEBUG_ASSERT(this_thr->th.th_team == team)if (!(this_thr->th.th_team == team)) { __kmp_debug_assert(
"this_thr->th.th_team == team", "openmp/runtime/src/kmp_runtime.cpp"
, 8009); }
;
8010 KMP_ASSERT(KMP_MASTER_GTID(gtid))if (!((0 == __kmp_tid_from_gtid((gtid))))) { __kmp_debug_assert
("KMP_MASTER_GTID(gtid)", "openmp/runtime/src/kmp_runtime.cpp"
, 8010); }
;
8011 KMP_MB(); /* Flush all pending memory write invalidates. */
8012
8013 /* Join barrier after fork */
8014
8015#ifdef KMP_DEBUG1
8016 if (__kmp_threads[gtid] &&
8017 __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc) {
8018 __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid,
8019 __kmp_threads[gtid]);
8020 __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, "
8021 "team->t.t_nproc=%d\n",
8022 gtid, __kmp_threads[gtid]->th.th_team_nproc, team,
8023 team->t.t_nproc);
8024 __kmp_print_structure();
8025 }
8026 KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&if (!(__kmp_threads[gtid] && __kmp_threads[gtid]->
th.th_team_nproc == team->t.t_nproc)) { __kmp_debug_assert
("__kmp_threads[gtid] && __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc"
, "openmp/runtime/src/kmp_runtime.cpp", 8027); }
8027 __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc)if (!(__kmp_threads[gtid] && __kmp_threads[gtid]->
th.th_team_nproc == team->t.t_nproc)) { __kmp_debug_assert
("__kmp_threads[gtid] && __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc"
, "openmp/runtime/src/kmp_runtime.cpp", 8027); }
;
8028#endif /* KMP_DEBUG */
8029
8030 __kmp_join_barrier(gtid); /* wait for everyone */
8031#if OMPT_SUPPORT1
8032 if (ompt_enabled.enabled &&
8033 this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) {
8034 int ds_tid = this_thr->th.th_info.ds.ds_tid;
8035 ompt_data_t *task_data = OMPT_CUR_TASK_DATA(this_thr)(&(this_thr->th.th_current_task->ompt_task_info.task_data
))
;
8036 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
8037#if OMPT_OPTIONAL1
8038 void *codeptr = NULL__null;
8039 if (KMP_MASTER_TID(ds_tid)(0 == (ds_tid)) &&
8040 (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)ompt_callback_sync_region_wait_callback ||
8041 ompt_callbacks.ompt_callback(ompt_callback_sync_region)ompt_callback_sync_region_callback))
8042 codeptr = OMPT_CUR_TEAM_INFO(this_thr)(&(this_thr->th.th_team->t.ompt_team_info))->master_return_address;
8043
8044 if (ompt_enabled.ompt_callback_sync_region_wait) {
8045 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)ompt_callback_sync_region_wait_callback(
8046 ompt_sync_region_barrier_implicit, ompt_scope_end, NULL__null, task_data,
8047 codeptr);
8048 }
8049 if (ompt_enabled.ompt_callback_sync_region) {
8050 ompt_callbacks.ompt_callback(ompt_callback_sync_region)ompt_callback_sync_region_callback(
8051 ompt_sync_region_barrier_implicit, ompt_scope_end, NULL__null, task_data,
8052 codeptr);
8053 }
8054#endif
8055 if (!KMP_MASTER_TID(ds_tid)(0 == (ds_tid)) && ompt_enabled.ompt_callback_implicit_task) {
8056 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)ompt_callback_implicit_task_callback(
8057 ompt_scope_end, NULL__null, task_data, 0, ds_tid,
8058 ompt_task_implicit); // TODO: Can this be ompt_task_initial?
8059 }
8060 }
8061#endif
8062
8063 KMP_MB(); /* Flush all pending memory write invalidates. */
8064 KMP_ASSERT(this_thr->th.th_team == team)if (!(this_thr->th.th_team == team)) { __kmp_debug_assert(
"this_thr->th.th_team == team", "openmp/runtime/src/kmp_runtime.cpp"
, 8064); }
;
8065}
8066
8067/* ------------------------------------------------------------------------ */
8068
8069#ifdef USE_LOAD_BALANCE1
8070
8071// Return the number of worker threads actively spinning in the hot team,
8072// if we are at the outermost level of parallelism. Otherwise, return 0.
8073static int __kmp_active_hot_team_nproc(kmp_root_t *root) {
8074 int i;
8075 int retval;
8076 kmp_team_t *hot_team;
8077
8078 if (root->r.r_active) {
8079 return 0;
8080 }
8081 hot_team = root->r.r_hot_team;
8082 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME(2147483647)) {
8083 return hot_team->t.t_nproc - 1; // Don't count primary thread
8084 }
8085
8086 // Skip the primary thread - it is accounted for elsewhere.
8087 retval = 0;
8088 for (i = 1; i < hot_team->t.t_nproc; i++) {
8089 if (hot_team->t.t_threads[i]->th.th_active) {
8090 retval++;
8091 }
8092 }
8093 return retval;
8094}
8095
8096// Perform an automatic adjustment to the number of
8097// threads used by the next parallel region.
8098static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc) {
8099 int retval;
8100 int pool_active;
8101 int hot_team_active;
8102 int team_curr_active;
8103 int system_active;
8104
8105 KB_TRACE(20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n", root,if (kmp_b_debug >= 20) { __kmp_debug_printf ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n"
, root, set_nproc); }
8106 set_nproc))if (kmp_b_debug >= 20) { __kmp_debug_printf ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n"
, root, set_nproc); }
;
8107 KMP_DEBUG_ASSERT(root)if (!(root)) { __kmp_debug_assert("root", "openmp/runtime/src/kmp_runtime.cpp"
, 8107); }
;
8108 KMP_DEBUG_ASSERT(root->r.r_root_team->t.t_threads[0]if (!(root->r.r_root_team->t.t_threads[0] ->th.th_current_task
->td_icvs.dynamic == (!0))) { __kmp_debug_assert("root->r.r_root_team->t.t_threads[0] ->th.th_current_task->td_icvs.dynamic == (!0)"
, "openmp/runtime/src/kmp_runtime.cpp", 8109); }
8109 ->th.th_current_task->td_icvs.dynamic == TRUE)if (!(root->r.r_root_team->t.t_threads[0] ->th.th_current_task
->td_icvs.dynamic == (!0))) { __kmp_debug_assert("root->r.r_root_team->t.t_threads[0] ->th.th_current_task->td_icvs.dynamic == (!0)"
, "openmp/runtime/src/kmp_runtime.cpp", 8109); }
;
8110 KMP_DEBUG_ASSERT(set_nproc > 1)if (!(set_nproc > 1)) { __kmp_debug_assert("set_nproc > 1"
, "openmp/runtime/src/kmp_runtime.cpp", 8110); }
;
8111
8112 if (set_nproc == 1) {
8113 KB_TRACE(20, ("__kmp_load_balance_nproc: serial execution.\n"))if (kmp_b_debug >= 20) { __kmp_debug_printf ("__kmp_load_balance_nproc: serial execution.\n"
); }
;
8114 return 1;
8115 }
8116
8117 // Threads that are active in the thread pool, active in the hot team for this
8118 // particular root (if we are at the outer par level), and the currently
8119 // executing thread (to become the primary thread) are available to add to the
8120 // new team, but are currently contributing to the system load, and must be
8121 // accounted for.
8122 pool_active = __kmp_thread_pool_active_nth;
8123 hot_team_active = __kmp_active_hot_team_nproc(root);
8124 team_curr_active = pool_active + hot_team_active + 1;
8125
8126 // Check the system load.
8127 system_active = __kmp_get_load_balance(__kmp_avail_proc + team_curr_active);
8128 KB_TRACE(30, ("__kmp_load_balance_nproc: system active = %d pool active = %d "if (kmp_b_debug >= 30) { __kmp_debug_printf ("__kmp_load_balance_nproc: system active = %d pool active = %d "
"hot team active = %d\n", system_active, pool_active, hot_team_active
); }
8129 "hot team active = %d\n",if (kmp_b_debug >= 30) { __kmp_debug_printf ("__kmp_load_balance_nproc: system active = %d pool active = %d "
"hot team active = %d\n", system_active, pool_active, hot_team_active
); }
8130 system_active, pool_active, hot_team_active))if (kmp_b_debug >= 30) { __kmp_debug_printf ("__kmp_load_balance_nproc: system active = %d pool active = %d "
"hot team active = %d\n", system_active, pool_active, hot_team_active
); }
;
8131
8132 if (system_active < 0) {
8133 // There was an error reading the necessary info from /proc, so use the
8134 // thread limit algorithm instead. Once we set __kmp_global.g.g_dynamic_mode
8135 // = dynamic_thread_limit, we shouldn't wind up getting back here.
8136 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
8137 KMP_WARNING(CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit")__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_CantLoadBalUsing
, "KMP_DYNAMIC_MODE=thread limit"), __kmp_msg_null)
;
8138
8139 // Make this call behave like the thread limit algorithm.
8140 retval = __kmp_avail_proc - __kmp_nth +
8141 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
8142 if (retval > set_nproc) {
8143 retval = set_nproc;
8144 }
8145 if (retval < KMP_MIN_NTH1) {
8146 retval = KMP_MIN_NTH1;
8147 }
8148
8149 KB_TRACE(20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n",if (kmp_b_debug >= 20) { __kmp_debug_printf ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n"
, retval); }
8150 retval))if (kmp_b_debug >= 20) { __kmp_debug_printf ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n"
, retval); }
;
8151 return retval;
8152 }
8153
8154 // There is a slight delay in the load balance algorithm in detecting new
8155 // running procs. The real system load at this instant should be at least as
8156  // large as the number of active OMP threads available to add to the team.
8157 if (system_active < team_curr_active) {
8158 system_active = team_curr_active;
8159 }
8160 retval = __kmp_avail_proc - system_active + team_curr_active;
8161 if (retval > set_nproc) {
8162 retval = set_nproc;
8163 }
8164 if (retval < KMP_MIN_NTH1) {
8165 retval = KMP_MIN_NTH1;
8166 }
8167
8168 KB_TRACE(20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval))if (kmp_b_debug >= 20) { __kmp_debug_printf ("__kmp_load_balance_nproc: exit. retval:%d\n"
, retval); }
;
8169 return retval;
8170} // __kmp_load_balance_nproc()
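
The core of the load-balance mode above is a single formula: the next team may use the processors that are currently idle plus the threads this root already has active, clamped to the request and to KMP_MIN_NTH. A standalone arithmetic sketch with invented numbers; all variables are illustrative stand-ins.

#include <algorithm>
#include <cstdio>

int main() {
  // Assumed snapshot, for the example only.
  int avail_proc = 16;     // ~ __kmp_avail_proc
  int set_nproc = 12;      // what the parallel region asked for
  int pool_active = 2;     // spinning threads in the thread pool
  int hot_team_active = 3; // spinning workers of this root's hot team
  int system_active = 10;  // runnable threads system-wide (from /proc)
  const int min_nth = 1;   // ~ KMP_MIN_NTH

  int team_curr_active = pool_active + hot_team_active + 1; // + primary = 6
  // The measured load can lag; it cannot really be below what we already own.
  system_active = std::max(system_active, team_curr_active);

  int nproc = avail_proc - system_active + team_curr_active; // 16 - 10 + 6 = 12
  nproc = std::min(nproc, set_nproc);
  nproc = std::max(nproc, min_nth);

  std::printf("load-balanced team size = %d\n", nproc); // prints 12
  return 0;
}
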
8171
8172#endif /* USE_LOAD_BALANCE */
8173
8174/* ------------------------------------------------------------------------ */
8175
8176/* NOTE: this is called with the __kmp_init_lock held */
8177void __kmp_cleanup(void) {
8178 int f;
8179
8180 KA_TRACE(10, ("__kmp_cleanup: enter\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_cleanup: enter\n"
); }
;
8181
8182 if (TCR_4(__kmp_init_parallel)(__kmp_init_parallel)) {
8183#if KMP_HANDLE_SIGNALS(1 || 0)
8184 __kmp_remove_signals();
8185#endif
8186 TCW_4(__kmp_init_parallel, FALSE)(__kmp_init_parallel) = (0);
8187 }
8188
8189 if (TCR_4(__kmp_init_middle)(__kmp_init_middle)) {
8190#if KMP_AFFINITY_SUPPORTED1
8191 __kmp_affinity_uninitialize();
8192#endif /* KMP_AFFINITY_SUPPORTED */
8193 __kmp_cleanup_hierarchy();
8194 TCW_4(__kmp_init_middle, FALSE)(__kmp_init_middle) = (0);
8195 }
8196
8197 KA_TRACE(10, ("__kmp_cleanup: go serial cleanup\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_cleanup: go serial cleanup\n"
); }
;
8198
8199 if (__kmp_init_serial) {
8200 __kmp_runtime_destroy();
8201 __kmp_init_serial = FALSE0;
8202 }
8203
8204 __kmp_cleanup_threadprivate_caches();
8205
8206 for (f = 0; f < __kmp_threads_capacity; f++) {
8207 if (__kmp_root[f] != NULL__null) {
8208 __kmp_free(__kmp_root[f])___kmp_free((__kmp_root[f]), "openmp/runtime/src/kmp_runtime.cpp"
, 8208)
;
8209 __kmp_root[f] = NULL__null;
8210 }
8211 }
8212 __kmp_free(__kmp_threads)___kmp_free((__kmp_threads), "openmp/runtime/src/kmp_runtime.cpp"
, 8212)
;
8213 // __kmp_threads and __kmp_root were allocated at once, as single block, so
8214 // there is no need in freeing __kmp_root.
8215 __kmp_threads = NULL__null;
8216 __kmp_root = NULL__null;
8217 __kmp_threads_capacity = 0;
8218
8219 // Free old __kmp_threads arrays if they exist.
8220 kmp_old_threads_list_t *ptr = __kmp_old_threads_list;
8221 while (ptr) {
8222 kmp_old_threads_list_t *next = ptr->next;
8223 __kmp_free(ptr->threads)___kmp_free((ptr->threads), "openmp/runtime/src/kmp_runtime.cpp"
, 8223)
;
8224 __kmp_free(ptr)___kmp_free((ptr), "openmp/runtime/src/kmp_runtime.cpp", 8224
)
;
8225 ptr = next;
8226 }
8227
8228#if KMP_USE_DYNAMIC_LOCK1
8229 __kmp_cleanup_indirect_user_locks();
8230#else
8231 __kmp_cleanup_user_locks();
8232#endif
8233#if OMPD_SUPPORT1
8234 if (ompd_state) {
8235 __kmp_free(ompd_env_block)___kmp_free((ompd_env_block), "openmp/runtime/src/kmp_runtime.cpp"
, 8235)
;
8236 ompd_env_block = NULL__null;
8237 ompd_env_block_size = 0;
8238 }
8239#endif
8240
8241#if KMP_AFFINITY_SUPPORTED1
8242 KMP_INTERNAL_FREE(CCAST(char *, __kmp_cpuinfo_file))free(const_cast<char *>(__kmp_cpuinfo_file));
8243 __kmp_cpuinfo_file = NULL__null;
8244#endif /* KMP_AFFINITY_SUPPORTED */
8245
8246#if KMP_USE_ADAPTIVE_LOCKS(0 || 1) && !0
8247#if KMP_DEBUG_ADAPTIVE_LOCKS0
8248 __kmp_print_speculative_stats();
8249#endif
8250#endif
8251 KMP_INTERNAL_FREE(__kmp_nested_nth.nth)free(__kmp_nested_nth.nth);
8252 __kmp_nested_nth.nth = NULL__null;
8253 __kmp_nested_nth.size = 0;
8254 __kmp_nested_nth.used = 0;
8255 KMP_INTERNAL_FREE(__kmp_nested_proc_bind.bind_types)free(__kmp_nested_proc_bind.bind_types);
8256 __kmp_nested_proc_bind.bind_types = NULL__null;
8257 __kmp_nested_proc_bind.size = 0;
8258 __kmp_nested_proc_bind.used = 0;
8259 if (__kmp_affinity_format) {
8260 KMP_INTERNAL_FREE(__kmp_affinity_format)free(__kmp_affinity_format);
8261 __kmp_affinity_format = NULL__null;
8262 }
8263
8264 __kmp_i18n_catclose();
8265
8266#if KMP_USE_HIER_SCHED0
8267 __kmp_hier_scheds.deallocate();
8268#endif
8269
8270#if KMP_STATS_ENABLED0
8271 __kmp_stats_fini();
8272#endif
8273
8274 KA_TRACE(10, ("__kmp_cleanup: exit\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_cleanup: exit\n"
); }
;
8275}
8276
8277/* ------------------------------------------------------------------------ */
8278
8279int __kmp_ignore_mppbeg(void) {
8280 char *env;
8281
8282 if ((env = getenv("KMP_IGNORE_MPPBEG")) != NULL__null) {
8283 if (__kmp_str_match_false(env))
8284 return FALSE0;
8285 }
8286 // By default __kmpc_begin() is no-op.
8287 return TRUE(!0);
8288}
8289
8290int __kmp_ignore_mppend(void) {
8291 char *env;
8292
8293 if ((env = getenv("KMP_IGNORE_MPPEND")) != NULL__null) {
8294 if (__kmp_str_match_false(env))
8295 return FALSE0;
8296 }
8297 // By default __kmpc_end() is no-op.
8298 return TRUE(!0);
8299}
8300
8301void __kmp_internal_begin(void) {
8302 int gtid;
8303 kmp_root_t *root;
8304
8305 /* this is a very important step as it will register new sibling threads
8306 and assign these new uber threads a new gtid */
8307 gtid = __kmp_entry_gtid()__kmp_get_global_thread_id_reg();
8308 root = __kmp_threads[gtid]->th.th_root;
8309 KMP_ASSERT(KMP_UBER_GTID(gtid))if (!(KMP_UBER_GTID(gtid))) { __kmp_debug_assert("KMP_UBER_GTID(gtid)"
, "openmp/runtime/src/kmp_runtime.cpp", 8309); }
;
8310
8311 if (root->r.r_begin)
8312 return;
8313 __kmp_acquire_lock(&root->r.r_begin_lock, gtid);
8314 if (root->r.r_begin) {
8315 __kmp_release_lock(&root->r.r_begin_lock, gtid);
8316 return;
8317 }
8318
8319 root->r.r_begin = TRUE(!0);
8320
8321 __kmp_release_lock(&root->r.r_begin_lock, gtid);
8322}
8323
8324/* ------------------------------------------------------------------------ */
8325
8326void __kmp_user_set_library(enum library_type arg) {
8327 int gtid;
8328 kmp_root_t *root;
8329 kmp_info_t *thread;
8330
8331 /* first, make sure we are initialized so we can get our gtid */
8332
8333 gtid = __kmp_entry_gtid()__kmp_get_global_thread_id_reg();
8334 thread = __kmp_threads[gtid];
8335
8336 root = thread->th.th_root;
8337
8338 KA_TRACE(20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n"
, gtid, arg, library_serial); }
8339 library_serial))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n"
, gtid, arg, library_serial); }
;
8340 if (root->r.r_in_parallel) { /* Must be called in serial section of top-level
8341 thread */
8342 KMP_WARNING(SetLibraryIncorrectCall)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_SetLibraryIncorrectCall
), __kmp_msg_null)
;
8343 return;
8344 }
8345
8346 switch (arg) {
8347 case library_serial:
8348 thread->th.th_set_nproc = 0;
8349 set__nproc(thread, 1)(((thread)->th.th_current_task->td_icvs.nproc) = (1));
8350 break;
8351 case library_turnaround:
8352 thread->th.th_set_nproc = 0;
8353 set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth(((thread)->th.th_current_task->td_icvs.nproc) = (__kmp_dflt_team_nth
? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub))
8354 : __kmp_dflt_team_nth_ub)(((thread)->th.th_current_task->td_icvs.nproc) = (__kmp_dflt_team_nth
? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub))
;
8355 break;
8356 case library_throughput:
8357 thread->th.th_set_nproc = 0;
8358 set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth(((thread)->th.th_current_task->td_icvs.nproc) = (__kmp_dflt_team_nth
? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub))
8359 : __kmp_dflt_team_nth_ub)(((thread)->th.th_current_task->td_icvs.nproc) = (__kmp_dflt_team_nth
? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub))
;
8360 break;
8361 default:
8362 KMP_FATAL(UnknownLibraryType, arg)__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_UnknownLibraryType,
arg), __kmp_msg_null)
;
8363 }
8364
8365 __kmp_aux_set_library(arg);
8366}
8367
8368void __kmp_aux_set_stacksize(size_t arg) {
8369 if (!__kmp_init_serial)
8370 __kmp_serial_initialize();
8371
8372#if KMP_OS_DARWIN0
8373 if (arg & (0x1000 - 1)) {
8374 arg &= ~(0x1000 - 1);
8375 if (arg + 0x1000) /* check for overflow if we round up */
8376 arg += 0x1000;
8377 }
8378#endif
8379 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
8380
8381 /* only change the default stacksize before the first parallel region */
8382 if (!TCR_4(__kmp_init_parallel)(__kmp_init_parallel)) {
8383 size_t value = arg; /* argument is in bytes */
8384
8385 if (value < __kmp_sys_min_stksize)
8386 value = __kmp_sys_min_stksize;
8387 else if (value > KMP_MAX_STKSIZE(~((size_t)1 << ((sizeof(size_t) * (1 << 3)) - 1)
))
)
8388 value = KMP_MAX_STKSIZE(~((size_t)1 << ((sizeof(size_t) * (1 << 3)) - 1)
))
;
8389
8390 __kmp_stksize = value;
8391
8392 __kmp_env_stksize = TRUE(!0); /* was KMP_STACKSIZE specified? */
8393 }
8394
8395 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
8396}
8397
8398/* set the behaviour of the runtime library */
8399/* TODO this can cause some odd behaviour with sibling parallelism... */
8400void __kmp_aux_set_library(enum library_type arg) {
8401 __kmp_library = arg;
8402
8403 switch (__kmp_library) {
8404 case library_serial: {
8405 KMP_INFORM(LibraryIsSerial)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_LibraryIsSerial
), __kmp_msg_null)
;
8406 } break;
8407 case library_turnaround:
8408 if (__kmp_use_yield == 1 && !__kmp_use_yield_exp_set)
8409 __kmp_use_yield = 2; // only yield when oversubscribed
8410 break;
8411 case library_throughput:
8412 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME(2147483647))
8413 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME(__kmp_is_hybrid_cpu() ? (0) : (200));
8414 break;
8415 default:
8416 KMP_FATAL(UnknownLibraryType, arg)__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_UnknownLibraryType,
arg), __kmp_msg_null)
;
8417 }
8418}
8419
8420/* Getting team information common for all team API */
8421// Returns NULL if not in teams construct
8422static kmp_team_t *__kmp_aux_get_team_info(int &teams_serialized) {
8423 kmp_info_t *thr = __kmp_entry_thread();
8424 teams_serialized = 0;
8425 if (thr->th.th_teams_microtask) {
8426 kmp_team_t *team = thr->th.th_team;
8427 int tlevel = thr->th.th_teams_level; // the level of the teams construct
8428 int ii = team->t.t_level;
8429 teams_serialized = team->t.t_serialized;
8430 int level = tlevel + 1;
8431 KMP_DEBUG_ASSERT(ii >= tlevel)if (!(ii >= tlevel)) { __kmp_debug_assert("ii >= tlevel"
, "openmp/runtime/src/kmp_runtime.cpp", 8431); }
;
8432 while (ii > level) {
8433 for (teams_serialized = team->t.t_serialized;
8434 (teams_serialized > 0) && (ii > level); teams_serialized--, ii--) {
8435 }
8436 if (team->t.t_serialized && (!teams_serialized)) {
8437 team = team->t.t_parent;
8438 continue;
8439 }
8440 if (ii > level) {
8441 team = team->t.t_parent;
8442 ii--;
8443 }
8444 }
8445 return team;
8446 }
8447 return NULL__null;
8448}
8449
8450int __kmp_aux_get_team_num() {
8451 int serialized;
8452 kmp_team_t *team = __kmp_aux_get_team_info(serialized);
8453 if (team) {
8454 if (serialized > 1) {
8455 return 0; // teams region is serialized ( 1 team of 1 thread ).
8456 } else {
8457 return team->t.t_master_tid;
8458 }
8459 }
8460 return 0;
8461}
8462
8463int __kmp_aux_get_num_teams() {
8464 int serialized;
8465 kmp_team_t *team = __kmp_aux_get_team_info(serialized);
8466 if (team) {
8467 if (serialized > 1) {
8468 return 1;
8469 } else {
8470 return team->t.t_parent->t.t_nproc;
8471 }
8472 }
8473 return 1;
8474}
8475
8476/* ------------------------------------------------------------------------ */
8477
8478/*
8479 * Affinity Format Parser
8480 *
8481 * Field is in form of: %[[[0].]size]type
8482 * % and type are required (%% means print a literal '%')
8483 * type is either single char or long name surrounded by {},
8484 * e.g., N or {num_threads}
8485 * 0 => leading zeros
8486 * . => right justified when size is specified
8487 * by default output is left justified
8488 * size is the *minimum* field length
8489 * All other characters are printed as is
8490 *
8491 * Available field types:
8492 * L {thread_level} - omp_get_level()
8493 * n {thread_num} - omp_get_thread_num()
8494 * h {host} - name of host machine
8495 * P {process_id} - process id (integer)
8496 * T {thread_identifier} - native thread identifier (integer)
8497 * N {num_threads} - omp_get_num_threads()
8498 * A {ancestor_tnum} - omp_get_ancestor_thread_num(omp_get_level()-1)
8499 * a {thread_affinity} - comma separated list of integers or integer ranges
8500 * (values of affinity mask)
8501 *
8502 * Implementation-specific field types can be added
8503 * If a type is unknown, print "undefined"
8504 */
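// Illustrative example (editorial annotation, not part of kmp_runtime.cpp):
// with the grammar above, a format such as "OMP: host=%H tid=%0.4n" prints
// the host name and the thread number zero-padded to a minimum width of 4.
// A minimal sketch using the OpenMP 5.0 affinity-format API, which feeds the
// parser implemented below:
//
//   #include <omp.h>
//   #include <stddef.h>
//   int main(void) {
//     omp_set_affinity_format("OMP: host=%H tid=%0.4n");
//     #pragma omp parallel
//     omp_display_affinity(NULL); // one line per thread, e.g. "OMP: host=node01 tid=0003"
//     return 0;
//   }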
8505
8506// Structure holding the short name, long name, and corresponding data type
8507// for snprintf. A table of these entries describes the full set of valid
8508// keyword field types.
8509typedef struct kmp_affinity_format_field_t {
8510 char short_name; // from spec e.g., L -> thread level
8511 const char *long_name; // from spec thread_level -> thread level
8512 char field_format; // data type for snprintf (typically 'd' or 's'
8513 // for integer or string)
8514} kmp_affinity_format_field_t;
8515
8516static const kmp_affinity_format_field_t __kmp_affinity_format_table[] = {
8517#if KMP_AFFINITY_SUPPORTED1
8518 {'A', "thread_affinity", 's'},
8519#endif
8520 {'t', "team_num", 'd'},
8521 {'T', "num_teams", 'd'},
8522 {'L', "nesting_level", 'd'},
8523 {'n', "thread_num", 'd'},
8524 {'N', "num_threads", 'd'},
8525 {'a', "ancestor_tnum", 'd'},
8526 {'H', "host", 's'},
8527 {'P', "process_id", 'd'},
8528 {'i', "native_thread_id", 'd'}};
8529
8530// Return the number of characters needed to hold the field
8531static int __kmp_aux_capture_affinity_field(int gtid, const kmp_info_t *th,
8532 const char **ptr,
8533 kmp_str_buf_t *field_buffer) {
8534 int rc, format_index, field_value;
8535 const char *width_left, *width_right;
8536 bool pad_zeros, right_justify, parse_long_name, found_valid_name;
8537 static const int FORMAT_SIZE = 20;
8538 char format[FORMAT_SIZE] = {0};
8539 char absolute_short_name = 0;
8540
8541  KMP_DEBUG_ASSERT(gtid >= 0);
8542  KMP_DEBUG_ASSERT(th);
8543  KMP_DEBUG_ASSERT(**ptr == '%');
8544  KMP_DEBUG_ASSERT(field_buffer);
8545
8546 __kmp_str_buf_clear(field_buffer);
8547
8548 // Skip the initial %
8549 (*ptr)++;
8550
8551 // Check for %% first
8552 if (**ptr == '%') {
8553 __kmp_str_buf_cat(field_buffer, "%", 1);
8554 (*ptr)++; // skip over the second %
8555 return 1;
8556 }
8557
8558 // Parse field modifiers if they are present
8559 pad_zeros = false;
8560 if (**ptr == '0') {
8561 pad_zeros = true;
8562 (*ptr)++; // skip over 0
8563 }
8564 right_justify = false;
8565 if (**ptr == '.') {
8566 right_justify = true;
8567 (*ptr)++; // skip over .
8568 }
8569 // Parse width of field: [width_left, width_right)
8570 width_left = width_right = NULL__null;
8571 if (**ptr >= '0' && **ptr <= '9') {
8572 width_left = *ptr;
8573    SKIP_DIGITS(*ptr);
8574 width_right = *ptr;
8575 }
8576
8577 // Create the format for KMP_SNPRINTF based on flags parsed above
8578 format_index = 0;
8579 format[format_index++] = '%';
8580 if (!right_justify)
8581 format[format_index++] = '-';
8582 if (pad_zeros)
8583 format[format_index++] = '0';
8584 if (width_left && width_right) {
8585 int i = 0;
8586 // Only allow 8 digit number widths.
8587 // This also prevents overflowing format variable
8588 while (i < 8 && width_left < width_right) {
8589 format[format_index++] = *width_left;
8590 width_left++;
8591 i++;
8592 }
8593 }
8594
8595 // Parse a name (long or short)
8596 // Canonicalize the name into absolute_short_name
8597 found_valid_name = false;
8598 parse_long_name = (**ptr == '{');
8599 if (parse_long_name)
8600 (*ptr)++; // skip initial left brace
8601 for (size_t i = 0; i < sizeof(__kmp_affinity_format_table) /
8602 sizeof(__kmp_affinity_format_table[0]);
8603 ++i) {
8604 char short_name = __kmp_affinity_format_table[i].short_name;
8605 const char *long_name = __kmp_affinity_format_table[i].long_name;
8606 char field_format = __kmp_affinity_format_table[i].field_format;
8607 if (parse_long_name) {
8608 size_t length = KMP_STRLENstrlen(long_name);
8609 if (strncmp(*ptr, long_name, length) == 0) {
8610 found_valid_name = true;
8611 (*ptr) += length; // skip the long name
8612 }
8613 } else if (**ptr == short_name) {
8614 found_valid_name = true;
8615 (*ptr)++; // skip the short name
8616 }
8617 if (found_valid_name) {
8618 format[format_index++] = field_format;
8619 format[format_index++] = '\0';
8620 absolute_short_name = short_name;
8621 break;
8622 }
8623 }
8624 if (parse_long_name) {
8625 if (**ptr != '}') {
8626 absolute_short_name = 0;
8627 } else {
8628 (*ptr)++; // skip over the right brace
8629 }
8630 }
8631
8632 // Attempt to fill the buffer with the requested
8633 // value using snprintf within __kmp_str_buf_print()
8634 switch (absolute_short_name) {
8635 case 't':
8636 rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_team_num());
8637 break;
8638 case 'T':
8639 rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_num_teams());
8640 break;
8641 case 'L':
8642 rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_level);
8643 break;
8644 case 'n':
8645 rc = __kmp_str_buf_print(field_buffer, format, __kmp_tid_from_gtid(gtid));
8646 break;
8647 case 'H': {
8648 static const int BUFFER_SIZE = 256;
8649 char buf[BUFFER_SIZE];
8650 __kmp_expand_host_name(buf, BUFFER_SIZE);
8651 rc = __kmp_str_buf_print(field_buffer, format, buf);
8652 } break;
8653 case 'P':
8654 rc = __kmp_str_buf_print(field_buffer, format, getpid());
8655 break;
8656 case 'i':
8657 rc = __kmp_str_buf_print(field_buffer, format, __kmp_gettid()syscall(186));
8658 break;
8659 case 'N':
8660 rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_nproc);
8661 break;
8662 case 'a':
8663 field_value =
8664 __kmp_get_ancestor_thread_num(gtid, th->th.th_team->t.t_level - 1);
8665 rc = __kmp_str_buf_print(field_buffer, format, field_value);
8666 break;
8667#if KMP_AFFINITY_SUPPORTED1
8668 case 'A': {
8669 kmp_str_buf_t buf;
8670    __kmp_str_buf_init(&buf);
8671 __kmp_affinity_str_buf_mask(&buf, th->th.th_affin_mask);
8672 rc = __kmp_str_buf_print(field_buffer, format, buf.str);
8673 __kmp_str_buf_free(&buf);
8674 } break;
8675#endif
8676 default:
8677 // According to spec, If an implementation does not have info for field
8678 // type, then "undefined" is printed
8679 rc = __kmp_str_buf_print(field_buffer, "%s", "undefined");
8680 // Skip the field
8681 if (parse_long_name) {
8682      SKIP_TOKEN(*ptr);
8683 if (**ptr == '}')
8684 (*ptr)++;
8685 } else {
8686 (*ptr)++;
8687 }
8688 }
8689
8690  KMP_ASSERT(format_index <= FORMAT_SIZE);
8691 return rc;
8692}
8693
8694/*
8695 * Return the number of characters needed to hold the affinity string
8696 * (not including the terminating null byte).
8697 * The resulting string is printed to buffer, which the caller can then
8698 * handle afterwards.
8699 */
8700size_t __kmp_aux_capture_affinity(int gtid, const char *format,
8701 kmp_str_buf_t *buffer) {
8702 const char *parse_ptr;
8703 size_t retval;
8704 const kmp_info_t *th;
8705 kmp_str_buf_t field;
8706
8707  KMP_DEBUG_ASSERT(buffer);
8708  KMP_DEBUG_ASSERT(gtid >= 0);
8709
8710  __kmp_str_buf_init(&field);
8711 __kmp_str_buf_clear(buffer);
8712
8713 th = __kmp_threads[gtid];
8714 retval = 0;
8715
8716 // If format is NULL or zero-length string, then we use
8717 // affinity-format-var ICV
8718 parse_ptr = format;
8719 if (parse_ptr == NULL__null || *parse_ptr == '\0') {
8720 parse_ptr = __kmp_affinity_format;
8721 }
8722  KMP_DEBUG_ASSERT(parse_ptr);
8723
8724 while (*parse_ptr != '\0') {
8725 // Parse a field
8726 if (*parse_ptr == '%') {
8727 // Put field in the buffer
8728 int rc = __kmp_aux_capture_affinity_field(gtid, th, &parse_ptr, &field);
8729 __kmp_str_buf_catbuf(buffer, &field);
8730 retval += rc;
8731 } else {
8732 // Put literal character in buffer
8733 __kmp_str_buf_cat(buffer, parse_ptr, 1);
8734 retval++;
8735 parse_ptr++;
8736 }
8737 }
8738 __kmp_str_buf_free(&field);
8739 return retval;
8740}
8741
8742// Displays the affinity string to stdout
8743void __kmp_aux_display_affinity(int gtid, const char *format) {
8744 kmp_str_buf_t buf;
8745  __kmp_str_buf_init(&buf);
8746 __kmp_aux_capture_affinity(gtid, format, &buf);
8747 __kmp_fprintf(kmp_out, "%s" KMP_END_OF_LINE"\n", buf.str);
8748 __kmp_str_buf_free(&buf);
8749}
8750
8751/* ------------------------------------------------------------------------ */
8752
8753void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid) {
8754 int blocktime = arg; /* argument is in milliseconds */
8755#if KMP_USE_MONITOR
8756 int bt_intervals;
8757#endif
8758 kmp_int8 bt_set;
8759
8760 __kmp_save_internal_controls(thread);
8761
8762 /* Normalize and set blocktime for the teams */
8763 if (blocktime < KMP_MIN_BLOCKTIME(0))
8764 blocktime = KMP_MIN_BLOCKTIME(0);
8765 else if (blocktime > KMP_MAX_BLOCKTIME(2147483647))
8766 blocktime = KMP_MAX_BLOCKTIME(2147483647);
8767
8768  set__blocktime_team(thread->th.th_team, tid, blocktime);
8769  set__blocktime_team(thread->th.th_serial_team, 0, blocktime);
8770
8771#if KMP_USE_MONITOR
8772 /* Calculate and set blocktime intervals for the teams */
8773 bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);
8774
8775 set__bt_intervals_team(thread->th.th_team, tid, bt_intervals);
8776 set__bt_intervals_team(thread->th.th_serial_team, 0, bt_intervals);
8777#endif
8778
8779 /* Set whether blocktime has been set to "TRUE" */
8780 bt_set = TRUE(!0);
8781
8782  set__bt_set_team(thread->th.th_team, tid, bt_set);
8783  set__bt_set_team(thread->th.th_serial_team, 0, bt_set);
8784#if KMP_USE_MONITOR
8785  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "
8786                "bt_intervals=%d, monitor_updates=%d\n",
8787                __kmp_gtid_from_tid(tid, thread->th.th_team),
8788                thread->th.th_team->t.t_id, tid, blocktime, bt_intervals,
8789                __kmp_monitor_wakeups));
8790#else
8791  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n",
8792                __kmp_gtid_from_tid(tid, thread->th.th_team),
8793                thread->th.th_team->t.t_id, tid, blocktime));
8794#endif
8795}
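// Usage sketch (editorial annotation): the user-facing counterpart of
// __kmp_aux_set_blocktime() is the Intel extension kmp_set_blocktime()
// (assumed declared in this runtime's omp.h) or the KMP_BLOCKTIME
// environment variable, e.g.
//
//   kmp_set_blocktime(0);   // idle workers sleep immediately instead of spin-waiting
//   kmp_set_blocktime(200); // spin for ~200 ms (the non-hybrid default) before sleeping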
8796
8797void __kmp_aux_set_defaults(char const *str, size_t len) {
8798 if (!__kmp_init_serial) {
8799 __kmp_serial_initialize();
8800 }
8801 __kmp_env_initialize(str);
8802
8803 if (__kmp_settings || __kmp_display_env || __kmp_display_env_verbose) {
8804 __kmp_env_print();
8805 }
8806} // __kmp_aux_set_defaults
8807
8808/* ------------------------------------------------------------------------ */
8809/* internal fast reduction routines */
8810
8811PACKED_REDUCTION_METHOD_T
8812__kmp_determine_reduction_method(
8813 ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
8814 void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
8815 kmp_critical_name *lck) {
8816
8817 // Default reduction method: critical construct ( lck != NULL, like in current
8818 // PAROPT )
8819 // If ( reduce_data!=NULL && reduce_func!=NULL ): the tree-reduction method
8820 // can be selected by RTL
8821 // If loc->flags contains KMP_IDENT_ATOMIC_REDUCE, the atomic reduce method
8822 // can be selected by RTL
8823 // Finally, it's up to OpenMP RTL to make a decision on which method to select
8824 // among generated by PAROPT.
8825
8826 PACKED_REDUCTION_METHOD_T retval;
8827
8828 int team_size;
8829
8830  KMP_DEBUG_ASSERT(loc); // it would be nice to test ( loc != 0 )
8831  KMP_DEBUG_ASSERT(lck); // it would be nice to test ( lck != 0 )
8832
8833#define FAST_REDUCTION_ATOMIC_METHOD_GENERATED \
8834 (loc && \
8835 ((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE)))
8836#define FAST_REDUCTION_TREE_METHOD_GENERATED ((reduce_data) && (reduce_func))
8837
8838 retval = critical_reduce_block;
8839
8840 // another choice of getting a team size (with 1 dynamic deference) is slower
8841 team_size = __kmp_get_team_num_threads(global_tid)(__kmp_threads[(global_tid)]->th.th_team->t.t_nproc);
8842 if (team_size == 1) {
8843
8844 retval = empty_reduce_block;
8845
8846 } else {
8847
8848 int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
8849
8850#if KMP_ARCH_X86_641 || KMP_ARCH_PPC64(0 || 0) || KMP_ARCH_AARCH640 || \
8851 KMP_ARCH_MIPS640 || KMP_ARCH_RISCV640 || KMP_ARCH_LOONGARCH640
8852
8853#if KMP_OS_LINUX1 || KMP_OS_DRAGONFLY0 || KMP_OS_FREEBSD0 || KMP_OS_NETBSD0 || \
8854 KMP_OS_OPENBSD0 || KMP_OS_WINDOWS0 || KMP_OS_DARWIN0 || KMP_OS_HURD0
8855
8856 int teamsize_cutoff = 4;
8857
8858#if KMP_MIC_SUPPORTED((0 || 1) && (1 || 0))
8859 if (__kmp_mic_type != non_mic) {
8860 teamsize_cutoff = 8;
8861 }
8862#endif
8863 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
8864 if (tree_available) {
8865 if (team_size <= teamsize_cutoff) {
8866 if (atomic_available) {
8867 retval = atomic_reduce_block;
8868 }
8869 } else {
8870 retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER(((tree_reduce_block) | (bs_reduction_barrier)));
8871 }
8872 } else if (atomic_available) {
8873 retval = atomic_reduce_block;
8874 }
8875#else
8876#error "Unknown or unsupported OS"
8877#endif // KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||
8878 // KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD
8879
8880#elif KMP_ARCH_X860 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS0
8881
8882#if KMP_OS_LINUX1 || KMP_OS_FREEBSD0 || KMP_OS_WINDOWS0 || KMP_OS_HURD0
8883
8884 // basic tuning
8885
8886 if (atomic_available) {
8887 if (num_vars <= 2) { // && ( team_size <= 8 ) due to false-sharing ???
8888 retval = atomic_reduce_block;
8889 }
8890 } // otherwise: use critical section
8891
8892#elif KMP_OS_DARWIN0
8893
8894 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
8895 if (atomic_available && (num_vars <= 3)) {
8896 retval = atomic_reduce_block;
8897 } else if (tree_available) {
8898 if ((reduce_size > (9 * sizeof(kmp_real64))) &&
8899 (reduce_size < (2000 * sizeof(kmp_real64)))) {
8900 retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER(((tree_reduce_block) | (bs_plain_barrier)));
8901 }
8902 } // otherwise: use critical section
8903
8904#else
8905#error "Unknown or unsupported OS"
8906#endif
8907
8908#else
8909#error "Unknown or unsupported architecture"
8910#endif
8911 }
8912
8913 // KMP_FORCE_REDUCTION
8914
8915 // If the team is serialized (team_size == 1), ignore the forced reduction
8916 // method and stay with the unsynchronized method (empty_reduce_block)
8917 if (__kmp_force_reduction_method != reduction_method_not_defined &&
8918 team_size != 1) {
8919
8920 PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block;
8921
8922 int atomic_available, tree_available;
8923
8924 switch ((forced_retval = __kmp_force_reduction_method)) {
8925 case critical_reduce_block:
8926      KMP_ASSERT(lck); // lck should be != 0
8927 break;
8928
8929 case atomic_reduce_block:
8930 atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
8931 if (!atomic_available) {
8932        KMP_WARNING(RedMethodNotSupported, "atomic");
8933 forced_retval = critical_reduce_block;
8934 }
8935 break;
8936
8937 case tree_reduce_block:
8938 tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
8939 if (!tree_available) {
8940        KMP_WARNING(RedMethodNotSupported, "tree");
8941 forced_retval = critical_reduce_block;
8942 } else {
8943#if KMP_FAST_REDUCTION_BARRIER1
8944 forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER(((tree_reduce_block) | (bs_reduction_barrier)));
8945#endif
8946 }
8947 break;
8948
8949 default:
8950      KMP_ASSERT(0); // "unsupported method specified"
8951 }
8952
8953 retval = forced_retval;
8954 }
8955
8956  KA_TRACE(10, ("reduction method selected=%08x\n", retval));
8957
8958#undef FAST_REDUCTION_TREE_METHOD_GENERATED
8959#undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED
8960
8961 return (retval);
8962}
8963// this function is for testing set/get/determine reduce method
8964kmp_int32 __kmp_get_reduce_method(void) {
8965 return ((__kmp_entry_thread()->th.th_local.packed_reduction_method) >> 8);
8966}
8967
8968// Soft pause sets up threads to ignore blocktime and just go to sleep.
8969// Spin-wait code checks __kmp_pause_status and reacts accordingly.
8970void __kmp_soft_pause() { __kmp_pause_status = kmp_soft_paused; }
8971
8972// Hard pause shuts down the runtime completely. Resume happens naturally when
8973// OpenMP is used subsequently.
8974void __kmp_hard_pause() {
8975 __kmp_pause_status = kmp_hard_paused;
8976 __kmp_internal_end_thread(-1);
8977}
8978
8979// Soft resume sets __kmp_pause_status, and wakes up all threads.
8980void __kmp_resume_if_soft_paused() {
8981 if (__kmp_pause_status == kmp_soft_paused) {
8982 __kmp_pause_status = kmp_not_paused;
8983
8984 for (int gtid = 1; gtid < __kmp_threads_capacity; ++gtid) {
8985 kmp_info_t *thread = __kmp_threads[gtid];
8986 if (thread) { // Wake it if sleeping
8987 kmp_flag_64<> fl(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
8988 thread);
8989 if (fl.is_sleeping())
8990 fl.resume(gtid);
8991 else if (__kmp_try_suspend_mx(thread)) { // got suspend lock
8992 __kmp_unlock_suspend_mx(thread); // unlock it; it won't sleep
8993 } else { // thread holds the lock and may sleep soon
8994 do { // until either the thread sleeps, or we can get the lock
8995 if (fl.is_sleeping()) {
8996 fl.resume(gtid);
8997 break;
8998 } else if (__kmp_try_suspend_mx(thread)) {
8999 __kmp_unlock_suspend_mx(thread);
9000 break;
9001 }
9002 } while (1);
9003 }
9004 }
9005 }
9006 }
9007}
9008
9009// This function is called via __kmpc_pause_resource. Returns 0 if successful.
9010// TODO: add warning messages
9011int __kmp_pause_resource(kmp_pause_status_t level) {
9012 if (level == kmp_not_paused) { // requesting resume
9013 if (__kmp_pause_status == kmp_not_paused) {
9014 // error message about runtime not being paused, so can't resume
9015 return 1;
9016 } else {
9017      KMP_DEBUG_ASSERT(__kmp_pause_status == kmp_soft_paused ||
9018                       __kmp_pause_status == kmp_hard_paused);
9019 __kmp_pause_status = kmp_not_paused;
9020 return 0;
9021 }
9022 } else if (level == kmp_soft_paused) { // requesting soft pause
9023 if (__kmp_pause_status != kmp_not_paused) {
9024 // error message about already being paused
9025 return 1;
9026 } else {
9027 __kmp_soft_pause();
9028 return 0;
9029 }
9030 } else if (level == kmp_hard_paused) { // requesting hard pause
9031 if (__kmp_pause_status != kmp_not_paused) {
9032 // error message about already being paused
9033 return 1;
9034 } else {
9035 __kmp_hard_pause();
9036 return 0;
9037 }
9038 } else {
9039 // error message about invalid level
9040 return 1;
9041 }
9042}
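// Usage sketch (editorial annotation): this path is reached from the OpenMP
// 5.0 routines omp_pause_resource() / omp_pause_resource_all(), e.g.
//
//   #include <omp.h>
//   // soft pause: threads ignore blocktime and go to sleep (see __kmp_soft_pause)
//   int rc = omp_pause_resource(omp_pause_soft, omp_get_initial_device());
//   // rc == 0 on success; a later parallel region resumes the runtime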
9043
9044void __kmp_omp_display_env(int verbose) {
9045 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
9046 if (__kmp_init_serial == 0)
9047 __kmp_do_serial_initialize();
9048 __kmp_display_env_impl(!verbose, verbose);
9049 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
9050}
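// Usage note (editorial annotation): __kmp_omp_display_env() backs the OpenMP
// 5.1 routine omp_display_env(); omp_display_env(0) prints the short form of
// the environment block and omp_display_env(1) the verbose form, matching
// OMP_DISPLAY_ENV=true and OMP_DISPLAY_ENV=verbose respectively.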
9051
9052// The team size is changing, so distributed barrier must be modified
9053void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
9054 int new_nthreads) {
9055  KMP_DEBUG_ASSERT(__kmp_barrier_release_pattern[bs_forkjoin_barrier] ==
9056                   bp_dist_bar);
9057 kmp_info_t **other_threads = team->t.t_threads;
9058
9059 // We want all the workers to stop waiting on the barrier while we adjust the
9060 // size of the team.
9061 for (int f = 1; f < old_nthreads; ++f) {
9062    KMP_DEBUG_ASSERT(other_threads[f] != NULL);
9063 // Ignore threads that are already inactive or not present in the team
9064 if (team->t.t_threads[f]->th.th_used_in_team.load() == 0) {
9065 // teams construct causes thread_limit to get passed in, and some of
9066 // those could be inactive; just ignore them
9067 continue;
9068 }
9069 // If thread is transitioning still to in_use state, wait for it
9070 if (team->t.t_threads[f]->th.th_used_in_team.load() == 3) {
9071 while (team->t.t_threads[f]->th.th_used_in_team.load() == 3)
9072 KMP_CPU_PAUSE()__kmp_x86_pause();
9073 }
9074 // The thread should be in_use now
9075    KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 1);
9076 // Transition to unused state
9077 team->t.t_threads[f]->th.th_used_in_team.store(2);
9078    KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 2);
9079 }
9080 // Release all the workers
9081 team->t.b->go_release();
9082
9083  KMP_MFENCE();
9084
9085 // Workers should see transition status 2 and move to 0; but may need to be
9086 // woken up first
9087 int count = old_nthreads - 1;
9088 while (count > 0) {
9089 count = old_nthreads - 1;
9090 for (int f = 1; f < old_nthreads; ++f) {
9091 if (other_threads[f]->th.th_used_in_team.load() != 0) {
9092 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME(2147483647)) { // Wake up the workers
9093          kmp_atomic_flag_64<> *flag = (kmp_atomic_flag_64<> *)CCAST(
9094              void *, other_threads[f]->th.th_sleep_loc);
9095 __kmp_atomic_resume_64(other_threads[f]->th.th_info.ds.ds_gtid, flag);
9096 }
9097 } else {
9098        KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 0);
9099 count--;
9100 }
9101 }
9102 }
9103 // Now update the barrier size
9104 team->t.b->update_num_threads(new_nthreads);
9105 team->t.b->go_reset();
9106}
9107
9108void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads) {
9109 // Add the threads back to the team
9110  KMP_DEBUG_ASSERT(team);
9111 // Threads were paused and pointed at th_used_in_team temporarily during a
9112 // resize of the team. We're going to set th_used_in_team to 3 to indicate to
9113 // the thread that it should transition itself back into the team. Then, if
9114 // blocktime isn't infinite, the thread could be sleeping, so we send a resume
9115 // to wake it up.
9116 for (int f = 1; f < new_nthreads; ++f) {
9117    KMP_DEBUG_ASSERT(team->t.t_threads[f]);
9118    KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team), 0,
9119                                3);
9120 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME(2147483647)) { // Wake up sleeping threads
9121 __kmp_resume_32(team->t.t_threads[f]->th.th_info.ds.ds_gtid,
9122 (kmp_flag_32<false, false> *)NULL__null);
9123 }
9124 }
9125 // The threads should be transitioning to the team; when they are done, they
9126 // should have set th_used_in_team to 1. This loop forces master to wait until
9127 // all threads have moved into the team and are waiting in the barrier.
9128 int count = new_nthreads - 1;
9129 while (count > 0) {
9130 count = new_nthreads - 1;
9131 for (int f = 1; f < new_nthreads; ++f) {
9132 if (team->t.t_threads[f]->th.th_used_in_team.load() == 1) {
9133 count--;
9134 }
9135 }
9136 }
9137}
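// State summary (editorial annotation): during a distributed-barrier resize,
// th_used_in_team acts as a small state machine: 1 = in the team, 2 = asked
// to leave (set in __kmp_resize_dist_barrier), 0 = out of the team, and
// 3 = asked to rejoin (set above); the worker threads themselves perform the
// 2 -> 0 and 3 -> 1 transitions.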
9138
9139// Globals and functions for hidden helper task
9140kmp_info_t **__kmp_hidden_helper_threads;
9141kmp_info_t *__kmp_hidden_helper_main_thread;
9142std::atomic<kmp_int32> __kmp_unexecuted_hidden_helper_tasks;
9143#if KMP_OS_LINUX1
9144kmp_int32 __kmp_hidden_helper_threads_num = 8;
9145kmp_int32 __kmp_enable_hidden_helper = TRUE(!0);
9146#else
9147kmp_int32 __kmp_hidden_helper_threads_num = 0;
9148kmp_int32 __kmp_enable_hidden_helper = FALSE0;
9149#endif
9150
9151namespace {
9152std::atomic<kmp_int32> __kmp_hit_hidden_helper_threads_num;
9153
9154void __kmp_hidden_helper_wrapper_fn(int *gtid, int *, ...) {
9155  // This is an explicit synchronization of all hidden helper threads, in
9156  // case a regular thread pushes a hidden helper task to a helper thread
9157  // that has not yet been awakened since the main thread released the
9158  // helpers after creating the team.
9159  KMP_ATOMIC_INC(&__kmp_hit_hidden_helper_threads_num);
9160  while (KMP_ATOMIC_LD_ACQ(&__kmp_hit_hidden_helper_threads_num) !=
9161         __kmp_hidden_helper_threads_num)
9162    ;
9163
9164 // If main thread, then wait for signal
9165 if (__kmpc_master(nullptr, *gtid)) {
9166 // First, unset the initial state and release the initial thread
9167 TCW_4(__kmp_init_hidden_helper_threads, FALSE)(__kmp_init_hidden_helper_threads) = (0);
9168 __kmp_hidden_helper_initz_release();
9169 __kmp_hidden_helper_main_thread_wait();
9170 // Now wake up all worker threads
9171 for (int i = 1; i < __kmp_hit_hidden_helper_threads_num; ++i) {
9172 __kmp_hidden_helper_worker_thread_signal();
9173 }
9174 }
9175}
9176} // namespace
9177
9178void __kmp_hidden_helper_threads_initz_routine() {
9179 // Create a new root for hidden helper team/threads
9180 const int gtid = __kmp_register_root(TRUE(!0));
9181 __kmp_hidden_helper_main_thread = __kmp_threads[gtid];
9182 __kmp_hidden_helper_threads = &__kmp_threads[gtid];
9183 __kmp_hidden_helper_main_thread->th.th_set_nproc =
9184 __kmp_hidden_helper_threads_num;
9185
9186  KMP_ATOMIC_ST_REL(&__kmp_hit_hidden_helper_threads_num, 0);
9187
9188 __kmpc_fork_call(nullptr, 0, __kmp_hidden_helper_wrapper_fn);
9189
9190 // Set the initialization flag to FALSE
9191 TCW_SYNC_4(__kmp_init_hidden_helper, FALSE)(__kmp_init_hidden_helper) = (0);
9192
9193 __kmp_hidden_helper_threads_deinitz_release();
9194}
9195
9196/* Nesting Mode:
9197 Set via KMP_NESTING_MODE, which takes an integer.
9198 Note: we skip duplicate topology levels, and skip levels with only
9199 one entity.
9200 KMP_NESTING_MODE=0 is the default, and doesn't use nesting mode.
9201 KMP_NESTING_MODE=1 sets as many nesting levels as there are distinct levels
9202 in the topology, and initializes the number of threads at each of those
9203 levels to the number of entities at each level, respectively, below the
9204 entity at the parent level.
9205 KMP_NESTING_MODE=N, where N>1, attempts to create up to N nesting levels,
9206 but starts with nesting OFF -- max-active-levels-var is 1 -- and requires
9207 the user to turn nesting on explicitly. This is an even more experimental
9208 option to this experimental feature, and may change or go away in the
9209 future.
9210*/
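// Worked example (editorial annotation, hypothetical machine): with 2 sockets,
// 16 cores per socket and 2 hardware threads per core, KMP_NESTING_MODE=1
// would typically yield three nesting levels with 2, 16 and 2 threads
// respectively (socket, core, hw-thread); duplicate topology levels and
// levels with only one entity are skipped as described above.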
9211
9212// Allocate space to store nesting levels
9213void __kmp_init_nesting_mode() {
9214 int levels = KMP_HW_LAST;
9215 __kmp_nesting_mode_nlevels = levels;
9216 __kmp_nesting_nth_level = (int *)KMP_INTERNAL_MALLOC(levels * sizeof(int))malloc(levels * sizeof(int));
9217 for (int i = 0; i < levels; ++i)
9218 __kmp_nesting_nth_level[i] = 0;
9219 if (__kmp_nested_nth.size < levels) {
9220 __kmp_nested_nth.nth =
9221 (int *)KMP_INTERNAL_REALLOC(__kmp_nested_nth.nth, levels * sizeof(int))realloc((__kmp_nested_nth.nth), (levels * sizeof(int)));
9222 __kmp_nested_nth.size = levels;
9223 }
9224}
9225
9226// Set # threads for top levels of nesting; must be called after topology set
9227void __kmp_set_nesting_mode_threads() {
9228 kmp_info_t *thread = __kmp_threads[__kmp_entry_gtid()__kmp_get_global_thread_id_reg()];
9229
9230 if (__kmp_nesting_mode == 1)
9231 __kmp_nesting_mode_nlevels = KMP_MAX_ACTIVE_LEVELS_LIMIT2147483647;
9232 else if (__kmp_nesting_mode > 1)
9233 __kmp_nesting_mode_nlevels = __kmp_nesting_mode;
9234
9235 if (__kmp_topology) { // use topology info
9236 int loc, hw_level;
9237 for (loc = 0, hw_level = 0; hw_level < __kmp_topology->get_depth() &&
9238 loc < __kmp_nesting_mode_nlevels;
9239 loc++, hw_level++) {
9240 __kmp_nesting_nth_level[loc] = __kmp_topology->get_ratio(hw_level);
9241 if (__kmp_nesting_nth_level[loc] == 1)
9242 loc--;
9243 }
9244 // Make sure all cores are used
9245 if (__kmp_nesting_mode > 1 && loc > 1) {
9246 int core_level = __kmp_topology->get_level(KMP_HW_CORE);
9247 int num_cores = __kmp_topology->get_count(core_level);
9248 int upper_levels = 1;
9249 for (int level = 0; level < loc - 1; ++level)
9250 upper_levels *= __kmp_nesting_nth_level[level];
9251 if (upper_levels * __kmp_nesting_nth_level[loc - 1] < num_cores)
9252 __kmp_nesting_nth_level[loc - 1] =
9253 num_cores / __kmp_nesting_nth_level[loc - 2];
9254 }
9255 __kmp_nesting_mode_nlevels = loc;
9256 __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
9257 } else { // no topology info available; provide a reasonable guesstimation
9258 if (__kmp_avail_proc >= 4) {
9259 __kmp_nesting_nth_level[0] = __kmp_avail_proc / 2;
9260 __kmp_nesting_nth_level[1] = 2;
9261 __kmp_nesting_mode_nlevels = 2;
9262 } else {
9263 __kmp_nesting_nth_level[0] = __kmp_avail_proc;
9264 __kmp_nesting_mode_nlevels = 1;
9265 }
9266 __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
9267 }
9268 for (int i = 0; i < __kmp_nesting_mode_nlevels; ++i) {
9269 __kmp_nested_nth.nth[i] = __kmp_nesting_nth_level[i];
9270 }
9271  set__nproc(thread, __kmp_nesting_nth_level[0]);
9272 if (__kmp_nesting_mode > 1 && __kmp_nesting_mode_nlevels > __kmp_nesting_mode)
9273 __kmp_nesting_mode_nlevels = __kmp_nesting_mode;
9274  if (get__max_active_levels(thread) > 1) {
9275    // if max levels was set, set nesting mode levels to same
9276    __kmp_nesting_mode_nlevels = get__max_active_levels(thread);
9277 }
9278 if (__kmp_nesting_mode == 1) // turn on nesting for this case only
9279    set__max_active_levels(thread, __kmp_nesting_mode_nlevels);
9280}
9281
9282// Empty symbols to export (see exports_so.txt) when feature is disabled
9283extern "C" {
9284#if !KMP_STATS_ENABLED0
9285void __kmp_reset_stats() {}
9286#endif
9287#if !USE_DEBUGGER0
9288int __kmp_omp_debug_struct_info = FALSE0;
9289int __kmp_debugging = FALSE0;
9290#endif
9291#if !USE_ITT_BUILD1 || !USE_ITT_NOTIFY1
9292void __kmp_itt_fini_ittlib() {}
9293void __kmp_itt_init_ittlib() {}
9294#endif
9295}
9296
9297// end of file

/build/source/openmp/runtime/src/kmp.h

1/*! \file */
2/*
3 * kmp.h -- KPTS runtime header file.
4 */
5
6//===----------------------------------------------------------------------===//
7//
8// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
9// See https://llvm.org/LICENSE.txt for license information.
10// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef KMP_H
15#define KMP_H
16
17#include "kmp_config.h"
18
19/* #define BUILD_PARALLEL_ORDERED 1 */
20
21/* This fix replaces gettimeofday with clock_gettime for better scalability on
22 the Altix. Requires user code to be linked with -lrt. */
23//#define FIX_SGI_CLOCK
24
25/* Defines for OpenMP 3.0 tasking and auto scheduling */
26
27#ifndef KMP_STATIC_STEAL_ENABLED1
28#define KMP_STATIC_STEAL_ENABLED1 1
29#endif
30
31#define TASK_CURRENT_NOT_QUEUED0 0
32#define TASK_CURRENT_QUEUED1 1
33
34#ifdef BUILD_TIED_TASK_STACK
35#define TASK_STACK_EMPTY 0 // entries when the stack is empty
36#define TASK_STACK_BLOCK_BITS 5 // Used in TASK_STACK_SIZE and TASK_STACK_MASK
37// Number of entries in each task stack array
38#define TASK_STACK_BLOCK_SIZE (1 << TASK_STACK_BLOCK_BITS)
39// Mask for determining index into stack block
40#define TASK_STACK_INDEX_MASK (TASK_STACK_BLOCK_SIZE - 1)
41#endif // BUILD_TIED_TASK_STACK
42
43#define TASK_NOT_PUSHED1 1
44#define TASK_SUCCESSFULLY_PUSHED0 0
45#define TASK_TIED1 1
46#define TASK_UNTIED0 0
47#define TASK_EXPLICIT1 1
48#define TASK_IMPLICIT0 0
49#define TASK_PROXY1 1
50#define TASK_FULL0 0
51#define TASK_DETACHABLE1 1
52#define TASK_UNDETACHABLE0 0
53
54#define KMP_CANCEL_THREADS
55#define KMP_THREAD_ATTR
56
57// Android does not have pthread_cancel. Undefine KMP_CANCEL_THREADS if being
58// built on Android
59#if defined(__ANDROID__)
60#undef KMP_CANCEL_THREADS
61#endif
62
63#include <signal.h>
64#include <stdarg.h>
65#include <stddef.h>
66#include <stdio.h>
67#include <stdlib.h>
68#include <string.h>
69#include <limits>
70#include <type_traits>
71/* include <ctype.h> don't use; problems with /MD on Windows* OS NT due to bad
72 Microsoft library. Some macros provided below to replace these functions */
73#ifndef __ABSOFT_WIN
74#include <sys/types.h>
75#endif
76#include <limits.h>
77#include <time.h>
78
79#include <errno.h>
80
81#include "kmp_os.h"
82
83#include "kmp_safe_c_api.h"
84
85#if KMP_STATS_ENABLED0
86class kmp_stats_list;
87#endif
88
89#if KMP_USE_HIER_SCHED0
90// Only include hierarchical scheduling if affinity is supported
91#undef KMP_USE_HIER_SCHED0
92#define KMP_USE_HIER_SCHED0 KMP_AFFINITY_SUPPORTED1
93#endif
94
95#if KMP_USE_HWLOC0 && KMP_AFFINITY_SUPPORTED1
96#include "hwloc.h"
97#ifndef HWLOC_OBJ_NUMANODE
98#define HWLOC_OBJ_NUMANODE HWLOC_OBJ_NODE
99#endif
100#ifndef HWLOC_OBJ_PACKAGE
101#define HWLOC_OBJ_PACKAGE HWLOC_OBJ_SOCKET
102#endif
103#endif
104
105#if KMP_ARCH_X860 || KMP_ARCH_X86_641
106#include <xmmintrin.h>
107#endif
108
109// The below has to be defined before including "kmp_barrier.h".
110#define KMP_INTERNAL_MALLOC(sz)malloc(sz) malloc(sz)
111#define KMP_INTERNAL_FREE(p)free(p) free(p)
112#define KMP_INTERNAL_REALLOC(p, sz)realloc((p), (sz)) realloc((p), (sz))
113#define KMP_INTERNAL_CALLOC(n, sz)calloc((n), (sz)) calloc((n), (sz))
114
115#include "kmp_debug.h"
116#include "kmp_lock.h"
117#include "kmp_version.h"
118#include "kmp_barrier.h"
119#if USE_DEBUGGER0
120#include "kmp_debugger.h"
121#endif
122#include "kmp_i18n.h"
123
124#define KMP_HANDLE_SIGNALS(1 || 0) (KMP_OS_UNIX1 || KMP_OS_WINDOWS0)
125
126#include "kmp_wrapper_malloc.h"
127#if KMP_OS_UNIX1
128#include <unistd.h>
129#if !defined NSIG(64 + 1) && defined _NSIG(64 + 1)
130#define NSIG(64 + 1) _NSIG(64 + 1)
131#endif
132#endif
133
134#if KMP_OS_LINUX1
135#pragma weak clock_gettime
136#endif
137
138#if OMPT_SUPPORT1
139#include "ompt-internal.h"
140#endif
141
142#if OMPD_SUPPORT1
143#include "ompd-specific.h"
144#endif
145
146#ifndef UNLIKELY
147#define UNLIKELY(x)__builtin_expect(!!(x), 0) (x)
148#endif
149
150// Affinity format function
151#include "kmp_str.h"
152
153// 0 - no fast memory allocation, alignment: 8-byte on x86, 16-byte on x64.
154// 3 - fast allocation using sync, non-sync free lists of any size, non-self
155// free lists of limited size.
156#ifndef USE_FAST_MEMORY3
157#define USE_FAST_MEMORY3 3
158#endif
159
160#ifndef KMP_NESTED_HOT_TEAMS1
161#define KMP_NESTED_HOT_TEAMS1 0
162#define USE_NESTED_HOT_ARG(x), x
163#else
164#if KMP_NESTED_HOT_TEAMS1
165#define USE_NESTED_HOT_ARG(x), x , x
166#else
167#define USE_NESTED_HOT_ARG(x), x
168#endif
169#endif
170
171// Assume using BGET compare_exchange instruction instead of lock by default.
172#ifndef USE_CMP_XCHG_FOR_BGET1
173#define USE_CMP_XCHG_FOR_BGET1 1
174#endif
175
176// Test to see if queuing lock is better than bootstrap lock for bget
177// #ifndef USE_QUEUING_LOCK_FOR_BGET
178// #define USE_QUEUING_LOCK_FOR_BGET
179// #endif
180
181#define KMP_NSEC_PER_SEC1000000000L 1000000000L
182#define KMP_USEC_PER_SEC1000000L 1000000L
183
184/*!
185@ingroup BASIC_TYPES
186@{
187*/
188
189/*!
190Values for bit flags used in the ident_t to describe the fields.
191*/
192enum {
193 /*! Use trampoline for internal microtasks */
194 KMP_IDENT_IMB = 0x01,
195 /*! Use c-style ident structure */
196 KMP_IDENT_KMPC = 0x02,
197 /* 0x04 is no longer used */
198 /*! Entry point generated by auto-parallelization */
199 KMP_IDENT_AUTOPAR = 0x08,
200 /*! Compiler generates atomic reduction option for kmpc_reduce* */
201 KMP_IDENT_ATOMIC_REDUCE = 0x10,
202 /*! To mark a 'barrier' directive in user code */
203 KMP_IDENT_BARRIER_EXPL = 0x20,
204 /*! To Mark implicit barriers. */
205 KMP_IDENT_BARRIER_IMPL = 0x0040,
206 KMP_IDENT_BARRIER_IMPL_MASK = 0x01C0,
207 KMP_IDENT_BARRIER_IMPL_FOR = 0x0040,
208 KMP_IDENT_BARRIER_IMPL_SECTIONS = 0x00C0,
209
210 KMP_IDENT_BARRIER_IMPL_SINGLE = 0x0140,
211 KMP_IDENT_BARRIER_IMPL_WORKSHARE = 0x01C0,
212
213 /*! To mark a static loop in OMPT callbacks */
214 KMP_IDENT_WORK_LOOP = 0x200,
215 /*! To mark a sections directive in OMPT callbacks */
216 KMP_IDENT_WORK_SECTIONS = 0x400,
217 /*! To mark a distribute construct in OMPT callbacks */
218 KMP_IDENT_WORK_DISTRIBUTE = 0x800,
219 /*! Atomic hint; bottom four bits as omp_sync_hint_t. Top four reserved and
220 not currently used. If one day we need more bits, then we can use
221 an invalid combination of hints to mean that another, larger field
222 should be used in a different flag. */
223 KMP_IDENT_ATOMIC_HINT_MASK = 0xFF0000,
224 KMP_IDENT_ATOMIC_HINT_UNCONTENDED = 0x010000,
225 KMP_IDENT_ATOMIC_HINT_CONTENDED = 0x020000,
226 KMP_IDENT_ATOMIC_HINT_NONSPECULATIVE = 0x040000,
227 KMP_IDENT_ATOMIC_HINT_SPECULATIVE = 0x080000,
228 KMP_IDENT_OPENMP_SPEC_VERSION_MASK = 0xFF000000
229};
230
231/*!
232 * The ident structure that describes a source location.
233 */
234typedef struct ident {
235 kmp_int32 reserved_1; /**< might be used in Fortran; see above */
236 kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; KMP_IDENT_KMPC
237 identifies this union member */
238 kmp_int32 reserved_2; /**< not really used in Fortran any more; see above */
239#if USE_ITT_BUILD1
240/* but currently used for storing region-specific ITT */
241/* contextual information. */
242#endif /* USE_ITT_BUILD */
243 kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for C++ */
244 char const *psource; /**< String describing the source location.
245 The string is composed of semi-colon separated fields
246 which describe the source file, the function and a pair
247 of line numbers that delimit the construct. */
248 // Returns the OpenMP version in form major*10+minor (e.g., 50 for 5.0)
249 kmp_int32 get_openmp_version() {
250 return (((flags & KMP_IDENT_OPENMP_SPEC_VERSION_MASK) >> 24) & 0xFF);
251 }
252} ident_t;
253/*!
254@}
255*/
256
257// Some forward declarations.
258typedef union kmp_team kmp_team_t;
259typedef struct kmp_taskdata kmp_taskdata_t;
260typedef union kmp_task_team kmp_task_team_t;
261typedef union kmp_team kmp_team_p;
262typedef union kmp_info kmp_info_p;
263typedef union kmp_root kmp_root_p;
264
265template <bool C = false, bool S = true> class kmp_flag_32;
266template <bool C = false, bool S = true> class kmp_flag_64;
267template <bool C = false, bool S = true> class kmp_atomic_flag_64;
268class kmp_flag_oncore;
269
270#ifdef __cplusplus201703L
271extern "C" {
272#endif
273
274/* ------------------------------------------------------------------------ */
275
276/* Pack two 32-bit signed integers into a 64-bit signed integer */
277/* ToDo: Fix word ordering for big-endian machines. */
278#define KMP_PACK_64(HIGH_32, LOW_32) \
279 ((kmp_int64)((((kmp_uint64)(HIGH_32)) << 32) | (kmp_uint64)(LOW_32)))
280
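// Worked example (editorial annotation): KMP_PACK_64(1, 2) evaluates to
// 0x0000000100000002, i.e. the high 32 bits hold 1 and the low 32 bits hold 2.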
281// Generic string manipulation macros. Assume that _x is of type char *
282#define SKIP_WS(_x){ while (*(_x) == ' ' || *(_x) == '\t') (_x)++; } \
283 { \
284 while (*(_x) == ' ' || *(_x) == '\t') \
285 (_x)++; \
286 }
287#define SKIP_DIGITS(_x){ while (*(_x) >= '0' && *(_x) <= '9') (_x)++; } \
288 { \
289 while (*(_x) >= '0' && *(_x) <= '9') \
290 (_x)++; \
291 }
292#define SKIP_TOKEN(_x) \
293 { \
294 while ((*(_x) >= '0' && *(_x) <= '9') || (*(_x) >= 'a' && *(_x) <= 'z') || \
295 (*(_x) >= 'A' && *(_x) <= 'Z') || *(_x) == '_') \
296 (_x)++; \
297 }
298#define SKIP_TO(_x, _c){ while (*(_x) != '\0' && *(_x) != (_c)) (_x)++; } \
299 { \
300 while (*(_x) != '\0' && *(_x) != (_c)) \
301 (_x)++; \
302 }
303
304/* ------------------------------------------------------------------------ */
305
306#define KMP_MAX(x, y)((x) > (y) ? (x) : (y)) ((x) > (y) ? (x) : (y))
307#define KMP_MIN(x, y)((x) < (y) ? (x) : (y)) ((x) < (y) ? (x) : (y))
308
309/* ------------------------------------------------------------------------ */
310/* Enumeration types */
311
312enum kmp_state_timer {
313 ts_stop,
314 ts_start,
315 ts_pause,
316
317 ts_last_state
318};
319
320enum dynamic_mode {
321 dynamic_default,
322#ifdef USE_LOAD_BALANCE1
323 dynamic_load_balance,
324#endif /* USE_LOAD_BALANCE */
325 dynamic_random,
326 dynamic_thread_limit,
327 dynamic_max
328};
329
330/* external schedule constants, duplicate enum omp_sched in omp.h in order to
331 * not include it here */
332#ifndef KMP_SCHED_TYPE_DEFINED
333#define KMP_SCHED_TYPE_DEFINED
334typedef enum kmp_sched {
335 kmp_sched_lower = 0, // lower and upper bounds are for routine parameter check
336 // Note: need to adjust __kmp_sch_map global array in case enum is changed
337 kmp_sched_static = 1, // mapped to kmp_sch_static_chunked (33)
338 kmp_sched_dynamic = 2, // mapped to kmp_sch_dynamic_chunked (35)
339 kmp_sched_guided = 3, // mapped to kmp_sch_guided_chunked (36)
340 kmp_sched_auto = 4, // mapped to kmp_sch_auto (38)
341 kmp_sched_upper_std = 5, // upper bound for standard schedules
342 kmp_sched_lower_ext = 100, // lower bound of Intel extension schedules
343 kmp_sched_trapezoidal = 101, // mapped to kmp_sch_trapezoidal (39)
344#if KMP_STATIC_STEAL_ENABLED1
345 kmp_sched_static_steal = 102, // mapped to kmp_sch_static_steal (44)
346#endif
347 kmp_sched_upper,
348 kmp_sched_default = kmp_sched_static, // default scheduling
349 kmp_sched_monotonic = 0x80000000
350} kmp_sched_t;
351#endif
352
353/*!
354 @ingroup WORK_SHARING
355 * Describes the loop schedule to be used for a parallel for loop.
356 */
357enum sched_type : kmp_int32 {
358 kmp_sch_lower = 32, /**< lower bound for unordered values */
359 kmp_sch_static_chunked = 33,
360 kmp_sch_static = 34, /**< static unspecialized */
361 kmp_sch_dynamic_chunked = 35,
362 kmp_sch_guided_chunked = 36, /**< guided unspecialized */
363 kmp_sch_runtime = 37,
364 kmp_sch_auto = 38, /**< auto */
365 kmp_sch_trapezoidal = 39,
366
367 /* accessible only through KMP_SCHEDULE environment variable */
368 kmp_sch_static_greedy = 40,
369 kmp_sch_static_balanced = 41,
370 /* accessible only through KMP_SCHEDULE environment variable */
371 kmp_sch_guided_iterative_chunked = 42,
372 kmp_sch_guided_analytical_chunked = 43,
373 /* accessible only through KMP_SCHEDULE environment variable */
374 kmp_sch_static_steal = 44,
375
376 /* static with chunk adjustment (e.g., simd) */
377 kmp_sch_static_balanced_chunked = 45,
378 kmp_sch_guided_simd = 46, /**< guided with chunk adjustment */
379 kmp_sch_runtime_simd = 47, /**< runtime with chunk adjustment */
380
381 /* accessible only through KMP_SCHEDULE environment variable */
382 kmp_sch_upper, /**< upper bound for unordered values */
383
384 kmp_ord_lower = 64, /**< lower bound for ordered values, must be power of 2 */
385 kmp_ord_static_chunked = 65,
386 kmp_ord_static = 66, /**< ordered static unspecialized */
387 kmp_ord_dynamic_chunked = 67,
388 kmp_ord_guided_chunked = 68,
389 kmp_ord_runtime = 69,
390 kmp_ord_auto = 70, /**< ordered auto */
391 kmp_ord_trapezoidal = 71,
392 kmp_ord_upper, /**< upper bound for ordered values */
393
394 /* Schedules for Distribute construct */
395 kmp_distribute_static_chunked = 91, /**< distribute static chunked */
396 kmp_distribute_static = 92, /**< distribute static unspecialized */
397
398 /* For the "nomerge" versions, kmp_dispatch_next*() will always return a
399 single iteration/chunk, even if the loop is serialized. For the schedule
400 types listed above, the entire iteration vector is returned if the loop is
401 serialized. This doesn't work for gcc/gcomp sections. */
402 kmp_nm_lower = 160, /**< lower bound for nomerge values */
403
404 kmp_nm_static_chunked =
405 (kmp_sch_static_chunked - kmp_sch_lower + kmp_nm_lower),
406 kmp_nm_static = 162, /**< static unspecialized */
407 kmp_nm_dynamic_chunked = 163,
408 kmp_nm_guided_chunked = 164, /**< guided unspecialized */
409 kmp_nm_runtime = 165,
410 kmp_nm_auto = 166, /**< auto */
411 kmp_nm_trapezoidal = 167,
412
413 /* accessible only through KMP_SCHEDULE environment variable */
414 kmp_nm_static_greedy = 168,
415 kmp_nm_static_balanced = 169,
416 /* accessible only through KMP_SCHEDULE environment variable */
417 kmp_nm_guided_iterative_chunked = 170,
418 kmp_nm_guided_analytical_chunked = 171,
419 kmp_nm_static_steal =
420 172, /* accessible only through OMP_SCHEDULE environment variable */
421
422 kmp_nm_ord_static_chunked = 193,
423 kmp_nm_ord_static = 194, /**< ordered static unspecialized */
424 kmp_nm_ord_dynamic_chunked = 195,
425 kmp_nm_ord_guided_chunked = 196,
426 kmp_nm_ord_runtime = 197,
427 kmp_nm_ord_auto = 198, /**< auto */
428 kmp_nm_ord_trapezoidal = 199,
429 kmp_nm_upper, /**< upper bound for nomerge values */
430
431 /* Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. Since
432 we need to distinguish the three possible cases (no modifier, monotonic
433 modifier, nonmonotonic modifier), we need separate bits for each modifier.
434 The absence of monotonic does not imply nonmonotonic, especially since 4.5
435 says that the behaviour of the "no modifier" case is implementation defined
436 in 4.5, but will become "nonmonotonic" in 5.0.
437
438 Since we're passing a full 32 bit value, we can use a couple of high bits
439 for these flags; out of paranoia we avoid the sign bit.
440
441 These modifiers can be or-ed into non-static schedules by the compiler to
442 pass the additional information. They will be stripped early in the
443 processing in __kmp_dispatch_init when setting up schedules, so most of the
444 code won't ever see schedules with these bits set. */
445 kmp_sch_modifier_monotonic =
446 (1 << 29), /**< Set if the monotonic schedule modifier was present */
447 kmp_sch_modifier_nonmonotonic =
448 (1 << 30), /**< Set if the nonmonotonic schedule modifier was present */
449
450#define SCHEDULE_WITHOUT_MODIFIERS(s) \
451 (enum sched_type)( \
452 (s) & ~(kmp_sch_modifier_nonmonotonic | kmp_sch_modifier_monotonic))
453#define SCHEDULE_HAS_MONOTONIC(s) (((s)&kmp_sch_modifier_monotonic) != 0)
454#define SCHEDULE_HAS_NONMONOTONIC(s) (((s)&kmp_sch_modifier_nonmonotonic) != 0)
455#define SCHEDULE_HAS_NO_MODIFIERS(s) \
456 (((s) & (kmp_sch_modifier_nonmonotonic | kmp_sch_modifier_monotonic)) == 0)
457#define SCHEDULE_GET_MODIFIERS(s) \
458 ((enum sched_type)( \
459 (s) & (kmp_sch_modifier_nonmonotonic | kmp_sch_modifier_monotonic)))
460#define SCHEDULE_SET_MODIFIERS(s, m) \
461 (s = (enum sched_type)((kmp_int32)s | (kmp_int32)m))
462#define SCHEDULE_NONMONOTONIC0 0
463#define SCHEDULE_MONOTONIC1 1
464
465 kmp_sch_default = kmp_sch_static /**< default scheduling algorithm */
466};
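// Worked example (editorial annotation): a compiler encoding a nonmonotonic
// dynamic schedule passes (kmp_sch_dynamic_chunked | kmp_sch_modifier_nonmonotonic),
// i.e. 35 | (1 << 30); SCHEDULE_WITHOUT_MODIFIERS() strips the modifier bits
// back to 35 and SCHEDULE_HAS_NONMONOTONIC() reports true.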
467
468// Apply modifiers on internal kind to standard kind
469static inline void
470__kmp_sched_apply_mods_stdkind(kmp_sched_t *kind,
471 enum sched_type internal_kind) {
472 if (SCHEDULE_HAS_MONOTONIC(internal_kind)(((internal_kind)&kmp_sch_modifier_monotonic) != 0)) {
473 *kind = (kmp_sched_t)((int)*kind | (int)kmp_sched_monotonic);
474 }
475}
476
477// Apply modifiers on standard kind to internal kind
478static inline void
479__kmp_sched_apply_mods_intkind(kmp_sched_t kind,
480 enum sched_type *internal_kind) {
481 if ((int)kind & (int)kmp_sched_monotonic) {
482 *internal_kind = (enum sched_type)((int)*internal_kind |
483 (int)kmp_sch_modifier_monotonic);
484 }
485}
486
487// Get standard schedule without modifiers
488static inline kmp_sched_t __kmp_sched_without_mods(kmp_sched_t kind) {
489 return (kmp_sched_t)((int)kind & ~((int)kmp_sched_monotonic));
490}
491
492/* Type to keep runtime schedule set via OMP_SCHEDULE or omp_set_schedule() */
493typedef union kmp_r_sched {
494 struct {
495 enum sched_type r_sched_type;
496 int chunk;
497 };
498 kmp_int64 sched;
499} kmp_r_sched_t;
500
501extern enum sched_type __kmp_sch_map[]; // map OMP 3.0 schedule types with our
502// internal schedule types
503
504enum library_type {
505 library_none,
506 library_serial,
507 library_turnaround,
508 library_throughput
509};
510
511#if KMP_OS_LINUX1
512enum clock_function_type {
513 clock_function_gettimeofday,
514 clock_function_clock_gettime
515};
516#endif /* KMP_OS_LINUX */
517
518#if KMP_MIC_SUPPORTED((0 || 1) && (1 || 0))
519enum mic_type { non_mic, mic1, mic2, mic3, dummy };
520#endif
521
522/* -- fast reduction stuff ------------------------------------------------ */
523
524#undef KMP_FAST_REDUCTION_BARRIER1
525#define KMP_FAST_REDUCTION_BARRIER1 1
526
527#undef KMP_FAST_REDUCTION_CORE_DUO1
528#if KMP_ARCH_X860 || KMP_ARCH_X86_641
529#define KMP_FAST_REDUCTION_CORE_DUO1 1
530#endif
531
532enum _reduction_method {
533 reduction_method_not_defined = 0,
534 critical_reduce_block = (1 << 8),
535 atomic_reduce_block = (2 << 8),
536 tree_reduce_block = (3 << 8),
537 empty_reduce_block = (4 << 8)
538};
539
540// Description of the packed_reduction_method variable:
541// The packed_reduction_method variable packs two enum values into its two
542// low bytes:
543// byte 0: (packed_reduction_method & 0x000000FF) is the 'enum barrier_type'
544// value of the barrier that will be used in fast reduction: bs_plain_barrier
545// or bs_reduction_barrier
546// byte 1: (packed_reduction_method & 0x0000FF00) is the reduction method
547// that will be used in fast reduction;
548// The reduction method is of 'enum _reduction_method' type and is defined so
549// that the bits of byte 0 are left empty; no shift instruction is needed
550// while packing/unpacking
551
552#if KMP_FAST_REDUCTION_BARRIER
553#define PACK_REDUCTION_METHOD_AND_BARRIER(reduction_method, barrier_type) \
554  ((reduction_method) | (barrier_type))
555
556#define UNPACK_REDUCTION_METHOD(packed_reduction_method) \
557  ((enum _reduction_method)((packed_reduction_method) & (0x0000FF00)))
558
559#define UNPACK_REDUCTION_BARRIER(packed_reduction_method) \
560  ((enum barrier_type)((packed_reduction_method) & (0x000000FF)))
561#else
562#define PACK_REDUCTION_METHOD_AND_BARRIER(reduction_method, barrier_type) \
563  (reduction_method)
564
565#define UNPACK_REDUCTION_METHOD(packed_reduction_method) \
566  (packed_reduction_method)
567
568#define UNPACK_REDUCTION_BARRIER(packed_reduction_method) (bs_plain_barrier)
569#endif
570
571#define TEST_REDUCTION_METHOD(packed_reduction_method, which_reduction_block) \
572  ((UNPACK_REDUCTION_METHOD(packed_reduction_method)) == \
573   (which_reduction_block))
574
575#if KMP_FAST_REDUCTION_BARRIER
576#define TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER \
577  (PACK_REDUCTION_METHOD_AND_BARRIER(tree_reduce_block, bs_reduction_barrier))
578
579#define TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER \
580  (PACK_REDUCTION_METHOD_AND_BARRIER(tree_reduce_block, bs_plain_barrier))
581#endif
582
583typedef int PACKED_REDUCTION_METHOD_T;
584
585/* -- end of fast reduction stuff ----------------------------------------- */
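To make the packing above concrete: with the reduction method in byte 1 and the barrier type in byte 0, PACK/UNPACK reduce to a single OR/AND with no shifts. The sketch below uses local demo_* constants; the barrier enum values are assumptions, and only the byte layout matters.

#include <cstdio>

enum demo_reduction_method {
  demo_critical_reduce_block = (1 << 8),
  demo_atomic_reduce_block = (2 << 8),
  demo_tree_reduce_block = (3 << 8),
};
// Assumed barrier values; the real enum barrier_type lives elsewhere in kmp.h.
enum demo_barrier_type { demo_bs_plain_barrier = 0, demo_bs_reduction_barrier = 1 };

int main() {
  int packed = demo_tree_reduce_block | demo_bs_reduction_barrier;  // PACK_REDUCTION_METHOD_AND_BARRIER
  int method = packed & 0x0000FF00;                                 // UNPACK_REDUCTION_METHOD
  int barrier = packed & 0x000000FF;                                // UNPACK_REDUCTION_BARRIER
  std::printf("packed=0x%04x method=0x%04x barrier=%d\n", packed, method, barrier);
  return 0;
}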
586
587#if KMP_OS_WINDOWS0
588#define USE_CBLKDATA
589#if KMP_MSVC_COMPAT0
590#pragma warning(push)
591#pragma warning(disable : 271 310)
592#endif
593#include <windows.h>
594#if KMP_MSVC_COMPAT0
595#pragma warning(pop)
596#endif
597#endif
598
599#if KMP_OS_UNIX1
600#include <dlfcn.h>
601#include <pthread.h>
602#endif
603
604enum kmp_hw_t : int {
605 KMP_HW_UNKNOWN = -1,
606 KMP_HW_SOCKET = 0,
607 KMP_HW_PROC_GROUP,
608 KMP_HW_NUMA,
609 KMP_HW_DIE,
610 KMP_HW_LLC,
611 KMP_HW_L3,
612 KMP_HW_TILE,
613 KMP_HW_MODULE,
614 KMP_HW_L2,
615 KMP_HW_L1,
616 KMP_HW_CORE,
617 KMP_HW_THREAD,
618 KMP_HW_LAST
619};
620
621typedef enum kmp_hw_core_type_t {
622 KMP_HW_CORE_TYPE_UNKNOWN = 0x0,
623#if KMP_ARCH_X860 || KMP_ARCH_X86_641
624 KMP_HW_CORE_TYPE_ATOM = 0x20,
625 KMP_HW_CORE_TYPE_CORE = 0x40,
626 KMP_HW_MAX_NUM_CORE_TYPES = 3,
627#else
628 KMP_HW_MAX_NUM_CORE_TYPES = 1,
629#endif
630} kmp_hw_core_type_t;
631
632#define KMP_HW_MAX_NUM_CORE_EFFS 8
633
634#define KMP_DEBUG_ASSERT_VALID_HW_TYPE(type) \
635  KMP_DEBUG_ASSERT(type >= (kmp_hw_t)0 && type < KMP_HW_LAST)
636#define KMP_ASSERT_VALID_HW_TYPE(type) \
637  KMP_ASSERT(type >= (kmp_hw_t)0 && type < KMP_HW_LAST)
638
639#define KMP_FOREACH_HW_TYPE(type) \
640  for (kmp_hw_t type = (kmp_hw_t)0; type < KMP_HW_LAST; \
641       type = (kmp_hw_t)((int)type + 1))
642
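Because kmp_hw_t is a dense enum running from 0 to KMP_HW_LAST - 1, KMP_FOREACH_HW_TYPE can walk every topology level with a plain cast-and-increment loop. A trimmed standalone sketch of that idiom (the demo enum is a local stand-in, not the full list above):

#include <cstdio>

enum demo_hw_t : int { DEMO_HW_SOCKET = 0, DEMO_HW_NUMA, DEMO_HW_CORE, DEMO_HW_THREAD, DEMO_HW_LAST };

int main() {
  // Same shape as KMP_FOREACH_HW_TYPE(type): cast to int, add one, cast back.
  for (demo_hw_t type = (demo_hw_t)0; type < DEMO_HW_LAST;
       type = (demo_hw_t)((int)type + 1)) {
    std::printf("topology level %d\n", (int)type);
  }
  return 0;
}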
643const char *__kmp_hw_get_keyword(kmp_hw_t type, bool plural = false);
644const char *__kmp_hw_get_catalog_string(kmp_hw_t type, bool plural = false);
645const char *__kmp_hw_get_core_type_string(kmp_hw_core_type_t type);
646
647/* Only Linux* OS and Windows* OS support thread affinity. */
648#if KMP_AFFINITY_SUPPORTED1
649
650// GROUP_AFFINITY is already defined for _MSC_VER>=1600 (VS2010 and later).
651#if KMP_OS_WINDOWS0
652#if _MSC_VER < 1600 && KMP_MSVC_COMPAT0
653typedef struct GROUP_AFFINITY {
654 KAFFINITY Mask;
655 WORD Group;
656 WORD Reserved[3];
657} GROUP_AFFINITY;
658#endif /* _MSC_VER < 1600 */
659#if KMP_GROUP_AFFINITY0
660extern int __kmp_num_proc_groups;
661#else
662static const int __kmp_num_proc_groups = 1;
663#endif /* KMP_GROUP_AFFINITY */
664typedef DWORD (*kmp_GetActiveProcessorCount_t)(WORD);
665extern kmp_GetActiveProcessorCount_t __kmp_GetActiveProcessorCount;
666
667typedef WORD (*kmp_GetActiveProcessorGroupCount_t)(void);
668extern kmp_GetActiveProcessorGroupCount_t __kmp_GetActiveProcessorGroupCount;
669
670typedef BOOL (*kmp_GetThreadGroupAffinity_t)(HANDLE, GROUP_AFFINITY *);
671extern kmp_GetThreadGroupAffinity_t __kmp_GetThreadGroupAffinity;
672
673typedef BOOL (*kmp_SetThreadGroupAffinity_t)(HANDLE, const GROUP_AFFINITY *,
674 GROUP_AFFINITY *);
675extern kmp_SetThreadGroupAffinity_t __kmp_SetThreadGroupAffinity;
676#endif /* KMP_OS_WINDOWS */
677
678#if KMP_USE_HWLOC0
679extern hwloc_topology_t __kmp_hwloc_topology;
680extern int __kmp_hwloc_error;
681#endif
682
683extern size_t __kmp_affin_mask_size;
684#define KMP_AFFINITY_CAPABLE()(__kmp_affin_mask_size > 0) (__kmp_affin_mask_size > 0)
685#define KMP_AFFINITY_DISABLE()(__kmp_affin_mask_size = 0) (__kmp_affin_mask_size = 0)
686#define KMP_AFFINITY_ENABLE(mask_size)(__kmp_affin_mask_size = mask_size) (__kmp_affin_mask_size = mask_size)
687#define KMP_CPU_SET_ITERATE(i, mask) \
688  for (i = (mask)->begin(); (int)i != (mask)->end(); i = (mask)->next(i))
689#define KMP_CPU_SET(i, mask) (mask)->set(i)
690#define KMP_CPU_ISSET(i, mask) (mask)->is_set(i)
691#define KMP_CPU_CLR(i, mask) (mask)->clear(i)
692#define KMP_CPU_ZERO(mask) (mask)->zero()
693#define KMP_CPU_COPY(dest, src) (dest)->copy(src)
694#define KMP_CPU_AND(dest, src) (dest)->bitwise_and(src)
695#define KMP_CPU_COMPLEMENT(max_bit_number, mask) (mask)->bitwise_not()
696#define KMP_CPU_UNION(dest, src) (dest)->bitwise_or(src)
697#define KMP_CPU_ALLOC(ptr) (ptr = __kmp_affinity_dispatch->allocate_mask())
698#define KMP_CPU_FREE(ptr) __kmp_affinity_dispatch->deallocate_mask(ptr)
699#define KMP_CPU_ALLOC_ON_STACK(ptr) KMP_CPU_ALLOC(ptr)
700#define KMP_CPU_FREE_FROM_STACK(ptr) KMP_CPU_FREE(ptr)
701#define KMP_CPU_INTERNAL_ALLOC(ptr) KMP_CPU_ALLOC(ptr)
702#define KMP_CPU_INTERNAL_FREE(ptr) KMP_CPU_FREE(ptr)
703#define KMP_CPU_INDEX(arr, i) __kmp_affinity_dispatch->index_mask_array(arr, i)
704#define KMP_CPU_ALLOC_ARRAY(arr, n) \
705  (arr = __kmp_affinity_dispatch->allocate_mask_array(n))
706#define KMP_CPU_FREE_ARRAY(arr, n) \
707  __kmp_affinity_dispatch->deallocate_mask_array(arr)
708#define KMP_CPU_INTERNAL_ALLOC_ARRAY(arr, n) KMP_CPU_ALLOC_ARRAY(arr, n)
709#define KMP_CPU_INTERNAL_FREE_ARRAY(arr, n) KMP_CPU_FREE_ARRAY(arr, n)
710#define __kmp_get_system_affinity(mask, abort_bool) \
711  (mask)->get_system_affinity(abort_bool)
712#define __kmp_set_system_affinity(mask, abort_bool) \
713  (mask)->set_system_affinity(abort_bool)
714#define __kmp_get_proc_group(mask) (mask)->get_proc_group()
715
716class KMPAffinity {
717public:
718 class Mask {
719 public:
720 void *operator new(size_t n);
721 void operator delete(void *p);
722 void *operator new[](size_t n);
723 void operator delete[](void *p);
724 virtual ~Mask() {}
725 // Set bit i to 1
726 virtual void set(int i) {}
727 // Return bit i
728 virtual bool is_set(int i) const { return false; }
729 // Set bit i to 0
730 virtual void clear(int i) {}
731 // Zero out entire mask
732 virtual void zero() {}
733 // Copy src into this mask
734 virtual void copy(const Mask *src) {}
735 // this &= rhs
736 virtual void bitwise_and(const Mask *rhs) {}
737 // this |= rhs
738 virtual void bitwise_or(const Mask *rhs) {}
739 // this = ~this
740 virtual void bitwise_not() {}
741 // API for iterating over an affinity mask
742 // for (int i = mask->begin(); i != mask->end(); i = mask->next(i))
743 virtual int begin() const { return 0; }
744 virtual int end() const { return 0; }
745 virtual int next(int previous) const { return 0; }
746#if KMP_OS_WINDOWS0
747 virtual int set_process_affinity(bool abort_on_error) const { return -1; }
748#endif
749 // Set the system's affinity to this affinity mask's value
750 virtual int set_system_affinity(bool abort_on_error) const { return -1; }
751 // Set this affinity mask to the current system affinity
752 virtual int get_system_affinity(bool abort_on_error) { return -1; }
753 // Only 1 DWORD in the mask should have any procs set.
754 // Return the appropriate index, or -1 for an invalid mask.
755 virtual int get_proc_group() const { return -1; }
756 int get_max_cpu() const {
757 int cpu;
758 int max_cpu = -1;
759    KMP_CPU_SET_ITERATE(cpu, this) {
760 if (cpu > max_cpu)
761 max_cpu = cpu;
762 }
763 return max_cpu;
764 }
765 };
766 void *operator new(size_t n);
767 void operator delete(void *p);
768 // Need virtual destructor
769 virtual ~KMPAffinity() = default;
770 // Determine if affinity is capable
771 virtual void determine_capable(const char *env_var) {}
772 // Bind the current thread to os proc
773 virtual void bind_thread(int proc) {}
774 // Factory functions to allocate/deallocate a mask
775 virtual Mask *allocate_mask() { return nullptr; }
776 virtual void deallocate_mask(Mask *m) {}
777 virtual Mask *allocate_mask_array(int num) { return nullptr; }
778 virtual void deallocate_mask_array(Mask *m) {}
779 virtual Mask *index_mask_array(Mask *m, int index) { return nullptr; }
780 static void pick_api();
781 static void destroy_api();
782 enum api_type {
783 NATIVE_OS
784#if KMP_USE_HWLOC0
785 ,
786 HWLOC
787#endif
788 };
789 virtual api_type get_api_type() const {
790    KMP_ASSERT(0);
791 return NATIVE_OS;
792 }
793
794private:
795 static bool picked_api;
796};
797
798typedef KMPAffinity::Mask kmp_affin_mask_t;
799extern KMPAffinity *__kmp_affinity_dispatch;
800
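The KMP_CPU_* wrappers above are thin forwards to the virtual Mask interface, so iteration code such as Mask::get_max_cpu() works against any backing implementation (native OS masks or hwloc). A toy sketch of that contract with a hypothetical 64-bit DemoMask, which is not part of the runtime and only illustrates the begin/end/next protocol:

#include <cstdint>
#include <cstdio>

class DemoMask {  // stands in for KMPAffinity::Mask
  uint64_t bits = 0;
public:
  void set(int i) { bits |= (uint64_t)1 << i; }
  bool is_set(int i) const { return (bits >> i) & 1; }
  int begin() const { return next(-1); }
  int end() const { return 64; }
  int next(int prev) const {
    for (int i = prev + 1; i < 64; ++i)
      if (is_set(i)) return i;
    return end();
  }
};

int main() {
  DemoMask m;
  m.set(2); m.set(5);             // what KMP_CPU_SET(i, &m) expands to
  int max_cpu = -1;
  for (int cpu = m.begin(); cpu != m.end(); cpu = m.next(cpu))  // KMP_CPU_SET_ITERATE
    if (cpu > max_cpu) max_cpu = cpu;                           // mirrors Mask::get_max_cpu()
  std::printf("max cpu in mask = %d\n", max_cpu);
  return 0;
}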
801// Declare local char buffers with this size for printing debug and info
802// messages, using __kmp_affinity_print_mask().
803#define KMP_AFFIN_MASK_PRINT_LEN1024 1024
804
805enum affinity_type {
806 affinity_none = 0,
807 affinity_physical,
808 affinity_logical,
809 affinity_compact,
810 affinity_scatter,
811 affinity_explicit,
812 affinity_balanced,
813  affinity_disabled, // not used outside the env var parser
814 affinity_default
815};
816
817enum affinity_top_method {
818 affinity_top_method_all = 0, // try all (supported) methods, in order
819#if KMP_ARCH_X860 || KMP_ARCH_X86_641
820 affinity_top_method_apicid,
821 affinity_top_method_x2apicid,
822 affinity_top_method_x2apicid_1f,
823#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
824 affinity_top_method_cpuinfo, // KMP_CPUINFO_FILE is usable on Windows* OS, too
825#if KMP_GROUP_AFFINITY0
826 affinity_top_method_group,
827#endif /* KMP_GROUP_AFFINITY */
828 affinity_top_method_flat,
829#if KMP_USE_HWLOC0
830 affinity_top_method_hwloc,
831#endif
832 affinity_top_method_default
833};
834
835#define affinity_respect_mask_default(2) (2)
836
837typedef struct kmp_affinity_flags_t {
838 unsigned dups : 1;
839 unsigned verbose : 1;
840 unsigned warnings : 1;
841 unsigned respect : 2;
842 unsigned reset : 1;
843 unsigned initialized : 1;
844 unsigned reserved : 25;
845} kmp_affinity_flags_t;
846KMP_BUILD_ASSERT(sizeof(kmp_affinity_flags_t) == 4);
847
848typedef struct kmp_affinity_ids_t {
849 int ids[KMP_HW_LAST];
850 int operator[](size_t idx) const { return ids[idx]; }
851 int &operator[](size_t idx) { return ids[idx]; }
852 kmp_affinity_ids_t &operator=(const kmp_affinity_ids_t &rhs) {
853 for (int i = 0; i < KMP_HW_LAST; ++i)
854 ids[i] = rhs[i];
855 return *this;
856 }
857} kmp_affinity_ids_t;
858
859typedef struct kmp_affinity_attrs_t {
860 int core_type : 8;
861 int core_eff : 8;
862 unsigned valid : 1;
863 unsigned reserved : 15;
864} kmp_affinity_attrs_t;
865#define KMP_AFFINITY_ATTRS_UNKNOWN \
866  { KMP_HW_CORE_TYPE_UNKNOWN, kmp_hw_attr_t::UNKNOWN_CORE_EFF, 0, 0 }
867
868typedef struct kmp_affinity_t {
869 char *proclist;
870 enum affinity_type type;
871 kmp_hw_t gran;
872 int gran_levels;
873 int compact;
874 int offset;
875 kmp_affinity_flags_t flags;
876 unsigned num_masks;
877 kmp_affin_mask_t *masks;
878 kmp_affinity_ids_t *ids;
879 kmp_affinity_attrs_t *attrs;
880 unsigned num_os_id_masks;
881 kmp_affin_mask_t *os_id_masks;
882 const char *env_var;
883} kmp_affinity_t;
884
885#define KMP_AFFINITY_INIT(env) \
886  { \
887    nullptr, affinity_default, KMP_HW_UNKNOWN, -1, 0, 0, \
888        {TRUE, FALSE, TRUE, affinity_respect_mask_default, FALSE, FALSE}, 0, \
889        nullptr, nullptr, nullptr, 0, nullptr, env \
890  }
891
892extern enum affinity_top_method __kmp_affinity_top_method;
893extern kmp_affinity_t __kmp_affinity;
894extern kmp_affinity_t __kmp_hh_affinity;
895extern kmp_affinity_t *__kmp_affinities[2];
896
897extern void __kmp_affinity_bind_thread(int which);
898
899extern kmp_affin_mask_t *__kmp_affin_fullMask;
900extern kmp_affin_mask_t *__kmp_affin_origMask;
901extern char *__kmp_cpuinfo_file;
902
903#endif /* KMP_AFFINITY_SUPPORTED */
904
905// This needs to be kept in sync with the values in omp.h !!!
906typedef enum kmp_proc_bind_t {
907 proc_bind_false = 0,
908 proc_bind_true,
909 proc_bind_primary,
910 proc_bind_close,
911 proc_bind_spread,
912 proc_bind_intel, // use KMP_AFFINITY interface
913 proc_bind_default
914} kmp_proc_bind_t;
915
916typedef struct kmp_nested_proc_bind_t {
917 kmp_proc_bind_t *bind_types;
918 int size;
919 int used;
920} kmp_nested_proc_bind_t;
921
922extern kmp_nested_proc_bind_t __kmp_nested_proc_bind;
923extern kmp_proc_bind_t __kmp_teams_proc_bind;
924
925extern int __kmp_display_affinity;
926extern char *__kmp_affinity_format;
927static const size_t KMP_AFFINITY_FORMAT_SIZE = 512;
928#if OMPT_SUPPORT1
929extern int __kmp_tool;
930extern char *__kmp_tool_libraries;
931#endif // OMPT_SUPPORT
932
933#if KMP_AFFINITY_SUPPORTED1
934#define KMP_PLACE_ALL(-1) (-1)
935#define KMP_PLACE_UNDEFINED(-2) (-2)
936// Is KMP_AFFINITY being used instead of OMP_PROC_BIND/OMP_PLACES?
937#define KMP_AFFINITY_NON_PROC_BIND \
938  ((__kmp_nested_proc_bind.bind_types[0] == proc_bind_false || \
939    __kmp_nested_proc_bind.bind_types[0] == proc_bind_intel) && \
940   (__kmp_affinity.num_masks > 0 || __kmp_affinity.type == affinity_balanced))
941#endif /* KMP_AFFINITY_SUPPORTED */
942
943extern int __kmp_affinity_num_places;
944
945typedef enum kmp_cancel_kind_t {
946 cancel_noreq = 0,
947 cancel_parallel = 1,
948 cancel_loop = 2,
949 cancel_sections = 3,
950 cancel_taskgroup = 4
951} kmp_cancel_kind_t;
952
953// KMP_HW_SUBSET support:
954typedef struct kmp_hws_item {
955 int num;
956 int offset;
957} kmp_hws_item_t;
958
959extern kmp_hws_item_t __kmp_hws_socket;
960extern kmp_hws_item_t __kmp_hws_die;
961extern kmp_hws_item_t __kmp_hws_node;
962extern kmp_hws_item_t __kmp_hws_tile;
963extern kmp_hws_item_t __kmp_hws_core;
964extern kmp_hws_item_t __kmp_hws_proc;
965extern int __kmp_hws_requested;
966extern int __kmp_hws_abs_flag; // absolute or per-item number requested
967
968/* ------------------------------------------------------------------------ */
969
970#define KMP_PAD(type, sz) \
971  (sizeof(type) + (sz - ((sizeof(type) - 1) % (sz)) - 1))
972
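KMP_PAD(type, sz) rounds sizeof(type) up to the next multiple of sz, typically with sz set to a cache-line size so adjacent objects never share a line. A quick standalone check of that arithmetic (the struct and the 64-byte line size are illustrative):

#include <cstddef>
#include <cstdio>

// Same expression as KMP_PAD, with a local name to avoid clashing with kmp.h.
#define DEMO_PAD(type, sz) (sizeof(type) + (sz - ((sizeof(type) - 1) % (sz)) - 1))

struct demo_t { int a; char b; };  // sizeof is 8 on typical ABIs

int main() {
  std::printf("sizeof=%zu padded to 64 = %zu\n", sizeof(demo_t), DEMO_PAD(demo_t, 64));
  // Prints "sizeof=8 padded to 64 = 64" on a typical 64-bit ABI.
  return 0;
}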
973// We need to avoid using -1 as a GTID as +1 is added to the gtid
974// when storing it in a lock, and the value 0 is reserved.
975#define KMP_GTID_DNE(-2) (-2) /* Does not exist */
976#define KMP_GTID_SHUTDOWN(-3) (-3) /* Library is shutting down */
977#define KMP_GTID_MONITOR(-4) (-4) /* Monitor thread ID */
978#define KMP_GTID_UNKNOWN(-5) (-5) /* Is not known */
979#define KMP_GTID_MIN(-6) (-6) /* Minimal gtid for low bound check in DEBUG */
980
981/* OpenMP 5.0 Memory Management support */
982
983#ifndef __OMP_H
984// Duplicate type definitions from omp.h
985typedef uintptr_t omp_uintptr_t;
986
987typedef enum {
988 omp_atk_sync_hint = 1,
989 omp_atk_alignment = 2,
990 omp_atk_access = 3,
991 omp_atk_pool_size = 4,
992 omp_atk_fallback = 5,
993 omp_atk_fb_data = 6,
994 omp_atk_pinned = 7,
995 omp_atk_partition = 8
996} omp_alloctrait_key_t;
997
998typedef enum {
999 omp_atv_false = 0,
1000 omp_atv_true = 1,
1001 omp_atv_contended = 3,
1002 omp_atv_uncontended = 4,
1003 omp_atv_serialized = 5,
1004 omp_atv_sequential = omp_atv_serialized, // (deprecated)
1005 omp_atv_private = 6,
1006 omp_atv_all = 7,
1007 omp_atv_thread = 8,
1008 omp_atv_pteam = 9,
1009 omp_atv_cgroup = 10,
1010 omp_atv_default_mem_fb = 11,
1011 omp_atv_null_fb = 12,
1012 omp_atv_abort_fb = 13,
1013 omp_atv_allocator_fb = 14,
1014 omp_atv_environment = 15,
1015 omp_atv_nearest = 16,
1016 omp_atv_blocked = 17,
1017 omp_atv_interleaved = 18
1018} omp_alloctrait_value_t;
1019#define omp_atv_default((omp_uintptr_t)-1) ((omp_uintptr_t)-1)
1020
1021typedef void *omp_memspace_handle_t;
1022extern omp_memspace_handle_t const omp_default_mem_space;
1023extern omp_memspace_handle_t const omp_large_cap_mem_space;
1024extern omp_memspace_handle_t const omp_const_mem_space;
1025extern omp_memspace_handle_t const omp_high_bw_mem_space;
1026extern omp_memspace_handle_t const omp_low_lat_mem_space;
1027extern omp_memspace_handle_t const llvm_omp_target_host_mem_space;
1028extern omp_memspace_handle_t const llvm_omp_target_shared_mem_space;
1029extern omp_memspace_handle_t const llvm_omp_target_device_mem_space;
1030
1031typedef struct {
1032 omp_alloctrait_key_t key;
1033 omp_uintptr_t value;
1034} omp_alloctrait_t;
1035
1036typedef void *omp_allocator_handle_t;
1037extern omp_allocator_handle_t const omp_null_allocator;
1038extern omp_allocator_handle_t const omp_default_mem_alloc;
1039extern omp_allocator_handle_t const omp_large_cap_mem_alloc;
1040extern omp_allocator_handle_t const omp_const_mem_alloc;
1041extern omp_allocator_handle_t const omp_high_bw_mem_alloc;
1042extern omp_allocator_handle_t const omp_low_lat_mem_alloc;
1043extern omp_allocator_handle_t const omp_cgroup_mem_alloc;
1044extern omp_allocator_handle_t const omp_pteam_mem_alloc;
1045extern omp_allocator_handle_t const omp_thread_mem_alloc;
1046extern omp_allocator_handle_t const llvm_omp_target_host_mem_alloc;
1047extern omp_allocator_handle_t const llvm_omp_target_shared_mem_alloc;
1048extern omp_allocator_handle_t const llvm_omp_target_device_mem_alloc;
1049extern omp_allocator_handle_t const kmp_max_mem_alloc;
1050extern omp_allocator_handle_t __kmp_def_allocator;
1051
1052// end of duplicate type definitions from omp.h
1053#endif
1054
1055extern int __kmp_memkind_available;
1056
1057typedef omp_memspace_handle_t kmp_memspace_t; // placeholder
1058
1059typedef struct kmp_allocator_t {
1060 omp_memspace_handle_t memspace;
1061 void **memkind; // pointer to memkind
1062 size_t alignment;
1063 omp_alloctrait_value_t fb;
1064 kmp_allocator_t *fb_data;
1065 kmp_uint64 pool_size;
1066 kmp_uint64 pool_used;
1067 bool pinned;
1068} kmp_allocator_t;
1069
1070extern omp_allocator_handle_t __kmpc_init_allocator(int gtid,
1071 omp_memspace_handle_t,
1072 int ntraits,
1073 omp_alloctrait_t traits[]);
1074extern void __kmpc_destroy_allocator(int gtid, omp_allocator_handle_t al);
1075extern void __kmpc_set_default_allocator(int gtid, omp_allocator_handle_t al);
1076extern omp_allocator_handle_t __kmpc_get_default_allocator(int gtid);
1077// external interfaces, may be used by compiler
1078extern void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
1079extern void *__kmpc_aligned_alloc(int gtid, size_t align, size_t sz,
1080 omp_allocator_handle_t al);
1081extern void *__kmpc_calloc(int gtid, size_t nmemb, size_t sz,
1082 omp_allocator_handle_t al);
1083extern void *__kmpc_realloc(int gtid, void *ptr, size_t sz,
1084 omp_allocator_handle_t al,
1085 omp_allocator_handle_t free_al);
1086extern void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
1087// internal interfaces, contain real implementation
1088extern void *__kmp_alloc(int gtid, size_t align, size_t sz,
1089 omp_allocator_handle_t al);
1090extern void *__kmp_calloc(int gtid, size_t align, size_t nmemb, size_t sz,
1091 omp_allocator_handle_t al);
1092extern void *__kmp_realloc(int gtid, void *ptr, size_t sz,
1093 omp_allocator_handle_t al,
1094 omp_allocator_handle_t free_al);
1095extern void ___kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
1096
1097extern void __kmp_init_memkind();
1098extern void __kmp_fini_memkind();
1099extern void __kmp_init_target_mem();
1100
1101/* ------------------------------------------------------------------------ */
1102
1103#if ENABLE_LIBOMPTARGET1
1104extern void __kmp_init_target_task();
1105#endif
1106
1107/* ------------------------------------------------------------------------ */
1108
1109#define KMP_UINT64_MAX \
1110  (~((kmp_uint64)1 << ((sizeof(kmp_uint64) * (1 << 3)) - 1)))
1111
1112#define KMP_MIN_NTH1 1
1113
1114#ifndef KMP_MAX_NTH2147483647
1115#if defined(PTHREAD_THREADS_MAX) && PTHREAD_THREADS_MAX < INT_MAX2147483647
1116#define KMP_MAX_NTH2147483647 PTHREAD_THREADS_MAX
1117#else
1118#define KMP_MAX_NTH2147483647 INT_MAX2147483647
1119#endif
1120#endif /* KMP_MAX_NTH */
1121
1122#ifdef PTHREAD_STACK_MIN16384
1123#define KMP_MIN_STKSIZE16384 PTHREAD_STACK_MIN16384
1124#else
1125#define KMP_MIN_STKSIZE16384 ((size_t)(32 * 1024))
1126#endif
1127
1128#define KMP_MAX_STKSIZE (~((size_t)1 << ((sizeof(size_t) * (1 << 3)) - 1)))
1129
1130#if KMP_ARCH_X86
1131#define KMP_DEFAULT_STKSIZE ((size_t)(2 * 1024 * 1024))
1132#elif KMP_ARCH_X86_64
1133#define KMP_DEFAULT_STKSIZE ((size_t)(4 * 1024 * 1024))
1134#define KMP_BACKUP_STKSIZE ((size_t)(2 * 1024 * 1024))
1135#else
1136#define KMP_DEFAULT_STKSIZE ((size_t)(1024 * 1024))
1137#endif
1138
1139#define KMP_DEFAULT_MALLOC_POOL_INCR((size_t)(1024 * 1024)) ((size_t)(1024 * 1024))
1140#define KMP_MIN_MALLOC_POOL_INCR((size_t)(4 * 1024)) ((size_t)(4 * 1024))
1141#define KMP_MAX_MALLOC_POOL_INCR \
1142  (~((size_t)1 << ((sizeof(size_t) * (1 << 3)) - 1)))
1143
1144#define KMP_MIN_STKOFFSET(0) (0)
1145#define KMP_MAX_STKOFFSET KMP_MAX_STKSIZE
1146#if KMP_OS_DARWIN0
1147#define KMP_DEFAULT_STKOFFSET64 KMP_MIN_STKOFFSET(0)
1148#else
1149#define KMP_DEFAULT_STKOFFSET64 CACHE_LINE64
1150#endif
1151
1152#define KMP_MIN_STKPADDING(0) (0)
1153#define KMP_MAX_STKPADDING(2 * 1024 * 1024) (2 * 1024 * 1024)
1154
1155#define KMP_BLOCKTIME_MULTIPLIER \
1156  (1000) /* number of blocktime units per second */
1157#define KMP_MIN_BLOCKTIME (0)
1158#define KMP_MAX_BLOCKTIME \
1159  (INT_MAX) /* Must be this for the "infinite" setting to work */
1160
1161/* __kmp_blocktime is in milliseconds */
1162#define KMP_DEFAULT_BLOCKTIME (__kmp_is_hybrid_cpu() ? (0) : (200))
1163
1164#if KMP_USE_MONITOR
1165#define KMP_DEFAULT_MONITOR_STKSIZE ((size_t)(64 * 1024))
1166#define KMP_MIN_MONITOR_WAKEUPS (1) // min times monitor wakes up per second
1167#define KMP_MAX_MONITOR_WAKEUPS (1000) // max times monitor can wake up per sec
1168
1169/* Calculate new number of monitor wakeups for a specific block time based on
1170 previous monitor_wakeups. Only allow increasing number of wakeups */
1171#define KMP_WAKEUPS_FROM_BLOCKTIME(blocktime, monitor_wakeups) \
1172 (((blocktime) == KMP_MAX_BLOCKTIME(2147483647)) ? (monitor_wakeups) \
1173 : ((blocktime) == KMP_MIN_BLOCKTIME(0)) ? KMP_MAX_MONITOR_WAKEUPS \
1174 : ((monitor_wakeups) > (KMP_BLOCKTIME_MULTIPLIER(1000) / (blocktime))) \
1175 ? (monitor_wakeups) \
1176 : (KMP_BLOCKTIME_MULTIPLIER(1000)) / (blocktime))
1177
1178/* Calculate number of intervals for a specific block time based on
1179 monitor_wakeups */
1180#define KMP_INTERVALS_FROM_BLOCKTIME(blocktime, monitor_wakeups) \
1181 (((blocktime) + (KMP_BLOCKTIME_MULTIPLIER(1000) / (monitor_wakeups)) - 1) / \
1182 (KMP_BLOCKTIME_MULTIPLIER(1000) / (monitor_wakeups)))
1183#else
1184#define KMP_BLOCKTIME(team, tid) \
1185  (get__bt_set(team, tid) ? get__blocktime(team, tid) : __kmp_dflt_blocktime)
1186#if KMP_OS_UNIX1 && (KMP_ARCH_X860 || KMP_ARCH_X86_641)
1187// HW TSC is used to reduce overhead (clock tick instead of nanosecond).
1188extern kmp_uint64 __kmp_ticks_per_msec;
1189#if KMP_COMPILER_ICC0 || KMP_COMPILER_ICX0
1190#define KMP_NOW() ((kmp_uint64)_rdtsc())
1191#else
1192#define KMP_NOW() __kmp_hardware_timestamp()
1193#endif
1194#define KMP_NOW_MSEC() (KMP_NOW() / __kmp_ticks_per_msec)
1195#define KMP_BLOCKTIME_INTERVAL(team, tid) \
1196  (KMP_BLOCKTIME(team, tid) * __kmp_ticks_per_msec)
1197#define KMP_BLOCKING(goal, count) ((goal) > KMP_NOW())
1198#else
1199// System time is retrieved sporadically while blocking.
1200extern kmp_uint64 __kmp_now_nsec();
1201#define KMP_NOW() __kmp_now_nsec()
1202#define KMP_NOW_MSEC() (KMP_NOW() / KMP_USEC_PER_SEC)
1203#define KMP_BLOCKTIME_INTERVAL(team, tid) \
1204  (KMP_BLOCKTIME(team, tid) * KMP_USEC_PER_SEC)
1205#define KMP_BLOCKING(goal, count) ((count) % 1000 != 0 || (goal) > KMP_NOW())
1206#endif
1207#endif // KMP_USE_MONITOR
1208
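To make the monitor-path arithmetic above concrete, the sketch below re-derives KMP_WAKEUPS_FROM_BLOCKTIME and KMP_INTERVALS_FROM_BLOCKTIME as plain functions and evaluates them for the 200 ms default blocktime. The constants are taken from the listing; the function names are local stand-ins, not runtime symbols.

#include <cstdio>

// Wakeup rate only ever increases: at least 1000/blocktime wakeups per second,
// so the monitor fires at least once within every blocktime window.
static int wakeups_from_blocktime(int blocktime, int monitor_wakeups) {
  const int kMultiplier = 1000, kMaxWakeups = 1000;
  if (blocktime == 2147483647) return monitor_wakeups;  // "infinite" blocktime
  if (blocktime == 0) return kMaxWakeups;                // minimum blocktime
  return (monitor_wakeups > kMultiplier / blocktime) ? monitor_wakeups
                                                     : kMultiplier / blocktime;
}

// Number of monitor periods that cover blocktime, rounded up (ceiling division).
static int intervals_from_blocktime(int blocktime, int monitor_wakeups) {
  const int kMultiplier = 1000;
  int period = kMultiplier / monitor_wakeups;  // ms between monitor wakeups
  return (blocktime + period - 1) / period;
}

int main() {
  int wakeups = wakeups_from_blocktime(200, 1);  // 200 ms default, 1 wakeup/sec start
  std::printf("wakeups/sec=%d intervals=%d\n", wakeups,
              intervals_from_blocktime(200, wakeups));  // prints 5 and 1
  return 0;
}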
1209#define KMP_MIN_STATSCOLS40 40
1210#define KMP_MAX_STATSCOLS4096 4096
1211#define KMP_DEFAULT_STATSCOLS80 80
1212
1213#define KMP_MIN_INTERVAL0 0
1214#define KMP_MAX_INTERVAL(2147483647 - 1) (INT_MAX2147483647 - 1)
1215#define KMP_DEFAULT_INTERVAL0 0
1216
1217#define KMP_MIN_CHUNK1 1
1218#define KMP_MAX_CHUNK(2147483647 - 1) (INT_MAX2147483647 - 1)
1219#define KMP_DEFAULT_CHUNK1 1
1220
1221#define KMP_MIN_DISP_NUM_BUFF1 1
1222#define KMP_DFLT_DISP_NUM_BUFF7 7
1223#define KMP_MAX_DISP_NUM_BUFF4096 4096
1224
1225#define KMP_MAX_ORDERED8 8
1226
1227#define KMP_MAX_FIELDS32 32
1228
1229#define KMP_MAX_BRANCH_BITS31 31
1230
1231#define KMP_MAX_ACTIVE_LEVELS_LIMIT2147483647 INT_MAX2147483647
1232
1233#define KMP_MAX_DEFAULT_DEVICE_LIMIT2147483647 INT_MAX2147483647
1234
1235#define KMP_MAX_TASK_PRIORITY_LIMIT2147483647 INT_MAX2147483647
1236
1237/* Minimum number of threads before switch to TLS gtid (experimentally
1238 determined) */
1239/* josh TODO: what about OS X* tuning? */
1240#if KMP_ARCH_X860 || KMP_ARCH_X86_641
1241#define KMP_TLS_GTID_MIN5 5
1242#else
1243#define KMP_TLS_GTID_MIN5 INT_MAX2147483647
1244#endif
1245
1246#define KMP_MASTER_TID(tid)(0 == (tid)) (0 == (tid))
1247#define KMP_WORKER_TID(tid)(0 != (tid)) (0 != (tid))
1248
1249#define KMP_MASTER_GTID(gtid)(0 == __kmp_tid_from_gtid((gtid))) (0 == __kmp_tid_from_gtid((gtid)))
1250#define KMP_WORKER_GTID(gtid)(0 != __kmp_tid_from_gtid((gtid))) (0 != __kmp_tid_from_gtid((gtid)))
1251#define KMP_INITIAL_GTID(gtid)(0 == (gtid)) (0 == (gtid))
1252
1253#ifndef TRUE(!0)
1254#define FALSE0 0
1255#define TRUE(!0) (!FALSE0)
1256#endif
1257
1258/* NOTE: all of the following constants must be even */
1259
1260#if KMP_OS_WINDOWS
1261#define KMP_INIT_WAIT 64U /* initial number of spin-tests */
1262#define KMP_NEXT_WAIT 32U /* subsequent number of spin-tests */
1263#elif KMP_OS_LINUX
1264#define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
1265#define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
1266#elif KMP_OS_DARWIN
1267/* TODO: tune for KMP_OS_DARWIN */
1268#define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
1269#define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
1270#elif KMP_OS_DRAGONFLY
1271/* TODO: tune for KMP_OS_DRAGONFLY */
1272#define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
1273#define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
1274#elif KMP_OS_FREEBSD
1275/* TODO: tune for KMP_OS_FREEBSD */
1276#define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
1277#define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
1278#elif KMP_OS_NETBSD
1279/* TODO: tune for KMP_OS_NETBSD */
1280#define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
1281#define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
1282#elif KMP_OS_HURD
1283/* TODO: tune for KMP_OS_HURD */
1284#define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
1285#define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
1286#elif KMP_OS_OPENBSD
1287/* TODO: tune for KMP_OS_OPENBSD */
1288#define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
1289#define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
1290#endif
1291
1292#if KMP_ARCH_X860 || KMP_ARCH_X86_641
1293typedef struct kmp_cpuid {
1294 kmp_uint32 eax;
1295 kmp_uint32 ebx;
1296 kmp_uint32 ecx;
1297 kmp_uint32 edx;
1298} kmp_cpuid_t;
1299
1300typedef struct kmp_cpuinfo_flags_t {
1301 unsigned sse2 : 1; // 0 if SSE2 instructions are not supported, 1 otherwise.
1302 unsigned rtm : 1; // 0 if RTM instructions are not supported, 1 otherwise.
1303 unsigned hybrid : 1;
1304 unsigned reserved : 29; // Ensure size of 32 bits
1305} kmp_cpuinfo_flags_t;
1306
1307typedef struct kmp_cpuinfo {
1308 int initialized; // If 0, other fields are not initialized.
1309 int signature; // CPUID(1).EAX
1310 int family; // CPUID(1).EAX[27:20]+CPUID(1).EAX[11:8] (Extended Family+Family)
1311 int model; // ( CPUID(1).EAX[19:16] << 4 ) + CPUID(1).EAX[7:4] ( ( Extended
1312 // Model << 4 ) + Model)
1313 int stepping; // CPUID(1).EAX[3:0] ( Stepping )
1314 kmp_cpuinfo_flags_t flags;
1315 int apic_id;
1316 int physical_id;
1317 int logical_id;
1318 kmp_uint64 frequency; // Nominal CPU frequency in Hz.
1319 char name[3 * sizeof(kmp_cpuid_t)]; // CPUID(0x80000002,0x80000003,0x80000004)
1320} kmp_cpuinfo_t;
1321
1322extern void __kmp_query_cpuid(kmp_cpuinfo_t *p);
1323
1324#if KMP_OS_UNIX1
1325// subleaf is only needed for cache and topology discovery and can be set to
1326// zero in most cases
1327static inline void __kmp_x86_cpuid(int leaf, int subleaf, struct kmp_cpuid *p) {
1328 __asm__ __volatile__("cpuid"
1329 : "=a"(p->eax), "=b"(p->ebx), "=c"(p->ecx), "=d"(p->edx)
1330 : "a"(leaf), "c"(subleaf));
1331}
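For reference, a standalone copy of the cpuid wrapper above (x86-64, GCC/Clang inline asm) used to read the maximum basic leaf and the 12-byte vendor string from leaf 0. The demo_* names are local stand-ins, not runtime symbols.

#include <cstdio>
#include <cstring>

struct demo_cpuid { unsigned eax, ebx, ecx, edx; };

static inline void demo_x86_cpuid(int leaf, int subleaf, demo_cpuid *p) {
  __asm__ __volatile__("cpuid"
                       : "=a"(p->eax), "=b"(p->ebx), "=c"(p->ecx), "=d"(p->edx)
                       : "a"(leaf), "c"(subleaf));
}

int main() {
  demo_cpuid r;
  demo_x86_cpuid(0, 0, &r);
  char vendor[13] = {0};
  std::memcpy(vendor + 0, &r.ebx, 4);  // vendor string is returned in EBX, EDX, ECX
  std::memcpy(vendor + 4, &r.edx, 4);
  std::memcpy(vendor + 8, &r.ecx, 4);
  std::printf("cpuid leaf 0: max leaf=%u vendor=%s\n", r.eax, vendor);
  return 0;
}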
1332// Load p into FPU control word
1333static inline void __kmp_load_x87_fpu_control_word(const kmp_int16 *p) {
1334 __asm__ __volatile__("fldcw %0" : : "m"(*p));
1335}
1336// Store FPU control word into p
1337static inline void __kmp_store_x87_fpu_control_word(kmp_int16 *p) {
1338 __asm__ __volatile__("fstcw %0" : "=m"(*p));
1339}
1340static inline void __kmp_clear_x87_fpu_status_word() {
1341#if KMP_MIC0
1342 // 32-bit protected mode x87 FPU state
1343 struct x87_fpu_state {
1344 unsigned cw;
1345 unsigned sw;
1346 unsigned tw;
1347 unsigned fip;
1348 unsigned fips;
1349 unsigned fdp;
1350 unsigned fds;
1351 };
1352 struct x87_fpu_state fpu_state = {0, 0, 0, 0, 0, 0, 0};
1353 __asm__ __volatile__("fstenv %0\n\t" // store FP env
1354 "andw $0x7f00, %1\n\t" // clear 0-7,15 bits of FP SW
1355 "fldenv %0\n\t" // load FP env back
1356 : "+m"(fpu_state), "+m"(fpu_state.sw));
1357#else
1358 __asm__ __volatile__("fnclex");
1359#endif // KMP_MIC
1360}
1361#if __SSE__1
1362static inline void __kmp_load_mxcsr(const kmp_uint32 *p) { _mm_setcsr(*p); }
1363static inline void __kmp_store_mxcsr(kmp_uint32 *p) { *p = _mm_getcsr(); }
1364#else
1365static inline void __kmp_load_mxcsr(const kmp_uint32 *p) {}
1366static inline void __kmp_store_mxcsr(kmp_uint32 *p) { *p = 0; }
1367#endif
1368#else
1369// Windows still has these as external functions in assembly file
1370extern void __kmp_x86_cpuid(int mode, int mode2, struct kmp_cpuid *p);
1371extern void __kmp_load_x87_fpu_control_word(const kmp_int16 *p);
1372extern void __kmp_store_x87_fpu_control_word(kmp_int16 *p);
1373extern void __kmp_clear_x87_fpu_status_word();
1374static inline void __kmp_load_mxcsr(const kmp_uint32 *p) { _mm_setcsr(*p); }
1375static inline void __kmp_store_mxcsr(kmp_uint32 *p) { *p = _mm_getcsr(); }
1376#endif // KMP_OS_UNIX
1377
1378#define KMP_X86_MXCSR_MASK0xffffffc0 0xffffffc0 /* ignore status flags (6 lsb) */
1379
1380// User-level Monitor/Mwait
1381#if KMP_HAVE_UMWAIT((0 || 1) && (1 || 0) && !0)
1382// We always try for UMWAIT first
1383#if KMP_HAVE_WAITPKG_INTRINSICS1
1384#if KMP_HAVE_IMMINTRIN_H1
1385#include <immintrin.h>
1386#elif KMP_HAVE_INTRIN_H0
1387#include <intrin.h>
1388#endif
1389#endif // KMP_HAVE_WAITPKG_INTRINSICS
1390
1391KMP_ATTRIBUTE_TARGET_WAITPKG__attribute__((target("waitpkg")))
1392static inline int __kmp_tpause(uint32_t hint, uint64_t counter) {
1393#if !KMP_HAVE_WAITPKG_INTRINSICS1
1394 uint32_t timeHi = uint32_t(counter >> 32);
1395 uint32_t timeLo = uint32_t(counter & 0xffffffff);
1396 char flag;
1397 __asm__ volatile("#tpause\n.byte 0x66, 0x0F, 0xAE, 0xF1\n"
1398 "setb %0"
1399 // The "=q" constraint means any register accessible as rl
1400 // in 32-bit mode: a, b, c, and d;
1401 // in 64-bit mode: any integer register
1402 : "=q"(flag)
1403 : "a"(timeLo), "d"(timeHi), "c"(hint)
1404 :);
1405 return flag;
1406#else
1407 return _tpause(hint, counter);
1408#endif
1409}
1410KMP_ATTRIBUTE_TARGET_WAITPKG__attribute__((target("waitpkg")))
1411static inline void __kmp_umonitor(void *cacheline) {
1412#if !KMP_HAVE_WAITPKG_INTRINSICS1
1413 __asm__ volatile("# umonitor\n.byte 0xF3, 0x0F, 0xAE, 0x01 "
1414 :
1415 : "a"(cacheline)
1416 :);
1417#else
1418 _umonitor(cacheline);
1419#endif
1420}
1421KMP_ATTRIBUTE_TARGET_WAITPKG__attribute__((target("waitpkg")))
1422static inline int __kmp_umwait(uint32_t hint, uint64_t counter) {
1423#if !KMP_HAVE_WAITPKG_INTRINSICS1
1424 uint32_t timeHi = uint32_t(counter >> 32);
1425 uint32_t timeLo = uint32_t(counter & 0xffffffff);
1426 char flag;
1427 __asm__ volatile("#umwait\n.byte 0xF2, 0x0F, 0xAE, 0xF1\n"
1428 "setb %0"
1430 // The "=q" constraint means any register accessible as rl
1430 // in 32-bit mode: a, b, c, and d;
1431 // in 64-bit mode: any integer register
1432 : "=q"(flag)
1433 : "a"(timeLo), "d"(timeHi), "c"(hint)
1434 :);
1435 return flag;
1436#else
1437 return _umwait(hint, counter);
1438#endif
1439}
1440#elif KMP_HAVE_MWAIT((0 || 1) && (1 || 0) && !0)
1441#if KMP_OS_UNIX1
1442#include <pmmintrin.h>
1443#else
1444#include <intrin.h>
1445#endif
1446#if KMP_OS_UNIX1
1447__attribute__((target("sse3")))
1448#endif
1449static inline void
1450__kmp_mm_monitor(void *cacheline, unsigned extensions, unsigned hints) {
1451 _mm_monitor(cacheline, extensions, hints);
1452}
1453#if KMP_OS_UNIX1
1454__attribute__((target("sse3")))
1455#endif
1456static inline void
1457__kmp_mm_mwait(unsigned extensions, unsigned hints) {
1458 _mm_mwait(extensions, hints);
1459}
1460#endif // KMP_HAVE_UMWAIT
1461
1462#if KMP_ARCH_X860
1463extern void __kmp_x86_pause(void);
1464#elif KMP_MIC0
1465// Performance testing on KNC (C0QS-7120 P/A/X/D, 61-core, 16 GB Memory) showed
1466// regression after removal of extra PAUSE from spin loops. Changing
1467// the delay from 100 to 300 showed even better performance than double PAUSE
1468// on Spec OMP2001 and LCPC tasking tests, no regressions on EPCC.
1469static inline void __kmp_x86_pause(void) { _mm_delay_32(300); }
1470#else
1471static inline void __kmp_x86_pause(void) { _mm_pause(); }
1472#endif
1473#define KMP_CPU_PAUSE()__kmp_x86_pause() __kmp_x86_pause()
1474#elif KMP_ARCH_PPC64(0 || 0)
1475#define KMP_PPC64_PRI_LOW() __asm__ volatile("or 1, 1, 1")
1476#define KMP_PPC64_PRI_MED() __asm__ volatile("or 2, 2, 2")
1477#define KMP_PPC64_PRI_LOC_MB() __asm__ volatile("" : : : "memory")
1478#define KMP_CPU_PAUSE()__kmp_x86_pause() \
1479 do { \
1480 KMP_PPC64_PRI_LOW(); \
1481 KMP_PPC64_PRI_MED(); \
1482 KMP_PPC64_PRI_LOC_MB(); \
1483 } while (0)
1484#else
1485#define KMP_CPU_PAUSE()__kmp_x86_pause() /* nothing to do */
1486#endif
1487
1488#define KMP_INIT_YIELD(count) \
1489  { (count) = __kmp_yield_init; }
1490
1491#define KMP_INIT_BACKOFF(time) \
1492  { (time) = __kmp_pause_init; }
1493
1494#define KMP_OVERSUBSCRIBED \
1495  (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc))
1496
1497#define KMP_TRY_YIELD \
1498  ((__kmp_use_yield == 1) || (__kmp_use_yield == 2 && (KMP_OVERSUBSCRIBED)))
1499
1500#define KMP_TRY_YIELD_OVERSUB \
1501  ((__kmp_use_yield == 1 || __kmp_use_yield == 2) && (KMP_OVERSUBSCRIBED))
1502
1503#define KMP_YIELD(cond) \
1504  { \
1505    KMP_CPU_PAUSE(); \
1506    if ((cond) && (KMP_TRY_YIELD)) \
1507      __kmp_yield(); \
1508  }
1509
1510#define KMP_YIELD_OVERSUB() \
1511  { \
1512    KMP_CPU_PAUSE(); \
1513    if ((KMP_TRY_YIELD_OVERSUB)) \
1514      __kmp_yield(); \
1515  }
1516
1517// Note the decrement of 2 in the following Macros. With KMP_LIBRARY=turnaround,
1518// there should be no yielding since the initial value from KMP_INIT_YIELD() is odd.
1519#define KMP_YIELD_SPIN(count) \
1520  { \
1521    KMP_CPU_PAUSE(); \
1522    if (KMP_TRY_YIELD) { \
1523      (count) -= 2; \
1524      if (!(count)) { \
1525        __kmp_yield(); \
1526        (count) = __kmp_yield_next; \
1527      } \
1528    } \
1529  }
1530
1531// If TPAUSE is available & enabled, use it. If oversubscribed, use the slower
1532// (C0.2) state, which improves performance of other SMT threads on the same
1533// core, otherwise, use the fast (C0.1) default state, or whatever the user has
1534// requested. Uses a timed TPAUSE, and exponential backoff. If TPAUSE isn't
1535// available, fall back to the regular CPU pause and yield combination.
1536#if KMP_HAVE_UMWAIT
1537#define KMP_TPAUSE_MAX_MASK ((kmp_uint64)0xFFFF)
1538#define KMP_YIELD_OVERSUB_ELSE_SPIN(count, time) \
1539  { \
1540    if (__kmp_tpause_enabled) { \
1541      if (KMP_OVERSUBSCRIBED) { \
1542        __kmp_tpause(0, (time)); \
1543      } else { \
1544        __kmp_tpause(__kmp_tpause_hint, (time)); \
1545      } \
1546      (time) = (time << 1 | 1) & KMP_TPAUSE_MAX_MASK; \
1547    } else { \
1548      KMP_CPU_PAUSE(); \
1549      if ((KMP_TRY_YIELD_OVERSUB)) { \
1550        __kmp_yield(); \
1551      } else if (__kmp_use_yield == 1) { \
1552        (count) -= 2; \
1553        if (!(count)) { \
1554          __kmp_yield(); \
1555          (count) = __kmp_yield_next; \
1556        } \
1557      } \
1558    } \
1559  }
1560#else
1561#define KMP_YIELD_OVERSUB_ELSE_SPIN(count, time) \
1562  { \
1563    KMP_CPU_PAUSE(); \
1564    if ((KMP_TRY_YIELD_OVERSUB)) \
1565      __kmp_yield(); \
1566    else if (__kmp_use_yield == 1) { \
1567      (count) -= 2; \
1568      if (!(count)) { \
1569        __kmp_yield(); \
1570        (count) = __kmp_yield_next; \
1571      } \
1572    } \
1573  }
1574#endif // KMP_HAVE_UMWAIT
1575
1576/* ------------------------------------------------------------------------ */
1577/* Support datatypes for the orphaned construct nesting checks. */
1578/* ------------------------------------------------------------------------ */
1579
1580/* When adding to this enum, add its corresponding string in cons_text_c[]
1581 * array in kmp_error.cpp */
1582enum cons_type {
1583 ct_none,
1584 ct_parallel,
1585 ct_pdo,
1586 ct_pdo_ordered,
1587 ct_psections,
1588 ct_psingle,
1589 ct_critical,
1590 ct_ordered_in_parallel,
1591 ct_ordered_in_pdo,
1592 ct_master,
1593 ct_reduce,
1594 ct_barrier,
1595 ct_masked
1596};
1597
1598#define IS_CONS_TYPE_ORDERED(ct) ((ct) == ct_pdo_ordered)
1599
1600struct cons_data {
1601 ident_t const *ident;
1602 enum cons_type type;
1603 int prev;
1604 kmp_user_lock_p
1605 name; /* address exclusively for critical section name comparison */
1606};
1607
1608struct cons_header {
1609 int p_top, w_top, s_top;
1610 int stack_size, stack_top;
1611 struct cons_data *stack_data;
1612};
1613
1614struct kmp_region_info {
1615 char *text;
1616 int offset[KMP_MAX_FIELDS32];
1617 int length[KMP_MAX_FIELDS32];
1618};
1619
1620/* ---------------------------------------------------------------------- */
1621/* ---------------------------------------------------------------------- */
1622
1623#if KMP_OS_WINDOWS0
1624typedef HANDLE kmp_thread_t;
1625typedef DWORD kmp_key_t;
1626#endif /* KMP_OS_WINDOWS */
1627
1628#if KMP_OS_UNIX1
1629typedef pthread_t kmp_thread_t;
1630typedef pthread_key_t kmp_key_t;
1631#endif
1632
1633extern kmp_key_t __kmp_gtid_threadprivate_key;
1634
1635typedef struct kmp_sys_info {
1636 long maxrss; /* the maximum resident set size utilized (in kilobytes) */
1637 long minflt; /* the number of page faults serviced without any I/O */
1638 long majflt; /* the number of page faults serviced that required I/O */
1639 long nswap; /* the number of times a process was "swapped" out of memory */
1640 long inblock; /* the number of times the file system had to perform input */
1641 long oublock; /* the number of times the file system had to perform output */
1642 long nvcsw; /* the number of times a context switch was voluntarily */
1643 long nivcsw; /* the number of times a context switch was forced */
1644} kmp_sys_info_t;
1645
1646#if USE_ITT_BUILD1
1647// We cannot include "kmp_itt.h" due to circular dependency. Declare the only
1648// required type here. Later we will check the type meets requirements.
1649typedef int kmp_itt_mark_t;
1650#define KMP_ITT_DEBUG0 0
1651#endif /* USE_ITT_BUILD */
1652
1653typedef kmp_int32 kmp_critical_name[8];
1654
1655/*!
1656@ingroup PARALLEL
1657The type for a microtask which gets passed to @ref __kmpc_fork_call().
1658The arguments to the outlined function are
1659@param global_tid the global thread identity of the thread executing the
1660function.
1661@param bound_tid the local identity of the thread executing the function
1662@param ... pointers to shared variables accessed by the function.
1663*/
1664typedef void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid, ...);
1665typedef void (*kmpc_micro_bound)(kmp_int32 *bound_tid, kmp_int32 *bound_nth,
1666 ...);
1667
1668/*!
1669@ingroup THREADPRIVATE
1670@{
1671*/
1672/* ---------------------------------------------------------------------------
1673 */
1674/* Threadprivate initialization/finalization function declarations */
1675
1676/* for non-array objects: __kmpc_threadprivate_register() */
1677
1678/*!
1679 Pointer to the constructor function.
1680 The first argument is the <tt>this</tt> pointer
1681*/
1682typedef void *(*kmpc_ctor)(void *);
1683
1684/*!
1685 Pointer to the destructor function.
1686 The first argument is the <tt>this</tt> pointer
1687*/
1688typedef void (*kmpc_dtor)(
1689 void * /*, size_t */); /* 2nd arg: magic number for KCC unused by Intel
1690 compiler */
1691/*!
1692 Pointer to an alternate constructor.
1693 The first argument is the <tt>this</tt> pointer.
1694*/
1695typedef void *(*kmpc_cctor)(void *, void *);
1696
1697/* for array objects: __kmpc_threadprivate_register_vec() */
1698/* First arg: "this" pointer */
1699/* Last arg: number of array elements */
1700/*!
1701 Array constructor.
1702 First argument is the <tt>this</tt> pointer
1703 Second argument the number of array elements.
1704*/
1705typedef void *(*kmpc_ctor_vec)(void *, size_t);
1706/*!
1707 Pointer to the array destructor function.
1708 The first argument is the <tt>this</tt> pointer
1709 Second argument the number of array elements.
1710*/
1711typedef void (*kmpc_dtor_vec)(void *, size_t);
1712/*!
1713 Array constructor.
1714 First argument is the <tt>this</tt> pointer
1715 Third argument the number of array elements.
1716*/
1717typedef void *(*kmpc_cctor_vec)(void *, void *,
1718 size_t); /* function unused by compiler */
1719
1720/*!
1721@}
1722*/
1723
1724/* keeps track of threadprivate cache allocations for cleanup later */
1725typedef struct kmp_cached_addr {
1726 void **addr; /* address of allocated cache */
1727 void ***compiler_cache; /* pointer to compiler's cache */
1728 void *data; /* pointer to global data */
1729 struct kmp_cached_addr *next; /* pointer to next cached address */
1730} kmp_cached_addr_t;
1731
1732struct private_data {
1733 struct private_data *next; /* The next descriptor in the list */
1734 void *data; /* The data buffer for this descriptor */
1735 int more; /* The repeat count for this descriptor */
1736 size_t size; /* The data size for this descriptor */
1737};
1738
1739struct private_common {
1740 struct private_common *next;
1741 struct private_common *link;
1742 void *gbl_addr;
1743 void *par_addr; /* par_addr == gbl_addr for PRIMARY thread */
1744 size_t cmn_size;
1745};
1746
1747struct shared_common {
1748 struct shared_common *next;
1749 struct private_data *pod_init;
1750 void *obj_init;
1751 void *gbl_addr;
1752 union {
1753 kmpc_ctor ctor;
1754 kmpc_ctor_vec ctorv;
1755 } ct;
1756 union {
1757 kmpc_cctor cctor;
1758 kmpc_cctor_vec cctorv;
1759 } cct;
1760 union {
1761 kmpc_dtor dtor;
1762 kmpc_dtor_vec dtorv;
1763 } dt;
1764 size_t vec_len;
1765 int is_vec;
1766 size_t cmn_size;
1767};
1768
1769#define KMP_HASH_TABLE_LOG2 9 /* log2 of the hash table size */
1770#define KMP_HASH_TABLE_SIZE \
1771  (1 << KMP_HASH_TABLE_LOG2) /* size of the hash table */
1772#define KMP_HASH_SHIFT 3 /* throw away this many low bits from the address */
1773#define KMP_HASH(x) \
1774  ((((kmp_uintptr_t)x) >> KMP_HASH_SHIFT) & (KMP_HASH_TABLE_SIZE - 1))
1775
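The hash above discards the low KMP_HASH_SHIFT address bits (which carry no information for aligned objects) and keeps KMP_HASH_TABLE_LOG2 bits of what remains, yielding a bucket index for the threadprivate tables below. A standalone sketch with the same constants; the DEMO_* names and the two globals are local to the example.

#include <cstddef>
#include <cstdint>
#include <cstdio>

#define DEMO_HASH_SHIFT 3
#define DEMO_HASH_TABLE_SIZE (1 << 9)
#define DEMO_HASH(x) \
  ((((uintptr_t)(x)) >> DEMO_HASH_SHIFT) & (DEMO_HASH_TABLE_SIZE - 1))

int main() {
  static int global_a, global_b;  // stand-ins for threadprivate globals
  std::printf("bucket(a)=%zu bucket(b)=%zu\n",
              (size_t)DEMO_HASH(&global_a), (size_t)DEMO_HASH(&global_b));
  return 0;
}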
1776struct common_table {
1777 struct private_common *data[KMP_HASH_TABLE_SIZE(1 << 9)];
1778};
1779
1780struct shared_table {
1781 struct shared_common *data[KMP_HASH_TABLE_SIZE(1 << 9)];
1782};
1783
1784/* ------------------------------------------------------------------------ */
1785
1786#if KMP_USE_HIER_SCHED0
1787// Shared barrier data that exists inside a single unit of the scheduling
1788// hierarchy
1789typedef struct kmp_hier_private_bdata_t {
1790 kmp_int32 num_active;
1791 kmp_uint64 index;
1792 kmp_uint64 wait_val[2];
1793} kmp_hier_private_bdata_t;
1794#endif
1795
1796typedef struct kmp_sched_flags {
1797 unsigned ordered : 1;
1798 unsigned nomerge : 1;
1799 unsigned contains_last : 1;
1800#if KMP_USE_HIER_SCHED0
1801 unsigned use_hier : 1;
1802 unsigned unused : 28;
1803#else
1804 unsigned unused : 29;
1805#endif
1806} kmp_sched_flags_t;
1807
1808KMP_BUILD_ASSERT(sizeof(kmp_sched_flags_t) == 4);
1809
1810#if KMP_STATIC_STEAL_ENABLED1
1811typedef struct KMP_ALIGN_CACHE__attribute__((aligned(64))) dispatch_private_info32 {
1812 kmp_int32 count;
1813 kmp_int32 ub;
1814 /* Adding KMP_ALIGN_CACHE here doesn't help / can hurt performance */
1815 kmp_int32 lb;
1816 kmp_int32 st;
1817 kmp_int32 tc;
1818 kmp_lock_t *steal_lock; // lock used for chunk stealing
1819 // KMP_ALIGN(32) ensures (if the KMP_ALIGN macro is turned on)
1820 // a) parm3 is properly aligned and
1821 // b) all parm1-4 are on the same cache line.
1822  // Because parm1-4 are used together, performance seems to be better
1823  // if they are on the same cache line (not measured though).
1824
1825 struct KMP_ALIGN(32)__attribute__((aligned(32))) { // AC: changed 16 to 32 in order to simplify template
1826 kmp_int32 parm1; // structures in kmp_dispatch.cpp. This should
1827 kmp_int32 parm2; // make no real change at least while padding is off.
1828 kmp_int32 parm3;
1829 kmp_int32 parm4;
1830 };
1831
1832 kmp_uint32 ordered_lower;
1833 kmp_uint32 ordered_upper;
1834#if KMP_OS_WINDOWS0
1835 kmp_int32 last_upper;
1836#endif /* KMP_OS_WINDOWS */
1837} dispatch_private_info32_t;
1838
1839typedef struct KMP_ALIGN_CACHE__attribute__((aligned(64))) dispatch_private_info64 {
1840 kmp_int64 count; // current chunk number for static & static-steal scheduling
1841 kmp_int64 ub; /* upper-bound */
1842 /* Adding KMP_ALIGN_CACHE here doesn't help / can hurt performance */
1843 kmp_int64 lb; /* lower-bound */
1844 kmp_int64 st; /* stride */
1845 kmp_int64 tc; /* trip count (number of iterations) */
1846 kmp_lock_t *steal_lock; // lock used for chunk stealing
1847 /* parm[1-4] are used in different ways by different scheduling algorithms */
1848
1849 // KMP_ALIGN( 32 ) ensures ( if the KMP_ALIGN macro is turned on )
1850 // a) parm3 is properly aligned and
1851 // b) all parm1-4 are in the same cache line.
1852  // Because parm1-4 are used together, performance seems to be better
1853  // if they are in the same line (not measured though).
1854
1855 struct KMP_ALIGN(32)__attribute__((aligned(32))) {
1856 kmp_int64 parm1;
1857 kmp_int64 parm2;
1858 kmp_int64 parm3;
1859 kmp_int64 parm4;
1860 };
1861
1862 kmp_uint64 ordered_lower;
1863 kmp_uint64 ordered_upper;
1864#if KMP_OS_WINDOWS0
1865 kmp_int64 last_upper;
1866#endif /* KMP_OS_WINDOWS */
1867} dispatch_private_info64_t;
1868#else /* KMP_STATIC_STEAL_ENABLED */
1869typedef struct KMP_ALIGN_CACHE__attribute__((aligned(64))) dispatch_private_info32 {
1870 kmp_int32 lb;
1871 kmp_int32 ub;
1872 kmp_int32 st;
1873 kmp_int32 tc;
1874
1875 kmp_int32 parm1;
1876 kmp_int32 parm2;
1877 kmp_int32 parm3;
1878 kmp_int32 parm4;
1879
1880 kmp_int32 count;
1881
1882 kmp_uint32 ordered_lower;
1883 kmp_uint32 ordered_upper;
1884#if KMP_OS_WINDOWS0
1885 kmp_int32 last_upper;
1886#endif /* KMP_OS_WINDOWS */
1887} dispatch_private_info32_t;
1888
1889typedef struct KMP_ALIGN_CACHE__attribute__((aligned(64))) dispatch_private_info64 {
1890 kmp_int64 lb; /* lower-bound */
1891 kmp_int64 ub; /* upper-bound */
1892 kmp_int64 st; /* stride */
1893 kmp_int64 tc; /* trip count (number of iterations) */
1894
1895 /* parm[1-4] are used in different ways by different scheduling algorithms */
1896 kmp_int64 parm1;
1897 kmp_int64 parm2;
1898 kmp_int64 parm3;
1899 kmp_int64 parm4;
1900
1901 kmp_int64 count; /* current chunk number for static scheduling */
1902
1903 kmp_uint64 ordered_lower;
1904 kmp_uint64 ordered_upper;
1905#if KMP_OS_WINDOWS0
1906 kmp_int64 last_upper;
1907#endif /* KMP_OS_WINDOWS */
1908} dispatch_private_info64_t;
1909#endif /* KMP_STATIC_STEAL_ENABLED */
1910
1911typedef struct KMP_ALIGN_CACHE__attribute__((aligned(64))) dispatch_private_info {
1912 union private_info {
1913 dispatch_private_info32_t p32;
1914 dispatch_private_info64_t p64;
1915 } u;
1916 enum sched_type schedule; /* scheduling algorithm */
1917 kmp_sched_flags_t flags; /* flags (e.g., ordered, nomerge, etc.) */
1918 std::atomic<kmp_uint32> steal_flag; // static_steal only, state of a buffer
1919 kmp_int32 ordered_bumped;
1920 // Stack of buffers for nest of serial regions
1921 struct dispatch_private_info *next;
1922 kmp_int32 type_size; /* the size of types in private_info */
1923#if KMP_USE_HIER_SCHED0
1924 kmp_int32 hier_id;
1925 void *parent; /* hierarchical scheduling parent pointer */
1926#endif
1927 enum cons_type pushed_ws;
1928} dispatch_private_info_t;
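The type_size field recorded above is what selects between the 32- and 64-bit members of the union at run time. A minimal sketch of such a read, assuming the field layout shown above (the helper name is illustrative, not a runtime function):

    // Illustrative only: read the current lower bound at the recorded width.
    static kmp_int64 current_lower_bound(dispatch_private_info_t *pr) {
      if (pr->type_size == 4) // 32-bit loop bounds
        return pr->u.p32.lb;
      return pr->u.p64.lb;    // 64-bit loop bounds
    }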
1929
1930typedef struct dispatch_shared_info32 {
1931 /* chunk index under dynamic, number of idle threads under static-steal;
1932 iteration index otherwise */
1933 volatile kmp_uint32 iteration;
1934 volatile kmp_int32 num_done;
1935 volatile kmp_uint32 ordered_iteration;
1936 // Dummy to retain the structure size after making ordered_iteration scalar
1937 kmp_int32 ordered_dummy[KMP_MAX_ORDERED8 - 1];
1938} dispatch_shared_info32_t;
1939
1940typedef struct dispatch_shared_info64 {
1941 /* chunk index under dynamic, number of idle threads under static-steal;
1942 iteration index otherwise */
1943 volatile kmp_uint64 iteration;
1944 volatile kmp_int64 num_done;
1945 volatile kmp_uint64 ordered_iteration;
1946 // Dummy to retain the structure size after making ordered_iteration scalar
1947 kmp_int64 ordered_dummy[KMP_MAX_ORDERED8 - 3];
1948} dispatch_shared_info64_t;
1949
1950typedef struct dispatch_shared_info {
1951 union shared_info {
1952 dispatch_shared_info32_t s32;
1953 dispatch_shared_info64_t s64;
1954 } u;
1955 volatile kmp_uint32 buffer_index;
1956 volatile kmp_int32 doacross_buf_idx; // teamwise index
1957 volatile kmp_uint32 *doacross_flags; // shared array of iteration flags (0/1)
1958 kmp_int32 doacross_num_done; // count finished threads
1959#if KMP_USE_HIER_SCHED0
1960 void *hier;
1961#endif
1962#if KMP_USE_HWLOC0
1963 // When linking with libhwloc, the ORDERED EPCC test slows down on big
1964 // machines (> 48 cores). Performance analysis showed that a cache thrash
1965 // was occurring and this padding helps alleviate the problem.
1966 char padding[64];
1967#endif
1968} dispatch_shared_info_t;
1969
1970typedef struct kmp_disp {
1971 /* Vector for ORDERED SECTION */
1972 void (*th_deo_fcn)(int *gtid, int *cid, ident_t *);
1973 /* Vector for END ORDERED SECTION */
1974 void (*th_dxo_fcn)(int *gtid, int *cid, ident_t *);
1975
1976 dispatch_shared_info_t *th_dispatch_sh_current;
1977 dispatch_private_info_t *th_dispatch_pr_current;
1978
1979 dispatch_private_info_t *th_disp_buffer;
1980 kmp_uint32 th_disp_index;
1981 kmp_int32 th_doacross_buf_idx; // thread's doacross buffer index
1982 volatile kmp_uint32 *th_doacross_flags; // pointer to shared array of flags
1983 kmp_int64 *th_doacross_info; // info on loop bounds
1984#if KMP_USE_INTERNODE_ALIGNMENT0
1985 char more_padding[INTERNODE_CACHE_LINE4096];
1986#endif
1987} kmp_disp_t;
1988
1989/* ------------------------------------------------------------------------ */
1990/* Barrier stuff */
1991
1992/* constants for barrier state update */
1993#define KMP_INIT_BARRIER_STATE 0 /* should probably start from zero */
1994#define KMP_BARRIER_SLEEP_BIT 0 /* bit used for suspend/sleep part of state */
1995#define KMP_BARRIER_UNUSED_BIT 1 // bit that must never be set for valid state
1996#define KMP_BARRIER_BUMP_BIT 2 /* lsb used for bump of go/arrived state */
1997
1998#define KMP_BARRIER_SLEEP_STATE (1 << KMP_BARRIER_SLEEP_BIT)
1999#define KMP_BARRIER_UNUSED_STATE (1 << KMP_BARRIER_UNUSED_BIT)
2000#define KMP_BARRIER_STATE_BUMP (1 << KMP_BARRIER_BUMP_BIT)
2001
2002#if (KMP_BARRIER_SLEEP_BIT >= KMP_BARRIER_BUMP_BIT)
2003#error "Barrier sleep bit must be smaller than barrier bump bit"
2004#endif
2005#if (KMP_BARRIER_UNUSED_BIT >= KMP_BARRIER_BUMP_BIT)
2006#error "Barrier unused bit must be smaller than barrier bump bit"
2007#endif
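With the layout above (sleep flag in bit 0, unused in bit 1, bump starting at bit 2), the barrier arrived/go values advance in multiples of KMP_BARRIER_STATE_BUMP while the low bits stay free for flags. A small self-contained sketch of that arithmetic, using illustrative mirror constants rather than the macros themselves:

    #include <cassert>
    #include <cstdint>

    constexpr uint64_t SLEEP_STATE = 1u << 0; // mirrors KMP_BARRIER_SLEEP_STATE
    constexpr uint64_t STATE_BUMP = 1u << 2;  // mirrors KMP_BARRIER_STATE_BUMP

    static void barrier_state_sketch() {
      uint64_t b_arrived = 0;
      b_arrived += STATE_BUMP;  // one completed barrier episode bumps the counter
      b_arrived |= SLEEP_STATE; // a sleeping waiter only touches the low flag bits
      assert((b_arrived & ~(STATE_BUMP - 1)) == STATE_BUMP); // counter part intact
      assert(b_arrived & SLEEP_STATE);                       // flag still visible
    }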
2008
2009// Constants for release barrier wait state: currently, hierarchical only
2010#define KMP_BARRIER_NOT_WAITING 0 // Normal state; worker not in wait_sleep
2011#define KMP_BARRIER_OWN_FLAG \
2012 1 // Normal state; worker waiting on own b_go flag in release
2013#define KMP_BARRIER_PARENT_FLAG \
2014 2 // Special state; worker waiting on parent's b_go flag in release
2015#define KMP_BARRIER_SWITCH_TO_OWN_FLAG \
2016 3 // Special state; tells worker to shift from parent to own b_go
2017#define KMP_BARRIER_SWITCHING \
2018 4 // Special state; worker resets appropriate flag on wake-up
2019
2020#define KMP_NOT_SAFE_TO_REAP \
2021 0 // Thread th_reap_state: not safe to reap (tasking)
2022#define KMP_SAFE_TO_REAP 1 // Thread th_reap_state: safe to reap (not tasking)
2023
2024// The flag_type describes the storage used for the flag.
2025enum flag_type {
2026 flag32, /**< atomic 32 bit flags */
2027 flag64, /**< 64 bit flags */
2028 atomic_flag64, /**< atomic 64 bit flags */
2029 flag_oncore, /**< special 64-bit flag for on-core barrier (hierarchical) */
2030 flag_unset
2031};
2032
2033enum barrier_type {
2034 bs_plain_barrier = 0, /* 0, All non-fork/join barriers (except reduction
2035 barriers if enabled) */
2036 bs_forkjoin_barrier, /* 1, All fork/join (parallel region) barriers */
2037#if KMP_FAST_REDUCTION_BARRIER1
2038 bs_reduction_barrier, /* 2, All barriers that are used in reduction */
2039#endif // KMP_FAST_REDUCTION_BARRIER
2040 bs_last_barrier /* Just a placeholder to mark the end */
2041};
2042
2043// to work with reduction barriers just like with plain barriers
2044#if !KMP_FAST_REDUCTION_BARRIER1
2045#define bs_reduction_barrier bs_plain_barrier
2046#endif // KMP_FAST_REDUCTION_BARRIER
2047
2048typedef enum kmp_bar_pat { /* Barrier communication patterns */
2049 bp_linear_bar =
2050 0, /* Single level (degenerate) tree */
2051 bp_tree_bar =
2052 1, /* Balanced tree with branching factor 2^n */
2053 bp_hyper_bar = 2, /* Hypercube-embedded tree with min
2054 branching factor 2^n */
2055 bp_hierarchical_bar = 3, /* Machine hierarchy tree */
2056 bp_dist_bar = 4, /* Distributed barrier */
2057 bp_last_bar /* Placeholder to mark the end */
2058} kmp_bar_pat_e;
2059
2060#define KMP_BARRIER_ICV_PUSH1 1
2061
2062/* Record for holding the values of the internal controls stack records */
2063typedef struct kmp_internal_control {
2064 int serial_nesting_level; /* corresponds to the value of the
2065 th_team_serialized field */
2066 kmp_int8 dynamic; /* internal control for dynamic adjustment of threads (per
2067 thread) */
2068 kmp_int8
2069 bt_set; /* internal control for whether blocktime is explicitly set */
2070 int blocktime; /* internal control for blocktime */
2071#if KMP_USE_MONITOR
2072 int bt_intervals; /* internal control for blocktime intervals */
2073#endif
2074 int nproc; /* internal control for #threads for next parallel region (per
2075 thread) */
2076 int thread_limit; /* internal control for thread-limit-var */
2077 int max_active_levels; /* internal control for max_active_levels */
2078 kmp_r_sched_t
2079 sched; /* internal control for runtime schedule {sched,chunk} pair */
2080 kmp_proc_bind_t proc_bind; /* internal control for affinity */
2081 kmp_int32 default_device; /* internal control for default device */
2082 struct kmp_internal_control *next;
2083} kmp_internal_control_t;
2084
2085static inline void copy_icvs(kmp_internal_control_t *dst,
2086 kmp_internal_control_t *src) {
2087 *dst = *src;
2088}
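copy_icvs is a plain struct copy; the next pointer in kmp_internal_control is what turns saved snapshots into a stack. A hedged sketch of how such a push/pop could look (icv_push/icv_pop are illustrative helpers, not runtime functions; the real runtime manages this storage itself):

    // Illustrative only: saved ICV snapshots chained through the next field.
    static kmp_internal_control_t *icv_push(kmp_internal_control_t *top,
                                            kmp_internal_control_t *current) {
      kmp_internal_control_t *saved = new kmp_internal_control_t;
      copy_icvs(saved, current); // snapshot by plain struct assignment
      saved->next = top;         // link on top of the existing stack
      return saved;
    }

    static kmp_internal_control_t *icv_pop(kmp_internal_control_t *top,
                                           kmp_internal_control_t *current) {
      copy_icvs(current, top); // restore the most recent snapshot
      kmp_internal_control_t *rest = top->next;
      delete top;
      return rest;
    }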
2089
2090/* Thread barrier needs volatile barrier fields */
2091typedef struct KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_bstate {
2092 // th_fixed_icvs is aligned by virtue of kmp_bstate being aligned (and all
2093 // uses of it). It is not explicitly aligned below, because we *don't* want
2094 // it to be padded -- instead, we fit b_go into the same cache line with
2095 // th_fixed_icvs, enabling NGO cache-line stores in the hierarchical barrier.
2096 kmp_internal_control_t th_fixed_icvs; // Initial ICVs for the thread
2097 // Tuck b_go into end of th_fixed_icvs cache line, so it can be stored with
2098 // same NGO store
2099 volatile kmp_uint64 b_go; // STATE => task should proceed (hierarchical)
2100 KMP_ALIGN_CACHE__attribute__((aligned(64))) volatile kmp_uint64
2101 b_arrived; // STATE => task reached synch point.
2102 kmp_uint32 *skip_per_level;
2103 kmp_uint32 my_level;
2104 kmp_int32 parent_tid;
2105 kmp_int32 old_tid;
2106 kmp_uint32 depth;
2107 struct kmp_bstate *parent_bar;
2108 kmp_team_t *team;
2109 kmp_uint64 leaf_state;
2110 kmp_uint32 nproc;
2111 kmp_uint8 base_leaf_kids;
2112 kmp_uint8 leaf_kids;
2113 kmp_uint8 offset;
2114 kmp_uint8 wait_flag;
2115 kmp_uint8 use_oncore_barrier;
2116#if USE_DEBUGGER0
2117 // The following field is intended for the debugger solely. Only the worker
2118 // thread itself accesses this field: the worker increases it by 1 when it
2119 // arrives at a barrier.
2120 KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_uint b_worker_arrived;
2121#endif /* USE_DEBUGGER */
2122} kmp_bstate_t;
2123
2124union KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_barrier_union {
2125 double b_align; /* use worst case alignment */
2126 char b_pad[KMP_PAD(kmp_bstate_t, CACHE_LINE)];
2127 kmp_bstate_t bb;
2128};
2129
2130typedef union kmp_barrier_union kmp_balign_t;
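The KMP_PAD(type, CACHE_LINE) expression used in b_pad above rounds sizeof(type) up to the next multiple of the cache-line size, so the union always occupies whole cache lines. A stand-alone check of that arithmetic (PAD_TO simply mirrors the formula; it is not the runtime macro):

    #include <cstddef>

    // sizeof(type) rounded up to the next multiple of sz.
    #define PAD_TO(type, sz) (sizeof(type) + ((sz) - ((sizeof(type) - 1) % (sz)) - 1))

    struct Example70 { char bytes[70]; }; // 70 is not a multiple of 64

    static_assert(PAD_TO(Example70, 64) == 128, "rounded up to two cache lines");
    static_assert(PAD_TO(Example70, 64) % 64 == 0, "always whole cache lines");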
2131
2132/* Team barrier needs only non-volatile arrived counter */
2133union KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_barrier_team_union {
2134 double b_align; /* use worst case alignment */
2135 char b_pad[CACHE_LINE64];
2136 struct {
2137 kmp_uint64 b_arrived; /* STATE => task reached synch point. */
2138#if USE_DEBUGGER0
2139 // The following two fields are intended solely for the debugger. Only the
2140 // primary thread of the team accesses these fields: the first one is
2141 // increased by 1 when the primary thread arrives at a barrier, the second
2142 // one is increased by 1 when all the threads have arrived.
2143 kmp_uint b_master_arrived;
2144 kmp_uint b_team_arrived;
2145#endif
2146 };
2147};
2148
2149typedef union kmp_barrier_team_union kmp_balign_team_t;
2150
2151/* Padding for Linux* OS pthreads condition variables and mutexes used to signal
2152 threads when a condition changes. This is to workaround an NPTL bug where
2153 padding was added to pthread_cond_t which caused the initialization routine
2154 to write outside of the structure if compiled on pre-NPTL threads. */
2155#if KMP_OS_WINDOWS0
2156typedef struct kmp_win32_mutex {
2157 /* The Lock */
2158 CRITICAL_SECTION cs;
2159} kmp_win32_mutex_t;
2160
2161typedef struct kmp_win32_cond {
2162 /* Count of the number of waiters. */
2163 int waiters_count_;
2164
2165 /* Serialize access to <waiters_count_> */
2166 kmp_win32_mutex_t waiters_count_lock_;
2167
2168 /* Number of threads to release via a <cond_broadcast> or a <cond_signal> */
2169 int release_count_;
2170
2171 /* Keeps track of the current "generation" so that we don't allow */
2172 /* one thread to steal all the "releases" from the broadcast. */
2173 int wait_generation_count_;
2174
2175 /* A manual-reset event that's used to block and release waiting threads. */
2176 HANDLE event_;
2177} kmp_win32_cond_t;
2178#endif
2179
2180#if KMP_OS_UNIX1
2181
2182union KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_cond_union {
2183 double c_align;
2184 char c_pad[CACHE_LINE64];
2185 pthread_cond_t c_cond;
2186};
2187
2188typedef union kmp_cond_union kmp_cond_align_t;
2189
2190union KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_mutex_union {
2191 double m_align;
2192 char m_pad[CACHE_LINE64];
2193 pthread_mutex_t m_mutex;
2194};
2195
2196typedef union kmp_mutex_union kmp_mutex_align_t;
2197
2198#endif /* KMP_OS_UNIX */
2199
2200typedef struct kmp_desc_base {
2201 void *ds_stackbase;
2202 size_t ds_stacksize;
2203 int ds_stackgrow;
2204 kmp_thread_t ds_thread;
2205 volatile int ds_tid;
2206 int ds_gtid;
2207#if KMP_OS_WINDOWS0
2208 volatile int ds_alive;
2209 DWORD ds_thread_id;
2210/* ds_thread keeps the thread handle on Windows* OS. It is enough for RTL
2211 purposes. However, debugger support (libomp_db) cannot work with handles,
2212 because they are not comparable. For example, the debugger requests info
2213 about the thread with handle h. h is valid within the debugger process but
2214 meaningless within the debuggee process. Even if h is duplicated via
2215 DuplicateHandle() so that the result h' is valid within the debuggee
2216 process, h' is a *new* handle which does *not* compare equal to any other
2217 handle in the debuggee... The only way to compare handles is to convert
2218 them to system-wide ids. The GetThreadId() function is available only on
2219 Longhorn and Server 2003. :-( In contrast, GetCurrentThreadId() is
2220 available on all Windows* OS flavours (including Windows* 95). Thus, we get
2221 the thread id via GetCurrentThreadId() from within the thread and save it to let libomp_db identify threads. */
2222#endif /* KMP_OS_WINDOWS */
2223} kmp_desc_base_t;
2224
2225typedef union KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_desc {
2226 double ds_align; /* use worst case alignment */
2227 char ds_pad[KMP_PAD(kmp_desc_base_t, CACHE_LINE)];
2228 kmp_desc_base_t ds;
2229} kmp_desc_t;
2230
2231typedef struct kmp_local {
2232 volatile int this_construct; /* count of single's encountered by thread */
2233 void *reduce_data;
2234#if KMP_USE_BGET1
2235 void *bget_data;
2236 void *bget_list;
2237#if !USE_CMP_XCHG_FOR_BGET1
2238#ifdef USE_QUEUING_LOCK_FOR_BGET
2239 kmp_lock_t bget_lock; /* Lock for accessing bget free list */
2240#else
2241 kmp_bootstrap_lock_t bget_lock; // Lock for accessing bget free list. Must be
2242// bootstrap lock so we can use it at library
2243// shutdown.
2244#endif /* USE_LOCK_FOR_BGET */
2245#endif /* ! USE_CMP_XCHG_FOR_BGET */
2246#endif /* KMP_USE_BGET */
2247
2248 PACKED_REDUCTION_METHOD_T
2249 packed_reduction_method; /* stored by __kmpc_reduce*(), used by
2250 __kmpc_end_reduce*() */
2251
2252} kmp_local_t;
2253
2254#define KMP_CHECK_UPDATE(a, b) \
2255 if ((a) != (b)) \
2256 (a) = (b)
2257#define KMP_CHECK_UPDATE_SYNC(a, b) \
2258 if ((a) != (b)) \
2259 TCW_SYNC_PTR((a), (b))
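KMP_CHECK_UPDATE performs the store only when the value actually changes, which avoids dirtying a shared cache line on the common no-change path. A minimal usage sketch (the variable and function names are illustrative):

    // Illustrative only: skip the write, and the resulting cache-line
    // invalidation, when the value is already up to date.
    static int team_nproc_cache = 0;

    static void publish_nproc(int new_nproc) {
      KMP_CHECK_UPDATE(team_nproc_cache, new_nproc); // store only if different
    }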
2260
2261#define get__blocktime(xteam, xtid) \
2262 ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.blocktime)
2263#define get__bt_set(xteam, xtid) \
2264 ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_set)
2265#if KMP_USE_MONITOR
2266#define get__bt_intervals(xteam, xtid) \
2267 ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_intervals)
2268#endif
2269
2270#define get__dynamic_2(xteam, xtid) \
2271 ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.dynamic)
2272#define get__nproc_2(xteam, xtid) \
2273 ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.nproc)
2274#define get__sched_2(xteam, xtid) \
2275 ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.sched)
2276
2277#define set__blocktime_team(xteam, xtid, xval) \
2278 (((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.blocktime) = \
2279 (xval))
2280
2281#if KMP_USE_MONITOR
2282#define set__bt_intervals_team(xteam, xtid, xval) \
2283 (((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_intervals) = \
2284 (xval))
2285#endif
2286
2287#define set__bt_set_team(xteam, xtid, xval) \
2288 (((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_set) = (xval))
2289
2290#define set__dynamic(xthread, xval) \
2291 (((xthread)->th.th_current_task->td_icvs.dynamic) = (xval))
2292#define get__dynamic(xthread) \
2293 (((xthread)->th.th_current_task->td_icvs.dynamic) ? (FTN_TRUE) : (FTN_FALSE))
2294
2295#define set__nproc(xthread, xval) \
2296 (((xthread)->th.th_current_task->td_icvs.nproc) = (xval))
2297
2298#define set__thread_limit(xthread, xval) \
2299 (((xthread)->th.th_current_task->td_icvs.thread_limit) = (xval))
2300
2301#define set__max_active_levels(xthread, xval) \
2302 (((xthread)->th.th_current_task->td_icvs.max_active_levels) = (xval))
2303
2304#define get__max_active_levels(xthread) \
2305 ((xthread)->th.th_current_task->td_icvs.max_active_levels)
2306
2307#define set__sched(xthread, xval) \
2308 (((xthread)->th.th_current_task->td_icvs.sched) = (xval))
2309
2310#define set__proc_bind(xthread, xval) \
2311 (((xthread)->th.th_current_task->td_icvs.proc_bind) = (xval))
2312#define get__proc_bind(xthread) \
2313 ((xthread)->th.th_current_task->td_icvs.proc_bind)
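All of these accessors resolve to the ICV block hanging off the thread's current implicit task (th.th_current_task->td_icvs), so control values follow the task rather than the thread object. A hedged sketch of typical use, assuming the kmp_info_t type defined further below (the function name is illustrative):

    // Illustrative only.
    static void icv_sketch(kmp_info_t *thread, int nproc) {
      set__nproc(thread, nproc);                     // ICV write for the next region
      kmp_proc_bind_t bind = get__proc_bind(thread); // ICV read
      (void)bind;
    }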
2314
2315// OpenMP tasking data structures
2316
2317typedef enum kmp_tasking_mode {
2318 tskm_immediate_exec = 0,
2319 tskm_extra_barrier = 1,
2320 tskm_task_teams = 2,
2321 tskm_max = 2
2322} kmp_tasking_mode_t;
2323
2324extern kmp_tasking_mode_t
2325 __kmp_tasking_mode; /* determines how/when to execute tasks */
2326extern int __kmp_task_stealing_constraint;
2327extern int __kmp_enable_task_throttling;
2328extern kmp_int32 __kmp_default_device; // Set via OMP_DEFAULT_DEVICE if
2329// specified, defaults to 0 otherwise
2330// Set via OMP_MAX_TASK_PRIORITY if specified, defaults to 0 otherwise
2331extern kmp_int32 __kmp_max_task_priority;
2332// Set via KMP_TASKLOOP_MIN_TASKS if specified, defaults to 0 otherwise
2333extern kmp_uint64 __kmp_taskloop_min_tasks;
2334
2335/* NOTE: kmp_taskdata_t and kmp_task_t structures allocated in single block with
2336 taskdata first */
2337#define KMP_TASK_TO_TASKDATA(task) (((kmp_taskdata_t *)task) - 1)
2338#define KMP_TASKDATA_TO_TASK(taskdata) (kmp_task_t *)(taskdata + 1)
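Both macros rely on the single-block layout described in the note above: the kmp_taskdata_t header sits immediately before the kmp_task_t, so one pointer step converts between the two views. A hedged illustration, assuming the full kmp_taskdata_t/kmp_task_t definitions that appear later in this header (raw malloc is used only for the sketch; the runtime's own allocator also reserves room for shareds and privates):

    #include <cstdlib>

    // Sketch of the layout: [ kmp_taskdata_t | kmp_task_t | ... ].
    static void task_layout_sketch() {
      void *block = std::malloc(sizeof(kmp_taskdata_t) + sizeof(kmp_task_t));
      kmp_taskdata_t *taskdata = static_cast<kmp_taskdata_t *>(block);
      kmp_task_t *task = KMP_TASKDATA_TO_TASK(taskdata);   // (kmp_task_t *)(taskdata + 1)
      kmp_taskdata_t *header = KMP_TASK_TO_TASKDATA(task); // steps back to the header
      (void)header;
      std::free(block);
    }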
2339
2340// The tt_found_tasks flag is a signal to all threads in the team that tasks
2341// were spawned and queued since the previous barrier release.
2342#define KMP_TASKING_ENABLED(task_team) \
2343 (TRUE == TCR_SYNC_4((task_team)->tt.tt_found_tasks))
2344/*!
2345@ingroup BASIC_TYPES
2346@{
2347*/
2348
2349/*!
2350 */
2351typedef kmp_int32 (*kmp_routine_entry_t)(kmp_int32, void *);
2352
2353typedef union kmp_cmplrdata {
2354 kmp_int32 priority; /**< priority specified by user for the task */
2355 kmp_routine_entry_t
2356 destructors; /* pointer to function to invoke destructors of
2357 firstprivate C++ objects */
2358 /* future data */
2359} kmp_cmplrdata_t;
2360
2361/* sizeof_kmp_task_t passed as arg to kmpc_omp_task call */
2362/*!
2363 */
2364typedef struct kmp_task { /* GEH: Shouldn't this be aligned somehow? */
2365 void *shareds; /**< pointer to block of pointers to shared vars */
2366 kmp_routine_entry_t
2367 routine; /**< pointer to routine to call for executing task */
2368 kmp_int32 part_id; /**< part id for the task */
2369 kmp_cmplrdata_t
2370 data1; /* Two known optional additions: destructors and priority */
2371 kmp_cmplrdata_t data2; /* Process destructors first, priority second */
2372 /* future data */
2373 /* private vars */
2374} kmp_task_t;
2375
2376/*!
2377@}
2378*/
2379
2380typedef struct kmp_taskgroup {
2381 std::atomic<kmp_int32> count; // number of allocated and incomplete tasks
2382 std::atomic<kmp_int32>
2383 cancel_request; // request for cancellation of this taskgroup
2384 struct kmp_taskgroup *parent; // parent taskgroup
2385 // Block of data to perform task reduction
2386 void *reduce_data; // reduction related info
2387 kmp_int32 reduce_num_data; // number of data items to reduce
2388 uintptr_t *gomp_data; // gomp reduction data
2389} kmp_taskgroup_t;
2390
2391// forward declarations
2392typedef union kmp_depnode kmp_depnode_t;
2393typedef struct kmp_depnode_list kmp_depnode_list_t;
2394typedef struct kmp_dephash_entry kmp_dephash_entry_t;
2395
2396// macros for checking dep flag as an integer
2397#define KMP_DEP_IN 0x1
2398#define KMP_DEP_OUT 0x2
2399#define KMP_DEP_INOUT 0x3
2400#define KMP_DEP_MTX 0x4
2401#define KMP_DEP_SET 0x8
2402#define KMP_DEP_ALL 0x80
2403// Compiler sends us this info:
2404typedef struct kmp_depend_info {
2405 kmp_intptr_t base_addr;
2406 size_t len;
2407 union {
2408 kmp_uint8 flag; // flag as an unsigned char
2409 struct { // flag as a set of 8 bits
2410 unsigned in : 1;
2411 unsigned out : 1;
2412 unsigned mtx : 1;
2413 unsigned set : 1;
2414 unsigned unused : 3;
2415 unsigned all : 1;
2416 } flags;
2417 };
2418} kmp_depend_info_t;
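The union lets the dependence kind be set bit-by-bit or tested as one byte against the KMP_DEP_* masks above. A hedged check, assuming the little-endian bit-field layout implied by those mask values (type-punning through the union is shown only as a sketch):

    #include <cassert>

    static void dep_flag_sketch() {
      kmp_depend_info_t dep = {};
      dep.flags.in = 1;
      dep.flags.out = 1;
      assert(dep.flag == KMP_DEP_INOUT); // 0x1 | 0x2 == 0x3
    }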
2419
2420// Internal structures to work with task dependencies:
2421struct kmp_depnode_list {
2422 kmp_depnode_t *node;
2423 kmp_depnode_list_t *next;
2424};
2425
2426// Max number of mutexinoutset dependencies per node
2427#define MAX_MTX_DEPS 4
2428
2429typedef struct kmp_base_depnode {
2430 kmp_depnode_list_t *successors; /* used under lock */
2431 kmp_task_t *task; /* non-NULL if depnode is active, used under lock */
2432 kmp_lock_t *mtx_locks[MAX_MTX_DEPS4]; /* lock mutexinoutset dependent tasks */
2433 kmp_int32 mtx_num_locks; /* number of locks in mtx_locks array */
2434 kmp_lock_t lock; /* guards shared fields: task, successors */
2435#if KMP_SUPPORT_GRAPH_OUTPUT
2436 kmp_uint32 id;
2437#endif
2438 std::atomic<kmp_int32> npredecessors;
2439 std::atomic<kmp_int32> nrefs;
2440} kmp_base_depnode_t;
2441
2442union KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_depnode {
2443 double dn_align; /* use worst case alignment */
2444 char dn_pad[KMP_PAD(kmp_base_depnode_t, CACHE_LINE)];
2445 kmp_base_depnode_t dn;
2446};
2447
2448struct kmp_dephash_entry {
2449 kmp_intptr_t addr;
2450 kmp_depnode_t *last_out;
2451 kmp_depnode_list_t *last_set;
2452 kmp_depnode_list_t *prev_set;
2453 kmp_uint8 last_flag;
2454 kmp_lock_t *mtx_lock; /* is referenced by depnodes w/mutexinoutset dep */
2455 kmp_dephash_entry_t *next_in_bucket;
2456};
2457
2458typedef struct kmp_dephash {
2459 kmp_dephash_entry_t **buckets;
2460 size_t size;
2461 kmp_depnode_t *last_all;
2462 size_t generation;
2463 kmp_uint32 nelements;
2464 kmp_uint32 nconflicts;
2465} kmp_dephash_t;
2466
2467typedef struct kmp_task_affinity_info {
2468 kmp_intptr_t base_addr;
2469 size_t len;
2470 struct {
2471 bool flag1 : 1;
2472 bool flag2 : 1;
2473 kmp_int32 reserved : 30;
2474 } flags;
2475} kmp_task_affinity_info_t;
2476
2477typedef enum kmp_event_type_t {
2478 KMP_EVENT_UNINITIALIZED = 0,
2479 KMP_EVENT_ALLOW_COMPLETION = 1
2480} kmp_event_type_t;
2481
2482typedef struct {
2483 kmp_event_type_t type;
2484 kmp_tas_lock_t lock;
2485 union {
2486 kmp_task_t *task;
2487 } ed;
2488} kmp_event_t;
2489
2490#ifdef BUILD_TIED_TASK_STACK
2491
2492/* Tied Task stack definitions */
2493typedef struct kmp_stack_block {
2494 kmp_taskdata_t *sb_block[TASK_STACK_BLOCK_SIZE];
2495 struct kmp_stack_block *sb_next;
2496 struct kmp_stack_block *sb_prev;
2497} kmp_stack_block_t;
2498
2499typedef struct kmp_task_stack {
2500 kmp_stack_block_t ts_first_block; // first block of stack entries
2501 kmp_taskdata_t **ts_top; // pointer to the top of stack
2502 kmp_int32 ts_entries; // number of entries on the stack
2503} kmp_task_stack_t;
2504
2505#endif // BUILD_TIED_TASK_STACK
2506
2507typedef struct kmp_tasking_flags { /* Total struct must be exactly 32 bits */
2508 /* Compiler flags */ /* Total compiler flags must be 16 bits */
2509 unsigned tiedness : 1; /* task is either tied (1) or untied (0) */
2510 unsigned final : 1; /* task is final(1) so execute immediately */
2511 unsigned merged_if0 : 1; /* no __kmpc_task_{begin/complete}_if0 calls in if0
2512 code path */
2513 unsigned destructors_thunk : 1; /* set if the compiler creates a thunk to
2514 invoke destructors from the runtime */
2515 unsigned proxy : 1; /* task is a proxy task (it will be executed outside the
2516 context of the RTL) */
2517 unsigned priority_specified : 1; /* set if the compiler provides priority
2518 setting for the task */
2519 unsigned detachable : 1; /* 1 == can detach */
2520 unsigned hidden_helper : 1; /* 1 == hidden helper task */
2521 unsigned reserved : 8; /* reserved for compiler use */
2522
2523 /* Library flags */ /* Total library flags must be 16 bits */
2524 unsigned tasktype : 1; /* task is either explicit(1) or implicit (0) */
2525 unsigned task_serial : 1; // task is executed immediately (1) or deferred (0)
2526 unsigned tasking_ser : 1; // all tasks in team are either executed immediately
2527 // (1) or may be deferred (0)
2528 unsigned team_serial : 1; // entire team is serial (1) [1 thread] or parallel
2529 // (0) [>= 2 threads]
2530 /* If either team_serial or tasking_ser is set, task team may be NULL */
2531 /* Task State Flags: */
2532 unsigned started : 1; /* 1==started, 0==not started */
2533 unsigned executing : 1; /* 1==executing, 0==not executing */
2534 unsigned complete : 1; /* 1==complete, 0==not complete */
2535 unsigned freed : 1; /* 1==freed, 0==allocated */
2536 unsigned native : 1; /* 1==gcc-compiled task, 0==intel */
2537 unsigned reserved31 : 7; /* reserved for library use */
2538
2539} kmp_tasking_flags_t;
2540
2541typedef struct kmp_target_data {
2542 void *async_handle; // libomptarget async handle for task completion query
2543} kmp_target_data_t;
2544
2545struct kmp_taskdata { /* aligned during dynamic allocation */
2546 kmp_int32 td_task_id; /* id, assigned by debugger */
2547 kmp_tasking_flags_t td_flags; /* task flags */
2548 kmp_team_t *td_team; /* team for this task */
2549 kmp_info_p *td_alloc_thread; /* thread that allocated data structures */
2550 /* Currently not used except for perhaps IDB */
2551 kmp_taskdata_t *td_parent; /* parent task */
2552 kmp_int32 td_level; /* task nesting level */
2553 std::atomic<kmp_int32> td_untied_count; // untied task active parts counter
2554 ident_t *td_ident; /* task identifier */
2555 // Taskwait data.
2556 ident_t *td_taskwait_ident;
2557 kmp_uint32 td_taskwait_counter;
2558 kmp_int32 td_taskwait_thread; /* gtid + 1 of thread encountered taskwait */
2559 KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_internal_control_t
2560 td_icvs; /* Internal control variables for the task */
2561 KMP_ALIGN_CACHE__attribute__((aligned(64))) std::atomic<kmp_int32>
2562 td_allocated_child_tasks; /* Child tasks (+ current task) not yet
2563 deallocated */
2564 std::atomic<kmp_int32>
2565 td_incomplete_child_tasks; /* Child tasks not yet complete */
2566 kmp_taskgroup_t
2567 *td_taskgroup; // Each task keeps pointer to its current taskgroup
2568 kmp_dephash_t
2569 *td_dephash; // Dependencies for children tasks are tracked from here
2570 kmp_depnode_t
2571 *td_depnode; // Pointer to graph node if this task has dependencies
2572 kmp_task_team_t *td_task_team;
2573 size_t td_size_alloc; // Size of task structure, including shareds etc.
2574#if defined(KMP_GOMP_COMPAT)
2575 // 4 or 8 byte integers for the loop bounds in GOMP_taskloop
2576 kmp_int32 td_size_loop_bounds;
2577#endif
2578 kmp_taskdata_t *td_last_tied; // keep tied task for task scheduling constraint
2579#if defined(KMP_GOMP_COMPAT)
2580 // GOMP sends in a copy function for copy constructors
2581 void (*td_copy_func)(void *, void *);
2582#endif
2583 kmp_event_t td_allow_completion_event;
2584#if OMPT_SUPPORT1
2585 ompt_task_info_t ompt_task_info;
2586#endif
2587 kmp_target_data_t td_target_data;
2588}; // struct kmp_taskdata
2589
2590// Make sure padding above worked
2591KMP_BUILD_ASSERT(sizeof(kmp_taskdata_t) % sizeof(void *) == 0);
2592
2593// Data for task team but per thread
2594typedef struct kmp_base_thread_data {
2595 kmp_info_p *td_thr; // Pointer back to thread info
2596 // Used only in __kmp_execute_tasks_template, maybe not avail until task is
2597 // queued?
2598 kmp_bootstrap_lock_t td_deque_lock; // Lock for accessing deque
2599 kmp_taskdata_t *
2600 *td_deque; // Deque of tasks encountered by td_thr, dynamically allocated
2601 kmp_int32 td_deque_size; // Size of deque
2602 kmp_uint32 td_deque_head; // Head of deque (will wrap)
2603 kmp_uint32 td_deque_tail; // Tail of deque (will wrap)
2604 kmp_int32 td_deque_ntasks; // Number of tasks in deque
2605 // GEH: shouldn't this be volatile since used in while-spin?
2606 kmp_int32 td_deque_last_stolen; // Thread number of last successful steal
2607#ifdef BUILD_TIED_TASK_STACK
2608 kmp_task_stack_t td_susp_tied_tasks; // Stack of suspended tied tasks for task
2609// scheduling constraint
2610#endif // BUILD_TIED_TASK_STACK
2611} kmp_base_thread_data_t;
2612
2613#define TASK_DEQUE_BITS 8 // Used solely to define INITIAL_TASK_DEQUE_SIZE
2614#define INITIAL_TASK_DEQUE_SIZE (1 << TASK_DEQUE_BITS)
2615
2616#define TASK_DEQUE_SIZE(td) ((td).td_deque_size)
2617#define TASK_DEQUE_MASK(td) ((td).td_deque_size - 1)
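Because the deque size is kept at a power of two (INITIAL_TASK_DEQUE_SIZE is 1 << 8), TASK_DEQUE_MASK turns wrap-around of td_deque_head/td_deque_tail into a single AND. A stand-alone sketch of that indexing (plain integers, not the real deque):

    #include <cassert>
    #include <cstdint>

    static void deque_wrap_sketch() {
      const uint32_t size = 1u << 8;  // INITIAL_TASK_DEQUE_SIZE
      const uint32_t mask = size - 1; // what TASK_DEQUE_MASK yields
      uint32_t tail = size - 1;       // last valid slot
      tail = (tail + 1) & mask;       // stepping past the end wraps to 0
      assert(tail == 0);
    }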
2618
2619typedef union KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_thread_data {
2620 kmp_base_thread_data_t td;
2621 double td_align; /* use worst case alignment */
2622 char td_pad[KMP_PAD(kmp_base_thread_data_t, CACHE_LINE)];
2623} kmp_thread_data_t;
2624
2625typedef struct kmp_task_pri {
2626 kmp_thread_data_t td;
2627 kmp_int32 priority;
2628 kmp_task_pri *next;
2629} kmp_task_pri_t;
2630
2631// Data for task teams which are used when tasking is enabled for the team
2632typedef struct kmp_base_task_team {
2633 kmp_bootstrap_lock_t
2634 tt_threads_lock; /* Lock used to allocate per-thread part of task team */
2635 /* must be bootstrap lock since used at library shutdown*/
2636
2637 // TODO: check performance vs kmp_tas_lock_t
2638 kmp_bootstrap_lock_t tt_task_pri_lock; /* Lock to access priority tasks */
2639 kmp_task_pri_t *tt_task_pri_list;
2640
2641 kmp_task_team_t *tt_next; /* For linking the task team free list */
2642 kmp_thread_data_t
2643 *tt_threads_data; /* Array of per-thread structures for task team */
2644 /* Data survives task team deallocation */
2645 kmp_int32 tt_found_tasks; /* Have we found tasks and queued them while
2646 executing this team? */
2647 /* TRUE means tt_threads_data is set up and initialized */
2648 kmp_int32 tt_nproc; /* #threads in team */
2649 kmp_int32 tt_max_threads; // # entries allocated for threads_data array
2650 kmp_int32 tt_found_proxy_tasks; // found proxy tasks since last barrier
2651 kmp_int32 tt_untied_task_encountered;
2652 std::atomic<kmp_int32> tt_num_task_pri; // number of priority tasks enqueued
2653 // A hidden helper task has been encountered in this task team, so we must
2654 // wait when waiting on the task team
2655 kmp_int32 tt_hidden_helper_task_encountered;
2656
2657 KMP_ALIGN_CACHE__attribute__((aligned(64)))
2658 std::atomic<kmp_int32> tt_unfinished_threads; /* #threads still active */
2659
2660 KMP_ALIGN_CACHE__attribute__((aligned(64)))
2661 volatile kmp_uint32
2662 tt_active; /* is the team still actively executing tasks */
2663} kmp_base_task_team_t;
2664
2665union KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_task_team {
2666 kmp_base_task_team_t tt;
2667 double tt_align; /* use worst case alignment */
2668 char tt_pad[KMP_PAD(kmp_base_task_team_t, CACHE_LINE)];
2669};
2670
2671#if (USE_FAST_MEMORY3 == 3) || (USE_FAST_MEMORY3 == 5)
2672// Free lists keep same-size free memory slots for fast memory allocation
2673// routines
2674typedef struct kmp_free_list {
2675 void *th_free_list_self; // Self-allocated tasks free list
2676 void *th_free_list_sync; // Self-allocated tasks stolen/returned by other
2677 // threads
2678 void *th_free_list_other; // Non-self free list (to be returned to owner's
2679 // sync list)
2680} kmp_free_list_t;
2681#endif
2682#if KMP_NESTED_HOT_TEAMS1
2683// Hot teams array keeps hot teams and their sizes for given thread. Hot teams
2684// are not put in teams pool, and they don't put threads in threads pool.
2685typedef struct kmp_hot_team_ptr {
2686 kmp_team_p *hot_team; // pointer to hot_team of given nesting level
2687 kmp_int32 hot_team_nth; // number of threads allocated for the hot_team
2688} kmp_hot_team_ptr_t;
2689#endif
2690typedef struct kmp_teams_size {
2691 kmp_int32 nteams; // number of teams in a league
2692 kmp_int32 nth; // number of threads in each team of the league
2693} kmp_teams_size_t;
2694
2695// This struct stores a thread that acts as a "root" for a contention
2696// group. Contention groups are rooted at kmp_root threads, but also at
2697// each primary thread of each team created in the teams construct.
2698// This struct therefore also stores a thread_limit associated with
2699// that contention group, and a counter to track the number of threads
2700// active in that contention group. Each thread has a list of these: CG
2701// root threads have an entry in their list in which cg_root refers to
2702// the thread itself, whereas other workers in the CG will have a
2703// single entry where cg_root is same as the entry containing their CG
2704// root. When a thread encounters a teams construct, it will add a new
2705// entry to the front of its list, because it now roots a new CG.
2706typedef struct kmp_cg_root {
2707 kmp_info_p *cg_root; // "root" thread for a contention group
2708 // The CG root's limit comes from OMP_THREAD_LIMIT for root threads, or
2709 // thread_limit clause for teams primary threads
2710 kmp_int32 cg_thread_limit;
2711 kmp_int32 cg_nthreads; // Count of active threads in CG rooted at cg_root
2712 struct kmp_cg_root *up; // pointer to higher level CG root in list
2713} kmp_cg_root_t;
2714
2715// OpenMP thread data structures
2716
2717typedef struct KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_base_info {
2718 /* Start with the readonly data which is cache aligned and padded. This is
2719 written before the thread starts working by the primary thread. Uber
2720 masters may update themselves later. Usage does not consider serialized
2721 regions. */
2722 kmp_desc_t th_info;
2723 kmp_team_p *th_team; /* team we belong to */
2724 kmp_root_p *th_root; /* pointer to root of task hierarchy */
2725 kmp_info_p *th_next_pool; /* next available thread in the pool */
2726 kmp_disp_t *th_dispatch; /* thread's dispatch data */
2727 int th_in_pool; /* in thread pool (32 bits for TCR/TCW) */
2728
2729 /* The following are cached from the team info structure */
2730 /* TODO use these in more places as determined to be needed via profiling */
2731 int th_team_nproc; /* number of threads in a team */
2732 kmp_info_p *th_team_master; /* the team's primary thread */
2733 int th_team_serialized; /* team is serialized */
2734 microtask_t th_teams_microtask; /* save entry address for teams construct */
2735 int th_teams_level; /* save initial level of teams construct */
2736/* it is 0 on device but may be any on host */
2737
2738/* The blocktime info is copied from the team struct to the thread struct */
2739/* at the start of a barrier, and the values stored in the team are used */
2740/* at points in the code where the team struct is no longer guaranteed */
2741/* to exist (from the POV of worker threads). */
2742#if KMP_USE_MONITOR
2743 int th_team_bt_intervals;
2744 int th_team_bt_set;
2745#else
2746 kmp_uint64 th_team_bt_intervals;
2747#endif
2748
2749#if KMP_AFFINITY_SUPPORTED1
2750 kmp_affin_mask_t *th_affin_mask; /* thread's current affinity mask */
2751 kmp_affinity_ids_t th_topology_ids; /* thread's current topology ids */
2752 kmp_affinity_attrs_t th_topology_attrs; /* thread's current topology attrs */
2753#endif
2754 omp_allocator_handle_t th_def_allocator; /* default allocator */
2755 /* The data set by the primary thread at reinit, then R/W by the worker */
2756 KMP_ALIGN_CACHE__attribute__((aligned(64))) int
2757 th_set_nproc; /* if > 0, then only use this request for the next fork */
2758#if KMP_NESTED_HOT_TEAMS1
2759 kmp_hot_team_ptr_t *th_hot_teams; /* array of hot teams */
2760#endif
2761 kmp_proc_bind_t
2762 th_set_proc_bind; /* if != proc_bind_default, use request for next fork */
2763 kmp_teams_size_t
2764 th_teams_size; /* number of teams/threads in teams construct */
2765#if KMP_AFFINITY_SUPPORTED1
2766 int th_current_place; /* place currently bound to */
2767 int th_new_place; /* place to bind to in par reg */
2768 int th_first_place; /* first place in partition */
2769 int th_last_place; /* last place in partition */
2770#endif
2771 int th_prev_level; /* previous level for affinity format */
2772 int th_prev_num_threads; /* previous num_threads for affinity format */
2773#if USE_ITT_BUILD1
2774 kmp_uint64 th_bar_arrive_time; /* arrival to barrier timestamp */
2775 kmp_uint64 th_bar_min_time; /* minimum arrival time at the barrier */
2776 kmp_uint64 th_frame_time; /* frame timestamp */
2777#endif /* USE_ITT_BUILD */
2778 kmp_local_t th_local;
2779 struct private_common *th_pri_head;
2780
2781 /* Now the data only used by the worker (after initial allocation) */
2782 /* TODO the first serial team should actually be stored in the info_t
2783 structure. this will help reduce initial allocation overhead */
2784 KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_team_p
2785 *th_serial_team; /*serialized team held in reserve*/
2786
2787#if OMPT_SUPPORT1
2788 ompt_thread_info_t ompt_thread_info;
2789#endif
2790
2791 /* The following are also read by the primary thread during reinit */
2792 struct common_table *th_pri_common;
2793
2794 volatile kmp_uint32 th_spin_here; /* thread-local location for spinning */
2795 /* while awaiting queuing lock acquire */
2796
2797 volatile void *th_sleep_loc; // this points at a kmp_flag<T>
2798 flag_type th_sleep_loc_type; // enum type of flag stored in th_sleep_loc
2799
2800 ident_t *th_ident;
2801 unsigned th_x; // Random number generator data
2802 unsigned th_a; // Random number generator data
2803
2804 /* Tasking-related data for the thread */
2805 kmp_task_team_t *th_task_team; // Task team struct
2806 kmp_taskdata_t *th_current_task; // Innermost Task being executed
2807 kmp_uint8 th_task_state; // alternating 0/1 for task team identification
2808 kmp_uint8 *th_task_state_memo_stack; // Stack holding memos of th_task_state
2809 // at nested levels
2810 kmp_uint32 th_task_state_top; // Top element of th_task_state_memo_stack
2811 kmp_uint32 th_task_state_stack_sz; // Size of th_task_state_memo_stack
2812 kmp_uint32 th_reap_state; // Non-zero indicates thread is not
2813 // tasking, thus safe to reap
2814
2815 /* More stuff for keeping track of active/sleeping threads (this part is
2816 written by the worker thread) */
2817 kmp_uint8 th_active_in_pool; // included in count of #active threads in pool
2818 int th_active; // ! sleeping; 32 bits for TCR/TCW
2819 std::atomic<kmp_uint32> th_used_in_team; // Flag indicating use in team
2820 // 0 = not used in team; 1 = used in team;
2821 // 2 = transitioning to not used in team; 3 = transitioning to used in team
2822 struct cons_header *th_cons; // used for consistency check
2823#if KMP_USE_HIER_SCHED0
2824 // used for hierarchical scheduling
2825 kmp_hier_private_bdata_t *th_hier_bar_data;
2826#endif
2827
2828 /* Add the synchronizing data which is cache aligned and padded. */
2829 KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_balign_t th_bar[bs_last_barrier];
2830
2831 KMP_ALIGN_CACHE__attribute__((aligned(64))) volatile kmp_int32
2832 th_next_waiting; /* gtid+1 of next thread on lock wait queue, 0 if none */
2833
2834#if (USE_FAST_MEMORY3 == 3) || (USE_FAST_MEMORY3 == 5)
2835#define NUM_LISTS 4
2836 kmp_free_list_t th_free_lists[NUM_LISTS4]; // Free lists for fast memory
2837// allocation routines
2838#endif
2839
2840#if KMP_OS_WINDOWS0
2841 kmp_win32_cond_t th_suspend_cv;
2842 kmp_win32_mutex_t th_suspend_mx;
2843 std::atomic<int> th_suspend_init;
2844#endif
2845#if KMP_OS_UNIX1
2846 kmp_cond_align_t th_suspend_cv;
2847 kmp_mutex_align_t th_suspend_mx;
2848 std::atomic<int> th_suspend_init_count;
2849#endif
2850
2851#if USE_ITT_BUILD1
2852 kmp_itt_mark_t th_itt_mark_single;
2853// alignment ???
2854#endif /* USE_ITT_BUILD */
2855#if KMP_STATS_ENABLED0
2856 kmp_stats_list *th_stats;
2857#endif
2858#if KMP_OS_UNIX1
2859 std::atomic<bool> th_blocking;
2860#endif
2861 kmp_cg_root_t *th_cg_roots; // list of cg_roots associated with this thread
2862} kmp_base_info_t;
2863
2864typedef union KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_info {
2865 double th_align; /* use worst case alignment */
2866 char th_pad[KMP_PAD(kmp_base_info_t, CACHE_LINE)];
2867 kmp_base_info_t th;
2868} kmp_info_t;
2869
2870// OpenMP thread team data structures
2871
2872typedef struct kmp_base_data {
2873 volatile kmp_uint32 t_value;
2874} kmp_base_data_t;
2875
2876typedef union KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_sleep_team {
2877 double dt_align; /* use worst case alignment */
2878 char dt_pad[KMP_PAD(kmp_base_data_t, CACHE_LINE)];
2879 kmp_base_data_t dt;
2880} kmp_sleep_team_t;
2881
2882typedef union KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_ordered_team {
2883 double dt_align; /* use worst case alignment */
2884 char dt_pad[KMP_PAD(kmp_base_data_t, CACHE_LINE)];
2885 kmp_base_data_t dt;
2886} kmp_ordered_team_t;
2887
2888typedef int (*launch_t)(int gtid);
2889
2890/* Minimum number of ARGV entries to malloc if necessary */
2891#define KMP_MIN_MALLOC_ARGV_ENTRIES 100
2892
2893// Set up how many argv pointers will fit in cache lines containing
2894// t_inline_argv. Historically, we have supported at least 96 bytes. Using a
2895 // larger value to leave more space between the primary-write/worker-read section
2896 // and the read/write-by-all section seems to buy more performance on EPCC PARALLEL.
2897#if KMP_ARCH_X860 || KMP_ARCH_X86_641
2898#define KMP_INLINE_ARGV_BYTES \
2899 (4 * CACHE_LINE - \
2900 ((3 * KMP_PTR_SKIP + 2 * sizeof(int) + 2 * sizeof(kmp_int8) + \
2901 sizeof(kmp_int16) + sizeof(kmp_uint32)) % \
2902 CACHE_LINE))
2903#else
2904#define KMP_INLINE_ARGV_BYTES \
2905 (2 * CACHE_LINE - ((3 * KMP_PTR_SKIP + 2 * sizeof(int)) % CACHE_LINE))
2906#endif
2907#define KMP_INLINE_ARGV_ENTRIES (int)(KMP_INLINE_ARGV_BYTES / KMP_PTR_SKIP)
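As a worked example, assuming LP64 sizes (8-byte pointers for KMP_PTR_SKIP, 4-byte int and kmp_uint32, 2-byte kmp_int16, 1-byte kmp_int8): the neighboring fields occupy 3*8 + 2*4 + 2*1 + 2 + 4 = 40 bytes, so KMP_INLINE_ARGV_BYTES is 4*64 - 40 = 216 and KMP_INLINE_ARGV_ENTRIES is 216 / 8 = 27 pointers, exactly filling four cache lines together with those fields:

    // Stand-alone check of the arithmetic under the size assumptions above.
    static_assert(4 * 64 - ((3 * 8 + 2 * 4 + 2 * 1 + 2 + 4) % 64) == 216, "bytes");
    static_assert((4 * 64 - ((3 * 8 + 2 * 4 + 2 * 1 + 2 + 4) % 64)) / 8 == 27, "entries");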
2908
2909typedef struct KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_base_team {
2910 // Synchronization Data
2911 // ---------------------------------------------------------------------------
2912 KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_ordered_team_t t_ordered;
2913 kmp_balign_team_t t_bar[bs_last_barrier];
2914 std::atomic<int> t_construct; // count of single directive encountered by team
2915 char pad[sizeof(kmp_lock_t)]; // padding to maintain performance on big iron
2916
2917 // [0] - parallel / [1] - worksharing task reduction data shared by taskgroups
2918 std::atomic<void *> t_tg_reduce_data[2]; // to support task modifier
2919 std::atomic<int> t_tg_fini_counter[2]; // sync end of task reductions
2920
2921 // Primary thread only
2922 // ---------------------------------------------------------------------------
2923 KMP_ALIGN_CACHE__attribute__((aligned(64))) int t_master_tid; // tid of primary thread in parent team
2924 int t_master_this_cons; // "this_construct" single counter of primary thread
2925 // in parent team
2926 ident_t *t_ident; // if volatile, have to change too much other crud to
2927 // volatile too
2928 kmp_team_p *t_parent; // parent team
2929 kmp_team_p *t_next_pool; // next free team in the team pool
2930 kmp_disp_t *t_dispatch; // thread's dispatch data
2931 kmp_task_team_t *t_task_team[2]; // Task team struct; switch between 2
2932 kmp_proc_bind_t t_proc_bind; // bind type for par region
2933#if USE_ITT_BUILD1
2934 kmp_uint64 t_region_time; // region begin timestamp
2935#endif /* USE_ITT_BUILD */
2936
2937 // Primary thread write, workers read
2938 // --------------------------------------------------------------------------
2939 KMP_ALIGN_CACHE__attribute__((aligned(64))) void **t_argv;
2940 int t_argc;
2941 int t_nproc; // number of threads in team
2942 microtask_t t_pkfn;
2943 launch_t t_invoke; // procedure to launch the microtask
2944
2945#if OMPT_SUPPORT1
2946 ompt_team_info_t ompt_team_info;
2947 ompt_lw_taskteam_t *ompt_serialized_team_info;
2948#endif
2949
2950#if KMP_ARCH_X860 || KMP_ARCH_X86_641
2951 kmp_int8 t_fp_control_saved;
2952 kmp_int8 t_pad2b;
2953 kmp_int16 t_x87_fpu_control_word; // FP control regs
2954 kmp_uint32 t_mxcsr;
2955#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
2956
2957 void *t_inline_argv[KMP_INLINE_ARGV_ENTRIES];
2958
2959 KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_info_t **t_threads;
2960 kmp_taskdata_t
2961 *t_implicit_task_taskdata; // Taskdata for the thread's implicit task
2962 int t_level; // nested parallel level
2963
2964 KMP_ALIGN_CACHE__attribute__((aligned(64))) int t_max_argc;
2965 int t_max_nproc; // max threads this team can handle (dynamically expandable)
2966 int t_serialized; // levels deep of serialized teams
2967 dispatch_shared_info_t *t_disp_buffer; // buffers for dispatch system
2968 int t_id; // team's id, assigned by debugger.
2969 int t_active_level; // nested active parallel level
2970 kmp_r_sched_t t_sched; // run-time schedule for the team
2971#if KMP_AFFINITY_SUPPORTED1
2972 int t_first_place; // first & last place in parent thread's partition.
2973 int t_last_place; // Restore these values to primary thread after par region.
2974#endif // KMP_AFFINITY_SUPPORTED
2975 int t_display_affinity;
2976 int t_size_changed; // team size was changed?: 0: no, 1: yes, -1: changed via
2977 // omp_set_num_threads() call
2978 omp_allocator_handle_t t_def_allocator; /* default allocator */
2979
2980// Read/write by workers as well
2981#if (KMP_ARCH_X860 || KMP_ARCH_X86_641)
2982 // Using CACHE_LINE=64 reduces memory footprint, but causes a big perf
2983 // regression of epcc 'parallel' and 'barrier' on fxe256lin01. This extra
2984 // padding serves to fix the performance of epcc 'parallel' and 'barrier' when
2985 // CACHE_LINE=64. TODO: investigate more and get rid of this padding.
2986 char dummy_padding[1024];
2987#endif
2988 // Internal control stack for additional nested teams.
2989 KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_internal_control_t *t_control_stack_top;
2990 // for SERIALIZED teams nested 2 or more levels deep
2991 // typed flag to store request state of cancellation
2992 std::atomic<kmp_int32> t_cancel_request;
2993 int t_master_active; // save on fork, restore on join
2994 void *t_copypriv_data; // team specific pointer to copyprivate data array
2995#if KMP_OS_WINDOWS0
2996 std::atomic<kmp_uint32> t_copyin_counter;
2997#endif
2998#if USE_ITT_BUILD1
2999 void *t_stack_id; // team specific stack stitching id (for ittnotify)
3000#endif /* USE_ITT_BUILD */
3001 distributedBarrier *b; // Distributed barrier data associated with team
3002} kmp_base_team_t;
3003
3004union KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_team {
3005 kmp_base_team_t t;
3006 double t_align; /* use worst case alignment */
3007 char t_pad[KMP_PAD(kmp_base_team_t, CACHE_LINE)];
3008};
3009
3010typedef union KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_time_global {
3011 double dt_align; /* use worst case alignment */
3012 char dt_pad[KMP_PAD(kmp_base_data_t, CACHE_LINE)];
3013 kmp_base_data_t dt;
3014} kmp_time_global_t;
3015
3016typedef struct kmp_base_global {
3017 /* cache-aligned */
3018 kmp_time_global_t g_time;
3019
3020 /* non cache-aligned */
3021 volatile int g_abort;
3022 volatile int g_done;
3023
3024 int g_dynamic;
3025 enum dynamic_mode g_dynamic_mode;
3026} kmp_base_global_t;
3027
3028typedef union KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_global {
3029 kmp_base_global_t g;
3030 double g_align; /* use worst case alignment */
3031 char g_pad[KMP_PAD(kmp_base_global_t, CACHE_LINE)];
3032} kmp_global_t;
3033
3034typedef struct kmp_base_root {
3035 // TODO: GEH - combine r_active with r_in_parallel then r_active ==
3036 // (r_in_parallel>= 0)
3037 // TODO: GEH - then replace r_active with t_active_levels if we can to reduce
3038 // the synch overhead or keeping r_active
3039 volatile int r_active; /* TRUE if some region in a nest has > 1 thread */
3040 // keeps a count of active parallel regions per root
3041 std::atomic<int> r_in_parallel;
3042 // GEH: This is misnamed, should be r_active_levels
3043 kmp_team_t *r_root_team;
3044 kmp_team_t *r_hot_team;
3045 kmp_info_t *r_uber_thread;
3046 kmp_lock_t r_begin_lock;
3047 volatile int r_begin;
3048 int r_blocktime; /* blocktime for this root and descendants */
3049#if KMP_AFFINITY_SUPPORTED1
3050 int r_affinity_assigned;
3051#endif // KMP_AFFINITY_SUPPORTED
3052} kmp_base_root_t;
3053
3054typedef union KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_root {
3055 kmp_base_root_t r;
3056 double r_align; /* use worst case alignment */
3057 char r_pad[KMP_PAD(kmp_base_root_t, CACHE_LINE)];
3058} kmp_root_t;
3059
3060struct fortran_inx_info {
3061 kmp_int32 data;
3062};
3063
3064// This list type exists to hold old __kmp_threads arrays so that
3065// old references to them may complete while reallocation takes place when
3066// expanding the array. The items in this list are kept alive until library
3067// shutdown.
3068typedef struct kmp_old_threads_list_t {
3069 kmp_info_t **threads;
3070 struct kmp_old_threads_list_t *next;
3071} kmp_old_threads_list_t;
3072
3073/* ------------------------------------------------------------------------ */
3074
3075extern int __kmp_settings;
3076extern int __kmp_duplicate_library_ok;
3077#if USE_ITT_BUILD1
3078extern int __kmp_forkjoin_frames;
3079extern int __kmp_forkjoin_frames_mode;
3080#endif
3081extern PACKED_REDUCTION_METHOD_T __kmp_force_reduction_method;
3082extern int __kmp_determ_red;
3083
3084#ifdef KMP_DEBUG1
3085extern int kmp_a_debug;
3086extern int kmp_b_debug;
3087extern int kmp_c_debug;
3088extern int kmp_d_debug;
3089extern int kmp_e_debug;
3090extern int kmp_f_debug;
3091#endif /* KMP_DEBUG */
3092
3093/* For debug information logging using rotating buffer */
3094#define KMP_DEBUG_BUF_LINES_INIT 512
3095#define KMP_DEBUG_BUF_LINES_MIN 1
3096
3097#define KMP_DEBUG_BUF_CHARS_INIT 128
3098#define KMP_DEBUG_BUF_CHARS_MIN 2
3099
3100extern int
3101 __kmp_debug_buf; /* TRUE means use buffer, FALSE means print to stderr */
3102extern int __kmp_debug_buf_lines; /* How many lines of debug stored in buffer */
3103extern int
3104 __kmp_debug_buf_chars; /* How many characters allowed per line in buffer */
3105extern int __kmp_debug_buf_atomic; /* TRUE means use atomic update of buffer
3106 entry pointer */
3107
3108extern char *__kmp_debug_buffer; /* Debug buffer itself */
3109extern std::atomic<int> __kmp_debug_count; /* Counter for number of lines
3110 printed in buffer so far */
3111extern int __kmp_debug_buf_warn_chars; /* Keep track of char increase
3112 recommended in warnings */
3113/* end rotating debug buffer */
3114
3115#ifdef KMP_DEBUG1
3116extern int __kmp_par_range; /* +1 => only go par for constructs in range */
3117
3118#define KMP_PAR_RANGE_ROUTINE_LEN 1024
3119extern char __kmp_par_range_routine[KMP_PAR_RANGE_ROUTINE_LEN];
3120#define KMP_PAR_RANGE_FILENAME_LEN 1024
3121extern char __kmp_par_range_filename[KMP_PAR_RANGE_FILENAME_LEN];
3122extern int __kmp_par_range_lb;
3123extern int __kmp_par_range_ub;
3124#endif
3125
3126/* For printing out dynamic storage map for threads and teams */
3127extern int
3128 __kmp_storage_map; /* True means print storage map for threads and teams */
3129extern int __kmp_storage_map_verbose; /* True means storage map includes
3130 placement info */
3131extern int __kmp_storage_map_verbose_specified;
3132
3133#if KMP_ARCH_X860 || KMP_ARCH_X86_641
3134extern kmp_cpuinfo_t __kmp_cpuinfo;
3135static inline bool __kmp_is_hybrid_cpu() { return __kmp_cpuinfo.flags.hybrid; }
3136#elif KMP_OS_DARWIN0 && KMP_ARCH_AARCH640
3137static inline bool __kmp_is_hybrid_cpu() { return true; }
3138#else
3139static inline bool __kmp_is_hybrid_cpu() { return false; }
3140#endif
3141
3142extern volatile int __kmp_init_serial;
3143extern volatile int __kmp_init_gtid;
3144extern volatile int __kmp_init_common;
3145extern volatile int __kmp_need_register_serial;
3146extern volatile int __kmp_init_middle;
3147extern volatile int __kmp_init_parallel;
3148#if KMP_USE_MONITOR
3149extern volatile int __kmp_init_monitor;
3150#endif
3151extern volatile int __kmp_init_user_locks;
3152extern volatile int __kmp_init_hidden_helper_threads;
3153extern int __kmp_init_counter;
3154extern int __kmp_root_counter;
3155extern int __kmp_version;
3156
3157/* list of address of allocated caches for commons */
3158extern kmp_cached_addr_t *__kmp_threadpriv_cache_list;
3159
3160/* Barrier algorithm types and options */
3161extern kmp_uint32 __kmp_barrier_gather_bb_dflt;
3162extern kmp_uint32 __kmp_barrier_release_bb_dflt;
3163extern kmp_bar_pat_e __kmp_barrier_gather_pat_dflt;
3164extern kmp_bar_pat_e __kmp_barrier_release_pat_dflt;
3165extern kmp_uint32 __kmp_barrier_gather_branch_bits[bs_last_barrier];
3166extern kmp_uint32 __kmp_barrier_release_branch_bits[bs_last_barrier];
3167extern kmp_bar_pat_e __kmp_barrier_gather_pattern[bs_last_barrier];
3168extern kmp_bar_pat_e __kmp_barrier_release_pattern[bs_last_barrier];
3169extern char const *__kmp_barrier_branch_bit_env_name[bs_last_barrier];
3170extern char const *__kmp_barrier_pattern_env_name[bs_last_barrier];
3171extern char const *__kmp_barrier_type_name[bs_last_barrier];
3172extern char const *__kmp_barrier_pattern_name[bp_last_bar];
3173
3174/* Global Locks */
3175extern kmp_bootstrap_lock_t __kmp_initz_lock; /* control initialization */
3176extern kmp_bootstrap_lock_t __kmp_forkjoin_lock; /* control fork/join access */
3177extern kmp_bootstrap_lock_t __kmp_task_team_lock;
3178extern kmp_bootstrap_lock_t
3179 __kmp_exit_lock; /* exit() is not always thread-safe */
3180#if KMP_USE_MONITOR
3181extern kmp_bootstrap_lock_t
3182 __kmp_monitor_lock; /* control monitor thread creation */
3183#endif
3184extern kmp_bootstrap_lock_t
3185 __kmp_tp_cached_lock; /* used for the hack to allow threadprivate cache and
3186 __kmp_threads expansion to co-exist */
3187
3188extern kmp_lock_t __kmp_global_lock; /* control OS/global access */
3189extern kmp_queuing_lock_t __kmp_dispatch_lock; /* control dispatch access */
3190extern kmp_lock_t __kmp_debug_lock; /* control I/O access for KMP_DEBUG */
3191
3192extern enum library_type __kmp_library;
3193
3194extern enum sched_type __kmp_sched; /* default runtime scheduling */
3195extern enum sched_type __kmp_static; /* default static scheduling method */
3196extern enum sched_type __kmp_guided; /* default guided scheduling method */
3197extern enum sched_type __kmp_auto; /* default auto scheduling method */
3198extern int __kmp_chunk; /* default runtime chunk size */
3199extern int __kmp_force_monotonic; /* whether monotonic scheduling forced */
3200
3201extern size_t __kmp_stksize; /* stack size per thread */
3202#if KMP_USE_MONITOR
3203extern size_t __kmp_monitor_stksize; /* stack size for monitor thread */
3204#endif
3205extern size_t __kmp_stkoffset; /* stack offset per thread */
3206extern int __kmp_stkpadding; /* Should we pad root thread(s) stack */
3207
3208extern size_t
3209 __kmp_malloc_pool_incr; /* incremental size of pool for kmp_malloc() */
3210extern int __kmp_env_stksize; /* was KMP_STACKSIZE specified? */
3211extern int __kmp_env_blocktime; /* was KMP_BLOCKTIME specified? */
3212extern int __kmp_env_checks; /* was KMP_CHECKS specified? */
3213extern int __kmp_env_consistency_check; // was KMP_CONSISTENCY_CHECK specified?
3214extern int __kmp_generate_warnings; /* should we issue warnings? */
3215extern int __kmp_reserve_warn; /* have we issued reserve_threads warning? */
3216
3217#ifdef DEBUG_SUSPEND
3218extern int __kmp_suspend_count; /* count inside __kmp_suspend_template() */
3219#endif
3220
3221extern kmp_int32 __kmp_use_yield;
3222extern kmp_int32 __kmp_use_yield_exp_set;
3223extern kmp_uint32 __kmp_yield_init;
3224extern kmp_uint32 __kmp_yield_next;
3225extern kmp_uint64 __kmp_pause_init;
3226
3227/* ------------------------------------------------------------------------- */
3228extern int __kmp_allThreadsSpecified;
3229
3230extern size_t __kmp_align_alloc;
3231/* following data protected by initialization routines */
3232extern int __kmp_xproc; /* number of processors in the system */
3233extern int __kmp_avail_proc; /* number of processors available to the process */
3234extern size_t __kmp_sys_min_stksize; /* system-defined minimum stack size */
3235extern int __kmp_sys_max_nth; /* system-imposed maximum number of threads */
3236// maximum total number of concurrently-existing threads on device
3237extern int __kmp_max_nth;
3238// maximum total number of concurrently-existing threads in a contention group
3239extern int __kmp_cg_max_nth;
3240extern int __kmp_teams_max_nth; // max threads used in a teams construct
3241extern int __kmp_threads_capacity; /* capacity of the arrays __kmp_threads and
3242 __kmp_root */
3243extern int __kmp_dflt_team_nth; /* default number of threads in a parallel
3244 region a la OMP_NUM_THREADS */
3245extern int __kmp_dflt_team_nth_ub; /* upper bound on "" determined at serial
3246 initialization */
3247extern int __kmp_tp_capacity; /* capacity of __kmp_threads if threadprivate is
3248 used (fixed) */
3249extern int __kmp_tp_cached; /* whether threadprivate cache has been created
3250 (__kmpc_threadprivate_cached()) */
3251extern int __kmp_dflt_blocktime; /* number of milliseconds to wait before
3252 blocking (env setting) */
3253extern bool __kmp_wpolicy_passive; /* explicitly set passive wait policy */
3254#if KMP_USE_MONITOR
3255extern int
3256 __kmp_monitor_wakeups; /* number of times monitor wakes up per second */
3257extern int __kmp_bt_intervals; /* number of monitor timestamp intervals before
3258 blocking */
3259#endif
3260#ifdef KMP_ADJUST_BLOCKTIME
3261extern int __kmp_zero_bt; /* whether blocktime has been forced to zero */
3262#endif /* KMP_ADJUST_BLOCKTIME */
3263#ifdef KMP_DFLT_NTH_CORES
3264extern int __kmp_ncores; /* Total number of cores for threads placement */
3265#endif
3266/* Number of millisecs to delay on abort for Intel(R) VTune(TM) tools */
3267extern int __kmp_abort_delay;
3268
3269extern int __kmp_need_register_atfork_specified;
3270extern int __kmp_need_register_atfork; /* At initialization, call pthread_atfork
3271 to install fork handler */
3272extern int __kmp_gtid_mode; /* Method of getting gtid, values:
3273 0 - not set, will be set at runtime
3274 1 - using stack search
3275 2 - dynamic TLS (pthread_getspecific(Linux* OS/OS
3276 X*) or TlsGetValue(Windows* OS))
3277 3 - static TLS (__declspec(thread) __kmp_gtid),
3278 Linux* OS .so only. */
3279extern int
3280 __kmp_adjust_gtid_mode; /* If true, adjust method based on #threads */
3281#ifdef KMP_TDATA_GTID
3282extern KMP_THREAD_LOCAL int __kmp_gtid;
3283#endif
3284extern int __kmp_tls_gtid_min; /* #threads below which use sp search for gtid */
3285extern int __kmp_foreign_tp; // If true, separate TP var for each foreign thread
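The __kmp_gtid_mode comment above lists the runtime's gtid lookup strategies. As a minimal standalone sketch of the mode-3 idea (static TLS), using a stand-in thread_local variable and hypothetical set_gtid/get_gtid helpers rather than the runtime's real __kmp_gtid:

// Illustrative sketch only (not from kmp.h): a mode-3 style lookup backed by
// C++ thread_local storage, so reading the gtid needs no syscall or stack search.
#include <cstdio>
#include <thread>

static thread_local int my_gtid = -1; // stand-in for __kmp_gtid (assumption)

static void set_gtid(int gtid) { my_gtid = gtid; }
static int get_gtid() { return my_gtid; } // plain TLS read

int main() {
  std::thread t1([] {
    set_gtid(1);
    std::printf("worker gtid=%d\n", get_gtid());
  });
  set_gtid(0);
  std::printf("initial gtid=%d\n", get_gtid());
  t1.join();
}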
3286#if KMP_ARCH_X86 || KMP_ARCH_X86_64
3287extern int __kmp_inherit_fp_control; // copy fp creg(s) parent->workers at fork
3288extern kmp_int16 __kmp_init_x87_fpu_control_word; // init thread's FP ctrl reg
3289extern kmp_uint32 __kmp_init_mxcsr; /* init thread's mxscr */
3290#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
3291
3292// max_active_levels for nested parallelism enabled by default via
3293// OMP_MAX_ACTIVE_LEVELS, OMP_NESTED, OMP_NUM_THREADS, and OMP_PROC_BIND
3294extern int __kmp_dflt_max_active_levels;
3295// Indicates whether value of __kmp_dflt_max_active_levels was already
3296// explicitly set by OMP_MAX_ACTIVE_LEVELS or OMP_NESTED=false
3297extern bool __kmp_dflt_max_active_levels_set;
3298extern int __kmp_dispatch_num_buffers; /* max possible dynamic loops in
3299 concurrent execution per team */
3300#if KMP_NESTED_HOT_TEAMS
3301extern int __kmp_hot_teams_mode;
3302extern int __kmp_hot_teams_max_level;
3303#endif
3304
3305#if KMP_OS_LINUX
3306extern enum clock_function_type __kmp_clock_function;
3307extern int __kmp_clock_function_param;
3308#endif /* KMP_OS_LINUX */
3309
3310#if KMP_MIC_SUPPORTED
3311extern enum mic_type __kmp_mic_type;
3312#endif
3313
3314#ifdef USE_LOAD_BALANCE
3315extern double __kmp_load_balance_interval; // load balance algorithm interval
3316#endif /* USE_LOAD_BALANCE */
3317
3318// OpenMP 3.1 - Nested num threads array
3319typedef struct kmp_nested_nthreads_t {
3320 int *nth;
3321 int size;
3322 int used;
3323} kmp_nested_nthreads_t;
3324
3325extern kmp_nested_nthreads_t __kmp_nested_nth;
3326
3327#if KMP_USE_ADAPTIVE_LOCKS
3328
3329// Parameters for the speculative lock backoff system.
3330struct kmp_adaptive_backoff_params_t {
3331 // Number of soft retries before it counts as a hard retry.
3332 kmp_uint32 max_soft_retries;
3333 // Badness is a bit mask : 0,1,3,7,15,... on each hard failure we move one to
3334 // the right
3335 kmp_uint32 max_badness;
3336};
3337
3338extern kmp_adaptive_backoff_params_t __kmp_adaptive_backoff_params;
3339
3340#if KMP_DEBUG_ADAPTIVE_LOCKS
3341extern const char *__kmp_speculative_statsfile;
3342#endif
3343
3344#endif // KMP_USE_ADAPTIVE_LOCKS
3345
3346extern int __kmp_display_env; /* TRUE or FALSE */
3347extern int __kmp_display_env_verbose; /* TRUE if OMP_DISPLAY_ENV=VERBOSE */
3348extern int __kmp_omp_cancellation; /* TRUE or FALSE */
3349extern int __kmp_nteams;
3350extern int __kmp_teams_thread_limit;
3351
3352/* ------------------------------------------------------------------------- */
3353
3354/* the following are protected by the fork/join lock */
3355/* write: lock read: anytime */
3356extern kmp_info_t **__kmp_threads; /* Descriptors for the threads */
3357/* Holds old arrays of __kmp_threads until library shutdown */
3358extern kmp_old_threads_list_t *__kmp_old_threads_list;
3359/* read/write: lock */
3360extern volatile kmp_team_t *__kmp_team_pool;
3361extern volatile kmp_info_t *__kmp_thread_pool;
3362extern kmp_info_t *__kmp_thread_pool_insert_pt;
3363
3364// total num threads reachable from some root thread including all root threads
3365extern volatile int __kmp_nth;
3366/* total number of threads reachable from some root thread including all root
3367 threads, and those in the thread pool */
3368extern volatile int __kmp_all_nth;
3369extern std::atomic<int> __kmp_thread_pool_active_nth;
3370
3371extern kmp_root_t **__kmp_root; /* root of thread hierarchy */
3372/* end data protected by fork/join lock */
3373/* ------------------------------------------------------------------------- */
3374
3375#define __kmp_get_gtid() __kmp_get_global_thread_id()
3376#define __kmp_entry_gtid() __kmp_get_global_thread_id_reg()
3377#define __kmp_get_tid() (__kmp_tid_from_gtid(__kmp_get_gtid()))
3378#define __kmp_get_team() (__kmp_threads[(__kmp_get_gtid())]->th.th_team)
3379#define __kmp_get_thread() (__kmp_thread_from_gtid(__kmp_get_gtid()))
3380
3381// AT: Which way is correct?
3382// AT: 1. nproc = __kmp_threads[ ( gtid ) ] -> th.th_team -> t.t_nproc;
3383// AT: 2. nproc = __kmp_threads[ ( gtid ) ] -> th.th_team_nproc;
3384#define __kmp_get_team_num_threads(gtid) \
3385 (__kmp_threads[(gtid)]->th.th_team->t.t_nproc)
3386
3387static inline bool KMP_UBER_GTID(int gtid) {
3388 KMP_DEBUG_ASSERT(gtid >= KMP_GTID_MIN);
3389 KMP_DEBUG_ASSERT(gtid < __kmp_threads_capacity);
3390 return (gtid >= 0 && __kmp_root[gtid] && __kmp_threads[gtid] &&
3391 __kmp_threads[gtid] == __kmp_root[gtid]->r.r_uber_thread);
3392}
3393
3394static inline int __kmp_tid_from_gtid(int gtid) {
3395 KMP_DEBUG_ASSERT(gtid >= 0);
3396 return __kmp_threads[gtid]->th.th_info.ds.ds_tid;
3397}
3398
3399static inline int __kmp_gtid_from_tid(int tid, const kmp_team_t *team) {
3400 KMP_DEBUG_ASSERT(tid >= 0 && team);
3401 return team->t.t_threads[tid]->th.th_info.ds.ds_gtid;
3402}
3403
3404static inline int __kmp_gtid_from_thread(const kmp_info_t *thr) {
3405 KMP_DEBUG_ASSERT(thr);
3406 return thr->th.th_info.ds.ds_gtid;
3407}
3408
3409static inline kmp_info_t *__kmp_thread_from_gtid(int gtid) {
3410 KMP_DEBUG_ASSERT(gtid >= 0);
3411 return __kmp_threads[gtid];
3412}
3413
3414static inline kmp_team_t *__kmp_team_from_gtid(int gtid) {
3415 KMP_DEBUG_ASSERT(gtid >= 0);
3416 return __kmp_threads[gtid]->th.th_team;
3417}
3418
3419static inline void __kmp_assert_valid_gtid(kmp_int32 gtid) {
3420 if (UNLIKELY(gtid < 0 || gtid >= __kmp_threads_capacity))
3421 KMP_FATAL(ThreadIdentInvalid);
3422}
3423
3424#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
3425extern int __kmp_user_level_mwait; // TRUE or FALSE; from KMP_USER_LEVEL_MWAIT
3426extern int __kmp_umwait_enabled; // Runtime check if user-level mwait enabled
3427extern int __kmp_mwait_enabled; // Runtime check if ring3 mwait is enabled
3428extern int __kmp_mwait_hints; // Hints to pass in to mwait
3429#endif
3430
3431#if KMP_HAVE_UMWAIT
3432extern int __kmp_waitpkg_enabled; // Runtime check if waitpkg exists
3433extern int __kmp_tpause_state; // 0 (default), 1=C0.1, 2=C0.2; from KMP_TPAUSE
3434extern int __kmp_tpause_hint; // 1=C0.1 (default), 0=C0.2; from KMP_TPAUSE
3435extern int __kmp_tpause_enabled; // 0 (default), 1 (KMP_TPAUSE is non-zero)
3436#endif
3437
3438/* ------------------------------------------------------------------------- */
3439
3440extern kmp_global_t __kmp_global; /* global status */
3441
3442extern kmp_info_t __kmp_monitor;
3443// For Debugging Support Library
3444extern std::atomic<kmp_int32> __kmp_team_counter;
3445// For Debugging Support Library
3446extern std::atomic<kmp_int32> __kmp_task_counter;
3447
3448#if USE_DEBUGGER
3449#define _KMP_GEN_ID(counter) \
3450 (__kmp_debugging ? KMP_ATOMIC_INC(&counter) + 1 : ~0)
3451#else
3452#define _KMP_GEN_ID(counter) (~0)
3453#endif /* USE_DEBUGGER */
3454
3455#define KMP_GEN_TASK_ID() _KMP_GEN_ID(__kmp_task_counter)
3456#define KMP_GEN_TEAM_ID() _KMP_GEN_ID(__kmp_team_counter)
3457
3458/* ------------------------------------------------------------------------ */
3459
3460extern void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2,
3461 size_t size, char const *format, ...);
3462
3463extern void __kmp_serial_initialize(void);
3464extern void __kmp_middle_initialize(void);
3465extern void __kmp_parallel_initialize(void);
3466
3467extern void __kmp_internal_begin(void);
3468extern void __kmp_internal_end_library(int gtid);
3469extern void __kmp_internal_end_thread(int gtid);
3470extern void __kmp_internal_end_atexit(void);
3471extern void __kmp_internal_end_dtor(void);
3472extern void __kmp_internal_end_dest(void *);
3473
3474extern int __kmp_register_root(int initial_thread);
3475extern void __kmp_unregister_root(int gtid);
3476extern void __kmp_unregister_library(void); // called by __kmp_internal_end()
3477
3478extern int __kmp_ignore_mppbeg(void);
3479extern int __kmp_ignore_mppend(void);
3480
3481extern int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws);
3482extern void __kmp_exit_single(int gtid);
3483
3484extern void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref);
3485extern void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref);
3486
3487#ifdef USE_LOAD_BALANCE
3488extern int __kmp_get_load_balance(int);
3489#endif
3490
3491extern int __kmp_get_global_thread_id(void);
3492extern int __kmp_get_global_thread_id_reg(void);
3493extern void __kmp_exit_thread(int exit_status);
3494extern void __kmp_abort(char const *format, ...);
3495extern void __kmp_abort_thread(void);
3496KMP_NORETURN extern void __kmp_abort_process(void);
3497extern void __kmp_warn(char const *format, ...);
3498
3499extern void __kmp_set_num_threads(int new_nth, int gtid);
3500
3501// Returns current thread (pointer to kmp_info_t). Current thread *must* be
3502// registered.
3503static inline kmp_info_t *__kmp_entry_thread() {
3504 int gtid = __kmp_entry_gtid();
3505
3506 return __kmp_threads[gtid];
3507}
3508
3509extern void __kmp_set_max_active_levels(int gtid, int new_max_active_levels);
3510extern int __kmp_get_max_active_levels(int gtid);
3511extern int __kmp_get_ancestor_thread_num(int gtid, int level);
3512extern int __kmp_get_team_size(int gtid, int level);
3513extern void __kmp_set_schedule(int gtid, kmp_sched_t new_sched, int chunk);
3514extern void __kmp_get_schedule(int gtid, kmp_sched_t *sched, int *chunk);
3515
3516extern unsigned short __kmp_get_random(kmp_info_t *thread);
3517extern void __kmp_init_random(kmp_info_t *thread);
3518
3519extern kmp_r_sched_t __kmp_get_schedule_global(void);
3520extern void __kmp_adjust_num_threads(int new_nproc);
3521extern void __kmp_check_stksize(size_t *val);
3522
3523extern void *___kmp_allocate(size_t size KMP_SRC_LOC_DECL);
3524extern void *___kmp_page_allocate(size_t size KMP_SRC_LOC_DECL);
3525extern void ___kmp_free(void *ptr KMP_SRC_LOC_DECL);
3526#define __kmp_allocate(size) ___kmp_allocate((size)KMP_SRC_LOC_CURR)
3527#define __kmp_page_allocate(size) ___kmp_page_allocate((size)KMP_SRC_LOC_CURR)
3528#define __kmp_free(ptr) ___kmp_free((ptr)KMP_SRC_LOC_CURR)
3529
3530#if USE_FAST_MEMORY
3531extern void *___kmp_fast_allocate(kmp_info_t *this_thr,
3532 size_t size KMP_SRC_LOC_DECL);
3533extern void ___kmp_fast_free(kmp_info_t *this_thr, void *ptr KMP_SRC_LOC_DECL);
3534extern void __kmp_free_fast_memory(kmp_info_t *this_thr);
3535extern void __kmp_initialize_fast_memory(kmp_info_t *this_thr);
3536#define __kmp_fast_allocate(this_thr, size) \
3537 ___kmp_fast_allocate((this_thr), (size)KMP_SRC_LOC_CURR)
3538#define __kmp_fast_free(this_thr, ptr) \
3539 ___kmp_fast_free((this_thr), (ptr)KMP_SRC_LOC_CURR)
3540#endif
3541
3542extern void *___kmp_thread_malloc(kmp_info_t *th, size_t size KMP_SRC_LOC_DECL);
3543extern void *___kmp_thread_calloc(kmp_info_t *th, size_t nelem,
3544 size_t elsize KMP_SRC_LOC_DECL);
3545extern void *___kmp_thread_realloc(kmp_info_t *th, void *ptr,
3546 size_t size KMP_SRC_LOC_DECL);
3547extern void ___kmp_thread_free(kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL);
3548#define __kmp_thread_malloc(th, size) \
3549 ___kmp_thread_malloc((th), (size)KMP_SRC_LOC_CURR)
3550#define __kmp_thread_calloc(th, nelem, elsize) \
3551 ___kmp_thread_calloc((th), (nelem), (elsize)KMP_SRC_LOC_CURR)
3552#define __kmp_thread_realloc(th, ptr, size) \
3553 ___kmp_thread_realloc((th), (ptr), (size)KMP_SRC_LOC_CURR)
3554#define __kmp_thread_free(th, ptr) \
3555 ___kmp_thread_free((th), (ptr)KMP_SRC_LOC_CURR)
3556
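The __kmp_allocate/__kmp_free family above threads the caller's source location through KMP_SRC_LOC_DECL/KMP_SRC_LOC_CURR. A minimal sketch of that tagging pattern, with hypothetical names (debug_allocate, DEBUG_ALLOCATE) that are not taken from kmp.h:

// Illustrative sketch only: append __FILE__/__LINE__ at the macro call site so
// the allocator can report where each request came from.
#include <cstdio>
#include <cstdlib>

static void *debug_allocate(size_t size, const char *file, int line) {
  std::printf("allocate %zu bytes at %s:%d\n", size, file, line);
  return std::malloc(size);
}

// A debug build forwards the call site; a release build could compile the
// extra arguments away entirely.
#define DEBUG_ALLOCATE(size) debug_allocate((size), __FILE__, __LINE__)

int main() {
  void *p = DEBUG_ALLOCATE(64);
  std::free(p);
}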
3557extern void __kmp_push_num_threads(ident_t *loc, int gtid, int num_threads);
3558
3559extern void __kmp_push_proc_bind(ident_t *loc, int gtid,
3560 kmp_proc_bind_t proc_bind);
3561extern void __kmp_push_num_teams(ident_t *loc, int gtid, int num_teams,
3562 int num_threads);
3563extern void __kmp_push_num_teams_51(ident_t *loc, int gtid, int num_teams_lb,
3564 int num_teams_ub, int num_threads);
3565
3566extern void __kmp_yield();
3567
3568extern void __kmpc_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
3569 enum sched_type schedule, kmp_int32 lb,
3570 kmp_int32 ub, kmp_int32 st, kmp_int32 chunk);
3571extern void __kmpc_dispatch_init_4u(ident_t *loc, kmp_int32 gtid,
3572 enum sched_type schedule, kmp_uint32 lb,
3573 kmp_uint32 ub, kmp_int32 st,
3574 kmp_int32 chunk);
3575extern void __kmpc_dispatch_init_8(ident_t *loc, kmp_int32 gtid,
3576 enum sched_type schedule, kmp_int64 lb,
3577 kmp_int64 ub, kmp_int64 st, kmp_int64 chunk);
3578extern void __kmpc_dispatch_init_8u(ident_t *loc, kmp_int32 gtid,
3579 enum sched_type schedule, kmp_uint64 lb,
3580 kmp_uint64 ub, kmp_int64 st,
3581 kmp_int64 chunk);
3582
3583extern int __kmpc_dispatch_next_4(ident_t *loc, kmp_int32 gtid,
3584 kmp_int32 *p_last, kmp_int32 *p_lb,
3585 kmp_int32 *p_ub, kmp_int32 *p_st);
3586extern int __kmpc_dispatch_next_4u(ident_t *loc, kmp_int32 gtid,
3587 kmp_int32 *p_last, kmp_uint32 *p_lb,
3588 kmp_uint32 *p_ub, kmp_int32 *p_st);
3589extern int __kmpc_dispatch_next_8(ident_t *loc, kmp_int32 gtid,
3590 kmp_int32 *p_last, kmp_int64 *p_lb,
3591 kmp_int64 *p_ub, kmp_int64 *p_st);
3592extern int __kmpc_dispatch_next_8u(ident_t *loc, kmp_int32 gtid,
3593 kmp_int32 *p_last, kmp_uint64 *p_lb,
3594 kmp_uint64 *p_ub, kmp_int64 *p_st);
3595
3596extern void __kmpc_dispatch_fini_4(ident_t *loc, kmp_int32 gtid);
3597extern void __kmpc_dispatch_fini_8(ident_t *loc, kmp_int32 gtid);
3598extern void __kmpc_dispatch_fini_4u(ident_t *loc, kmp_int32 gtid);
3599extern void __kmpc_dispatch_fini_8u(ident_t *loc, kmp_int32 gtid);
3600
3601#ifdef KMP_GOMP_COMPAT
3602
3603extern void __kmp_aux_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
3604 enum sched_type schedule, kmp_int32 lb,
3605 kmp_int32 ub, kmp_int32 st,
3606 kmp_int32 chunk, int push_ws);
3607extern void __kmp_aux_dispatch_init_4u(ident_t *loc, kmp_int32 gtid,
3608 enum sched_type schedule, kmp_uint32 lb,
3609 kmp_uint32 ub, kmp_int32 st,
3610 kmp_int32 chunk, int push_ws);
3611extern void __kmp_aux_dispatch_init_8(ident_t *loc, kmp_int32 gtid,
3612 enum sched_type schedule, kmp_int64 lb,
3613 kmp_int64 ub, kmp_int64 st,
3614 kmp_int64 chunk, int push_ws);
3615extern void __kmp_aux_dispatch_init_8u(ident_t *loc, kmp_int32 gtid,
3616 enum sched_type schedule, kmp_uint64 lb,
3617 kmp_uint64 ub, kmp_int64 st,
3618 kmp_int64 chunk, int push_ws);
3619extern void __kmp_aux_dispatch_fini_chunk_4(ident_t *loc, kmp_int32 gtid);
3620extern void __kmp_aux_dispatch_fini_chunk_8(ident_t *loc, kmp_int32 gtid);
3621extern void __kmp_aux_dispatch_fini_chunk_4u(ident_t *loc, kmp_int32 gtid);
3622extern void __kmp_aux_dispatch_fini_chunk_8u(ident_t *loc, kmp_int32 gtid);
3623
3624#endif /* KMP_GOMP_COMPAT */
3625
3626extern kmp_uint32 __kmp_eq_4(kmp_uint32 value, kmp_uint32 checker);
3627extern kmp_uint32 __kmp_neq_4(kmp_uint32 value, kmp_uint32 checker);
3628extern kmp_uint32 __kmp_lt_4(kmp_uint32 value, kmp_uint32 checker);
3629extern kmp_uint32 __kmp_ge_4(kmp_uint32 value, kmp_uint32 checker);
3630extern kmp_uint32 __kmp_le_4(kmp_uint32 value, kmp_uint32 checker);
3631extern kmp_uint32 __kmp_wait_4(kmp_uint32 volatile *spinner, kmp_uint32 checker,
3632 kmp_uint32 (*pred)(kmp_uint32, kmp_uint32),
3633 void *obj);
3634extern void __kmp_wait_4_ptr(void *spinner, kmp_uint32 checker,
3635 kmp_uint32 (*pred)(void *, kmp_uint32), void *obj);
3636
3637extern void __kmp_wait_64(kmp_info_t *this_thr, kmp_flag_64<> *flag,
3638 int final_spin
3639#if USE_ITT_BUILD
3640 ,
3641 void *itt_sync_obj
3642#endif
3643);
3644extern void __kmp_release_64(kmp_flag_64<> *flag);
3645
3646extern void __kmp_infinite_loop(void);
3647
3648extern void __kmp_cleanup(void);
3649
3650#if KMP_HANDLE_SIGNALS
3651extern int __kmp_handle_signals;
3652extern void __kmp_install_signals(int parallel_init);
3653extern void __kmp_remove_signals(void);
3654#endif
3655
3656extern void __kmp_clear_system_time(void);
3657extern void __kmp_read_system_time(double *delta);
3658
3659extern void __kmp_check_stack_overlap(kmp_info_t *thr);
3660
3661extern void __kmp_expand_host_name(char *buffer, size_t size);
3662extern void __kmp_expand_file_name(char *result, size_t rlen, char *pattern);
3663
3664#if KMP_ARCH_X86 || KMP_ARCH_X86_64 || (KMP_OS_WINDOWS && (KMP_ARCH_AARCH64 || KMP_ARCH_ARM))
3665extern void
3666__kmp_initialize_system_tick(void); /* Initialize timer tick value */
3667#endif
3668
3669extern void
3670__kmp_runtime_initialize(void); /* machine specific initialization */
3671extern void __kmp_runtime_destroy(void);
3672
3673#if KMP_AFFINITY_SUPPORTED
3674extern char *__kmp_affinity_print_mask(char *buf, int buf_len,
3675 kmp_affin_mask_t *mask);
3676extern kmp_str_buf_t *__kmp_affinity_str_buf_mask(kmp_str_buf_t *buf,
3677 kmp_affin_mask_t *mask);
3678extern void __kmp_affinity_initialize(kmp_affinity_t &affinity);
3679extern void __kmp_affinity_uninitialize(void);
3680extern void __kmp_affinity_set_init_mask(
3681 int gtid, int isa_root); /* set affinity according to KMP_AFFINITY */
3682extern void __kmp_affinity_set_place(int gtid);
3683extern void __kmp_affinity_determine_capable(const char *env_var);
3684extern int __kmp_aux_set_affinity(void **mask);
3685extern int __kmp_aux_get_affinity(void **mask);
3686extern int __kmp_aux_get_affinity_max_proc();
3687extern int __kmp_aux_set_affinity_mask_proc(int proc, void **mask);
3688extern int __kmp_aux_unset_affinity_mask_proc(int proc, void **mask);
3689extern int __kmp_aux_get_affinity_mask_proc(int proc, void **mask);
3690extern void __kmp_balanced_affinity(kmp_info_t *th, int team_size);
3691#if KMP_OS_LINUX || KMP_OS_FREEBSD
3692extern int kmp_set_thread_affinity_mask_initial(void);
3693#endif
3694static inline void __kmp_assign_root_init_mask() {
3695 int gtid = __kmp_entry_gtid();
11
Value assigned to 'ompt_enabled.enabled', which participates in a condition later
12
Value assigned to 'ompt_enabled.ompt_callback_parallel_begin', which participates in a condition later
3696 kmp_root_t *r = __kmp_threads[gtid]->th.th_root;
3697 if (r->r.r_uber_thread == __kmp_threads[gtid] && !r->r.r_affinity_assigned) {
13
Assuming the condition is false
3698 __kmp_affinity_set_init_mask(gtid, TRUE);
3699 r->r.r_affinity_assigned = TRUE;
3700 }
3701}
3702static inline void __kmp_reset_root_init_mask(int gtid) {
3703 if (!KMP_AFFINITY_CAPABLE())
3704 return;
3705 kmp_info_t *th = __kmp_threads[gtid];
3706 kmp_root_t *r = th->th.th_root;
3707 if (r->r.r_uber_thread == th && r->r.r_affinity_assigned) {
3708 __kmp_set_system_affinity(__kmp_affin_origMask, FALSE);
3709 KMP_CPU_COPY(th->th.th_affin_mask, __kmp_affin_origMask);
3710 r->r.r_affinity_assigned = FALSE;
3711 }
3712}
3713#else /* KMP_AFFINITY_SUPPORTED */
3714#define __kmp_assign_root_init_mask() /* Nothing */
3715static inline void __kmp_reset_root_init_mask(int gtid) {}
3716#endif /* KMP_AFFINITY_SUPPORTED */
3717// No need for KMP_AFFINITY_SUPPORTED guard as only one field in the
3718// format string is for affinity, so platforms that do not support
3719// affinity can still use the other fields, e.g., %n for num_threads
3720extern size_t __kmp_aux_capture_affinity(int gtid, const char *format,
3721 kmp_str_buf_t *buffer);
3722extern void __kmp_aux_display_affinity(int gtid, const char *format);
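__kmp_aux_capture_affinity and __kmp_aux_display_affinity back the standard OpenMP 5.0 affinity-format entry points. A minimal user-level sketch through the public omp.h API, using long-form field names; it assumes a null format falls back to the current affinity-format setting, which is how the spec describes it:

// Illustrative user-level sketch only; compile with -fopenmp.
#include <omp.h>
#include <cstdio>

int main() {
  // Long-form field names; the exact field set is defined by the OpenMP spec.
  omp_set_affinity_format("tid=%{thread_num} aff=%{thread_affinity}");

  #pragma omp parallel
  {
    char buf[256];
    omp_capture_affinity(buf, sizeof(buf), nullptr); // nullptr -> current format
    #pragma omp critical
    std::printf("%s\n", buf);
  }
  return 0;
}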
3723
3724extern void __kmp_cleanup_hierarchy();
3725extern void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar);
3726
3727#if KMP_USE_FUTEX
3728
3729extern int __kmp_futex_determine_capable(void);
3730
3731#endif // KMP_USE_FUTEX
3732
3733extern void __kmp_gtid_set_specific(int gtid);
3734extern int __kmp_gtid_get_specific(void);
3735
3736extern double __kmp_read_cpu_time(void);
3737
3738extern int __kmp_read_system_info(struct kmp_sys_info *info);
3739
3740#if KMP_USE_MONITOR
3741extern void __kmp_create_monitor(kmp_info_t *th);
3742#endif
3743
3744extern void *__kmp_launch_thread(kmp_info_t *thr);
3745
3746extern void __kmp_create_worker(int gtid, kmp_info_t *th, size_t stack_size);
3747
3748#if KMP_OS_WINDOWS
3749extern int __kmp_still_running(kmp_info_t *th);
3750extern int __kmp_is_thread_alive(kmp_info_t *th, DWORD *exit_val);
3751extern void __kmp_free_handle(kmp_thread_t tHandle);
3752#endif
3753
3754#if KMP_USE_MONITOR
3755extern void __kmp_reap_monitor(kmp_info_t *th);
3756#endif
3757extern void __kmp_reap_worker(kmp_info_t *th);
3758extern void __kmp_terminate_thread(int gtid);
3759
3760extern int __kmp_try_suspend_mx(kmp_info_t *th);
3761extern void __kmp_lock_suspend_mx(kmp_info_t *th);
3762extern void __kmp_unlock_suspend_mx(kmp_info_t *th);
3763
3764extern void __kmp_elapsed(double *);
3765extern void __kmp_elapsed_tick(double *);
3766
3767extern void __kmp_enable(int old_state);
3768extern void __kmp_disable(int *old_state);
3769
3770extern void __kmp_thread_sleep(int millis);
3771
3772extern void __kmp_common_initialize(void);
3773extern void __kmp_common_destroy(void);
3774extern void __kmp_common_destroy_gtid(int gtid);
3775
3776#if KMP_OS_UNIX
3777extern void __kmp_register_atfork(void);
3778#endif
3779extern void __kmp_suspend_initialize(void);
3780extern void __kmp_suspend_initialize_thread(kmp_info_t *th);
3781extern void __kmp_suspend_uninitialize_thread(kmp_info_t *th);
3782
3783extern kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
3784 int tid);
3785extern kmp_team_t *
3786__kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
3787#if OMPT_SUPPORT
3788 ompt_data_t ompt_parallel_data,
3789#endif
3790 kmp_proc_bind_t proc_bind, kmp_internal_control_t *new_icvs,
3791 int argc USE_NESTED_HOT_ARG(kmp_info_t *thr));
3792extern void __kmp_free_thread(kmp_info_t *);
3793extern void __kmp_free_team(kmp_root_t *,
3794 kmp_team_t *USE_NESTED_HOT_ARG(kmp_info_t *));
3795extern kmp_team_t *__kmp_reap_team(kmp_team_t *);
3796
3797/* ------------------------------------------------------------------------ */
3798
3799extern void __kmp_initialize_bget(kmp_info_t *th);
3800extern void __kmp_finalize_bget(kmp_info_t *th);
3801
3802KMP_EXPORTextern void *kmpc_malloc(size_t size);
3803KMP_EXPORTextern void *kmpc_aligned_malloc(size_t size, size_t alignment);
3804KMP_EXPORTextern void *kmpc_calloc(size_t nelem, size_t elsize);
3805KMP_EXPORTextern void *kmpc_realloc(void *ptr, size_t size);
3806KMP_EXPORTextern void kmpc_free(void *ptr);
3807
3808/* declarations for internal use */
3809
3810extern int __kmp_barrier(enum barrier_type bt, int gtid, int is_split,
3811 size_t reduce_size, void *reduce_data,
3812 void (*reduce)(void *, void *));
3813extern void __kmp_end_split_barrier(enum barrier_type bt, int gtid);
3814extern int __kmp_barrier_gomp_cancel(int gtid);
3815
3816/*!
3817 * Tell the fork call which compiler generated the fork call, and therefore how
3818 * to deal with the call.
3819 */
3820enum fork_context_e {
3821 fork_context_gnu, /**< Called from GNU generated code, so must not invoke the
3822 microtask internally. */
3823 fork_context_intel, /**< Called from Intel generated code. */
3824 fork_context_last
3825};
3826extern int __kmp_fork_call(ident_t *loc, int gtid,
3827 enum fork_context_e fork_context, kmp_int32 argc,
3828 microtask_t microtask, launch_t invoker,
3829 kmp_va_list ap);
3830
3831extern void __kmp_join_call(ident_t *loc, int gtid
3832#if OMPT_SUPPORT
3833 ,
3834 enum fork_context_e fork_context
3835#endif
3836 ,
3837 int exit_teams = 0);
3838
3839extern void __kmp_serialized_parallel(ident_t *id, kmp_int32 gtid);
3840extern void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team);
3841extern void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team);
3842extern int __kmp_invoke_task_func(int gtid);
3843extern void __kmp_run_before_invoked_task(int gtid, int tid,
3844 kmp_info_t *this_thr,
3845 kmp_team_t *team);
3846extern void __kmp_run_after_invoked_task(int gtid, int tid,
3847 kmp_info_t *this_thr,
3848 kmp_team_t *team);
3849
3850// should never have been exported
3851KMP_EXPORTextern int __kmpc_invoke_task_func(int gtid);
3852extern int __kmp_invoke_teams_master(int gtid);
3853extern void __kmp_teams_master(int gtid);
3854extern int __kmp_aux_get_team_num();
3855extern int __kmp_aux_get_num_teams();
3856extern void __kmp_save_internal_controls(kmp_info_t *thread);
3857extern void __kmp_user_set_library(enum library_type arg);
3858extern void __kmp_aux_set_library(enum library_type arg);
3859extern void __kmp_aux_set_stacksize(size_t arg);
3860extern void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid);
3861extern void __kmp_aux_set_defaults(char const *str, size_t len);
3862
3863/* Functions called from __kmp_aux_env_initialize() in kmp_settings.cpp */
3864void kmpc_set_blocktime(int arg);
3865void ompc_set_nested(int flag);
3866void ompc_set_dynamic(int flag);
3867void ompc_set_num_threads(int arg);
3868
3869extern void __kmp_push_current_task_to_thread(kmp_info_t *this_thr,
3870 kmp_team_t *team, int tid);
3871extern void __kmp_pop_current_task_from_thread(kmp_info_t *this_thr);
3872extern kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
3873 kmp_tasking_flags_t *flags,
3874 size_t sizeof_kmp_task_t,
3875 size_t sizeof_shareds,
3876 kmp_routine_entry_t task_entry);
3877extern void __kmp_init_implicit_task(ident_t *loc_ref, kmp_info_t *this_thr,
3878 kmp_team_t *team, int tid,
3879 int set_curr_task);
3880extern void __kmp_finish_implicit_task(kmp_info_t *this_thr);
3881extern void __kmp_free_implicit_task(kmp_info_t *this_thr);
3882
3883extern kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
3884 int gtid,
3885 kmp_task_t *task);
3886extern void __kmp_fulfill_event(kmp_event_t *event);
3887
3888extern void __kmp_free_task_team(kmp_info_t *thread,
3889 kmp_task_team_t *task_team);
3890extern void __kmp_reap_task_teams(void);
3891extern void __kmp_wait_to_unref_task_teams(void);
3892extern void __kmp_task_team_setup(kmp_info_t *this_thr, kmp_team_t *team,
3893 int always);
3894extern void __kmp_task_team_sync(kmp_info_t *this_thr, kmp_team_t *team);
3895extern void __kmp_task_team_wait(kmp_info_t *this_thr, kmp_team_t *team
3896#if USE_ITT_BUILD
3897 ,
3898 void *itt_sync_obj
3899#endif /* USE_ITT_BUILD */
3900 ,
3901 int wait = 1);
3902extern void __kmp_tasking_barrier(kmp_team_t *team, kmp_info_t *thread,
3903 int gtid);
3904
3905extern int __kmp_is_address_mapped(void *addr);
3906extern kmp_uint64 __kmp_hardware_timestamp(void);
3907
3908#if KMP_OS_UNIX
3909extern int __kmp_read_from_file(char const *path, char const *format, ...);
3910#endif
3911
3912/* ------------------------------------------------------------------------ */
3913//
3914// Assembly routines that have no compiler intrinsic replacement
3915//
3916
3917extern int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int npr, int argc,
3918 void *argv[]
3919#if OMPT_SUPPORT
3920 ,
3921 void **exit_frame_ptr
3922#endif
3923);
3924
3925/* ------------------------------------------------------------------------ */
3926
3927KMP_EXPORTextern void __kmpc_begin(ident_t *, kmp_int32 flags);
3928KMP_EXPORTextern void __kmpc_end(ident_t *);
3929
3930KMP_EXPORTextern void __kmpc_threadprivate_register_vec(ident_t *, void *data,
3931 kmpc_ctor_vec ctor,
3932 kmpc_cctor_vec cctor,
3933 kmpc_dtor_vec dtor,
3934 size_t vector_length);
3935KMP_EXPORTextern void __kmpc_threadprivate_register(ident_t *, void *data,
3936 kmpc_ctor ctor, kmpc_cctor cctor,
3937 kmpc_dtor dtor);
3938KMP_EXPORTextern void *__kmpc_threadprivate(ident_t *, kmp_int32 global_tid,
3939 void *data, size_t size);
3940
3941KMP_EXPORTextern kmp_int32 __kmpc_global_thread_num(ident_t *);
3942KMP_EXPORTextern kmp_int32 __kmpc_global_num_threads(ident_t *);
3943KMP_EXPORTextern kmp_int32 __kmpc_bound_thread_num(ident_t *);
3944KMP_EXPORTextern kmp_int32 __kmpc_bound_num_threads(ident_t *);
3945
3946KMP_EXPORTextern kmp_int32 __kmpc_ok_to_fork(ident_t *);
3947KMP_EXPORTextern void __kmpc_fork_call(ident_t *, kmp_int32 nargs,
3948 kmpc_micro microtask, ...);
3949KMP_EXPORTextern void __kmpc_fork_call_if(ident_t *loc, kmp_int32 nargs,
3950 kmpc_micro microtask, kmp_int32 cond,
3951 void *args);
3952
3953KMP_EXPORTextern void __kmpc_serialized_parallel(ident_t *, kmp_int32 global_tid);
3954KMP_EXPORTextern void __kmpc_end_serialized_parallel(ident_t *, kmp_int32 global_tid);
3955
3956KMP_EXPORTextern void __kmpc_flush(ident_t *);
3957KMP_EXPORTextern void __kmpc_barrier(ident_t *, kmp_int32 global_tid);
3958KMP_EXPORTextern kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
3959KMP_EXPORTextern void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
3960KMP_EXPORTextern kmp_int32 __kmpc_masked(ident_t *, kmp_int32 global_tid,
3961 kmp_int32 filter);
3962KMP_EXPORTextern void __kmpc_end_masked(ident_t *, kmp_int32 global_tid);
3963KMP_EXPORTextern void __kmpc_ordered(ident_t *, kmp_int32 global_tid);
3964KMP_EXPORTextern void __kmpc_end_ordered(ident_t *, kmp_int32 global_tid);
3965KMP_EXPORTextern void __kmpc_critical(ident_t *, kmp_int32 global_tid,
3966 kmp_critical_name *);
3967KMP_EXPORTextern void __kmpc_end_critical(ident_t *, kmp_int32 global_tid,
3968 kmp_critical_name *);
3969KMP_EXPORTextern void __kmpc_critical_with_hint(ident_t *, kmp_int32 global_tid,
3970 kmp_critical_name *, uint32_t hint);
3971
3972KMP_EXPORTextern kmp_int32 __kmpc_barrier_master(ident_t *, kmp_int32 global_tid);
3973KMP_EXPORTextern void __kmpc_end_barrier_master(ident_t *, kmp_int32 global_tid);
3974
3975KMP_EXPORTextern kmp_int32 __kmpc_barrier_master_nowait(ident_t *,
3976 kmp_int32 global_tid);
3977
3978KMP_EXPORTextern kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
3979KMP_EXPORTextern void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
3980
3981KMP_EXPORTextern kmp_int32 __kmpc_sections_init(ident_t *loc, kmp_int32 global_tid);
3982KMP_EXPORTextern kmp_int32 __kmpc_next_section(ident_t *loc, kmp_int32 global_tid,
3983 kmp_int32 numberOfSections);
3984KMP_EXPORTextern void __kmpc_end_sections(ident_t *loc, kmp_int32 global_tid);
3985
3986KMP_EXPORTextern void KMPC_FOR_STATIC_INIT(ident_t *loc, kmp_int32 global_tid,
3987 kmp_int32 schedtype, kmp_int32 *plastiter,
3988 kmp_int *plower, kmp_int *pupper,
3989 kmp_int *pstride, kmp_int incr,
3990 kmp_int chunk);
3991
3992KMP_EXPORTextern void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
3993
3994KMP_EXPORTextern void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
3995 size_t cpy_size, void *cpy_data,
3996 void (*cpy_func)(void *, void *),
3997 kmp_int32 didit);
3998
3999KMP_EXPORTextern void *__kmpc_copyprivate_light(ident_t *loc, kmp_int32 gtid,
4000 void *cpy_data);
4001
4002extern void KMPC_SET_NUM_THREADS(int arg);
4003extern void KMPC_SET_DYNAMIC(int flag);
4004extern void KMPC_SET_NESTED(int flag);
4005
4006/* OMP 3.0 tasking interface routines */
4007KMP_EXPORTextern kmp_int32 __kmpc_omp_task(ident_t *loc_ref, kmp_int32 gtid,
4008 kmp_task_t *new_task);
4009KMP_EXPORTextern kmp_task_t *__kmpc_omp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
4010 kmp_int32 flags,
4011 size_t sizeof_kmp_task_t,
4012 size_t sizeof_shareds,
4013 kmp_routine_entry_t task_entry);
4014KMP_EXPORTextern kmp_task_t *__kmpc_omp_target_task_alloc(
4015 ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t,
4016 size_t sizeof_shareds, kmp_routine_entry_t task_entry, kmp_int64 device_id);
4017KMP_EXPORTextern void __kmpc_omp_task_begin_if0(ident_t *loc_ref, kmp_int32 gtid,
4018 kmp_task_t *task);
4019KMP_EXPORTextern void __kmpc_omp_task_complete_if0(ident_t *loc_ref, kmp_int32 gtid,
4020 kmp_task_t *task);
4021KMP_EXPORTextern kmp_int32 __kmpc_omp_task_parts(ident_t *loc_ref, kmp_int32 gtid,
4022 kmp_task_t *new_task);
4023KMP_EXPORTextern kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid);
4024KMP_EXPORTextern kmp_int32 __kmpc_omp_taskyield(ident_t *loc_ref, kmp_int32 gtid,
4025 int end_part);
4026
4027#if TASK_UNUSED
4028void __kmpc_omp_task_begin(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task);
4029void __kmpc_omp_task_complete(ident_t *loc_ref, kmp_int32 gtid,
4030 kmp_task_t *task);
4031#endif // TASK_UNUSED
4032
4033/* ------------------------------------------------------------------------ */
4034
4035KMP_EXPORTextern void __kmpc_taskgroup(ident_t *loc, int gtid);
4036KMP_EXPORTextern void __kmpc_end_taskgroup(ident_t *loc, int gtid);
4037
4038KMP_EXPORTextern kmp_int32 __kmpc_omp_task_with_deps(
4039 ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 ndeps,
4040 kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
4041 kmp_depend_info_t *noalias_dep_list);
4042KMP_EXPORTextern void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid,
4043 kmp_int32 ndeps,
4044 kmp_depend_info_t *dep_list,
4045 kmp_int32 ndeps_noalias,
4046 kmp_depend_info_t *noalias_dep_list);
4047/* __kmpc_omp_taskwait_deps_51 : Function for OpenMP 5.1 nowait clause.
4048 * Placeholder for taskwait with nowait clause.*/
4049KMP_EXPORTextern void __kmpc_omp_taskwait_deps_51(ident_t *loc_ref, kmp_int32 gtid,
4050 kmp_int32 ndeps,
4051 kmp_depend_info_t *dep_list,
4052 kmp_int32 ndeps_noalias,
4053 kmp_depend_info_t *noalias_dep_list,
4054 kmp_int32 has_no_wait);
4055
4056extern kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task,
4057 bool serialize_immediate);
4058
4059KMP_EXPORTextern kmp_int32 __kmpc_cancel(ident_t *loc_ref, kmp_int32 gtid,
4060 kmp_int32 cncl_kind);
4061KMP_EXPORTextern kmp_int32 __kmpc_cancellationpoint(ident_t *loc_ref, kmp_int32 gtid,
4062 kmp_int32 cncl_kind);
4063KMP_EXPORTextern kmp_int32 __kmpc_cancel_barrier(ident_t *loc_ref, kmp_int32 gtid);
4064KMP_EXPORTextern int __kmp_get_cancellation_status(int cancel_kind);
4065
4066KMP_EXPORTextern void __kmpc_proxy_task_completed(kmp_int32 gtid, kmp_task_t *ptask);
4067KMP_EXPORTextern void __kmpc_proxy_task_completed_ooo(kmp_task_t *ptask);
4068KMP_EXPORTextern void __kmpc_taskloop(ident_t *loc, kmp_int32 gtid, kmp_task_t *task,
4069 kmp_int32 if_val, kmp_uint64 *lb,
4070 kmp_uint64 *ub, kmp_int64 st, kmp_int32 nogroup,
4071 kmp_int32 sched, kmp_uint64 grainsize,
4072 void *task_dup);
4073KMP_EXPORTextern void __kmpc_taskloop_5(ident_t *loc, kmp_int32 gtid,
4074 kmp_task_t *task, kmp_int32 if_val,
4075 kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
4076 kmp_int32 nogroup, kmp_int32 sched,
4077 kmp_uint64 grainsize, kmp_int32 modifier,
4078 void *task_dup);
4079KMP_EXPORTextern void *__kmpc_task_reduction_init(int gtid, int num_data, void *data);
4080KMP_EXPORTextern void *__kmpc_taskred_init(int gtid, int num_data, void *data);
4081KMP_EXPORTextern void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void *d);
4082KMP_EXPORTextern void *__kmpc_task_reduction_modifier_init(ident_t *loc, int gtid,
4083 int is_ws, int num,
4084 void *data);
4085KMP_EXPORTextern void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int is_ws,
4086 int num, void *data);
4087KMP_EXPORTextern void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid,
4088 int is_ws);
4089KMP_EXPORTextern kmp_int32 __kmpc_omp_reg_task_with_affinity(
4090 ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 naffins,
4091 kmp_task_affinity_info_t *affin_list);
4092KMP_EXPORTextern void __kmp_set_num_teams(int num_teams);
4093KMP_EXPORTextern int __kmp_get_max_teams(void);
4094KMP_EXPORTextern void __kmp_set_teams_thread_limit(int limit);
4095KMP_EXPORTextern int __kmp_get_teams_thread_limit(void);
4096
4097/* Interface target task integration */
4098KMP_EXPORTextern void **__kmpc_omp_get_target_async_handle_ptr(kmp_int32 gtid);
4099KMP_EXPORTextern bool __kmpc_omp_has_task_team(kmp_int32 gtid);
4100
4101/* Lock interface routines (fast versions with gtid passed in) */
4102KMP_EXPORTextern void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid,
4103 void **user_lock);
4104KMP_EXPORTextern void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid,
4105 void **user_lock);
4106KMP_EXPORTextern void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid,
4107 void **user_lock);
4108KMP_EXPORTextern void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid,
4109 void **user_lock);
4110KMP_EXPORTextern void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock);
4111KMP_EXPORTextern void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid,
4112 void **user_lock);
4113KMP_EXPORTextern void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid,
4114 void **user_lock);
4115KMP_EXPORTextern void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid,
4116 void **user_lock);
4117KMP_EXPORTextern int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock);
4118KMP_EXPORTextern int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid,
4119 void **user_lock);
4120
4121KMP_EXPORTextern void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid,
4122 void **user_lock, uintptr_t hint);
4123KMP_EXPORTextern void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
4124 void **user_lock,
4125 uintptr_t hint);
4126
4127/* Interface to fast scalable reduce methods routines */
4128
4129KMP_EXPORTextern kmp_int32 __kmpc_reduce_nowait(
4130 ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
4131 void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
4132 kmp_critical_name *lck);
4133KMP_EXPORTextern void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
4134 kmp_critical_name *lck);
4135KMP_EXPORTextern kmp_int32 __kmpc_reduce(
4136 ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
4137 void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
4138 kmp_critical_name *lck);
4139KMP_EXPORTextern void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
4140 kmp_critical_name *lck);
4141
4142/* Internal fast reduction routines */
4143
4144extern PACKED_REDUCTION_METHOD_T __kmp_determine_reduction_method(
4145 ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
4146 void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
4147 kmp_critical_name *lck);
4148
4149// this function is for testing set/get/determine reduce method
4150KMP_EXPORTextern kmp_int32 __kmp_get_reduce_method(void);
4151
4152KMP_EXPORTextern kmp_uint64 __kmpc_get_taskid();
4153KMP_EXPORTextern kmp_uint64 __kmpc_get_parent_taskid();
4154
4155// C++ port
4156// missing 'extern "C"' declarations
4157
4158KMP_EXPORTextern kmp_int32 __kmpc_in_parallel(ident_t *loc);
4159KMP_EXPORTextern void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid);
4160KMP_EXPORTextern void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
4161 kmp_int32 num_threads);
4162
4163KMP_EXPORTextern void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
4164 int proc_bind);
4165KMP_EXPORTextern void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
4166 kmp_int32 num_teams,
4167 kmp_int32 num_threads);
4168/* Function for OpenMP 5.1 num_teams clause */
4169KMP_EXPORTextern void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid,
4170 kmp_int32 num_teams_lb,
4171 kmp_int32 num_teams_ub,
4172 kmp_int32 num_threads);
4173KMP_EXPORTextern void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc,
4174 kmpc_micro microtask, ...);
4175struct kmp_dim { // loop bounds info casted to kmp_int64
4176 kmp_int64 lo; // lower
4177 kmp_int64 up; // upper
4178 kmp_int64 st; // stride
4179};
4180KMP_EXPORTextern void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
4181 kmp_int32 num_dims,
4182 const struct kmp_dim *dims);
4183KMP_EXPORTextern void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid,
4184 const kmp_int64 *vec);
4185KMP_EXPORTextern void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid,
4186 const kmp_int64 *vec);
4187KMP_EXPORTextern void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
4188
4189KMP_EXPORTextern void *__kmpc_threadprivate_cached(ident_t *loc, kmp_int32 global_tid,
4190 void *data, size_t size,
4191 void ***cache);
4192
4193// The routines below are not exported.
4194// Consider making them 'static' in corresponding source files.
4195void kmp_threadprivate_insert_private_data(int gtid, void *pc_addr,
4196 void *data_addr, size_t pc_size);
4197struct private_common *kmp_threadprivate_insert(int gtid, void *pc_addr,
4198 void *data_addr,
4199 size_t pc_size);
4200void __kmp_threadprivate_resize_cache(int newCapacity);
4201void __kmp_cleanup_threadprivate_caches();
4202
4203// ompc_, kmpc_ entries moved from omp.h.
4204#if KMP_OS_WINDOWS
4205#define KMPC_CONVENTION __cdecl
4206#else
4207#define KMPC_CONVENTION
4208#endif
4209
4210#ifndef __OMP_H
4211typedef enum omp_sched_t {
4212 omp_sched_static = 1,
4213 omp_sched_dynamic = 2,
4214 omp_sched_guided = 3,
4215 omp_sched_auto = 4
4216} omp_sched_t;
4217typedef void *kmp_affinity_mask_t;
4218#endif
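The omp_sched_t values above mirror the public schedule kinds. A minimal user-level sketch through the standard omp.h API (omp_set_schedule/omp_get_schedule) rather than the kmpc_/ompc_ entry points declared below:

// Illustrative user-level sketch only; compile with -fopenmp.
#include <omp.h>
#include <cstdio>

int main() {
  omp_set_schedule(omp_sched_dynamic, 4); // schedule kind + chunk size

  omp_sched_t kind;
  int chunk;
  omp_get_schedule(&kind, &chunk);
  std::printf("kind=%d chunk=%d\n", (int)kind, chunk);

  #pragma omp parallel for schedule(runtime)
  for (int i = 0; i < 100; ++i) {
    // iterations follow the schedule selected above
  }
  return 0;
}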
4219
4220KMP_EXPORTextern void KMPC_CONVENTION ompc_set_max_active_levels(int);
4221KMP_EXPORTextern void KMPC_CONVENTION ompc_set_schedule(omp_sched_t, int);
4222KMP_EXPORTextern int KMPC_CONVENTION ompc_get_ancestor_thread_num(int);
4223KMP_EXPORTextern int KMPC_CONVENTION ompc_get_team_size(int);
4224KMP_EXPORTextern int KMPC_CONVENTION
4225kmpc_set_affinity_mask_proc(int, kmp_affinity_mask_t *);
4226KMP_EXPORTextern int KMPC_CONVENTION
4227kmpc_unset_affinity_mask_proc(int, kmp_affinity_mask_t *);
4228KMP_EXPORTextern int KMPC_CONVENTION
4229kmpc_get_affinity_mask_proc(int, kmp_affinity_mask_t *);
4230
4231KMP_EXPORTextern void KMPC_CONVENTION kmpc_set_stacksize(int);
4232KMP_EXPORTextern void KMPC_CONVENTION kmpc_set_stacksize_s(size_t);
4233KMP_EXPORTextern void KMPC_CONVENTION kmpc_set_library(int);
4234KMP_EXPORTextern void KMPC_CONVENTION kmpc_set_defaults(char const *);
4235KMP_EXPORTextern void KMPC_CONVENTION kmpc_set_disp_num_buffers(int);
4236void KMP_EXPAND_NAME(ompc_set_affinity_format)(char const *format);
4237size_t KMP_EXPAND_NAME(ompc_get_affinity_format)(char *buffer, size_t size);
4238void KMP_EXPAND_NAME(ompc_display_affinity)(char const *format);
4239size_t KMP_EXPAND_NAME(ompc_capture_affinity)(char *buffer, size_t buf_size,
4240 char const *format);
4241
4242enum kmp_target_offload_kind {
4243 tgt_disabled = 0,
4244 tgt_default = 1,
4245 tgt_mandatory = 2
4246};
4247typedef enum kmp_target_offload_kind kmp_target_offload_kind_t;
4248// Set via OMP_TARGET_OFFLOAD if specified, defaults to tgt_default otherwise
4249extern kmp_target_offload_kind_t __kmp_target_offload;
4250extern int __kmpc_get_target_offload();
4251
4252// Constants used in libomptarget
4253#define KMP_DEVICE_DEFAULT -1 // This is libomptarget's default device.
4254#define KMP_DEVICE_ALL -11 // This is libomptarget's "all devices".
4255
4256// OMP Pause Resource
4257
4258// The following enum is used both to set the status in __kmp_pause_status, and
4259// as the internal equivalent of the externally-visible omp_pause_resource_t.
4260typedef enum kmp_pause_status_t {
4261 kmp_not_paused = 0, // status is not paused, or, requesting resume
4262 kmp_soft_paused = 1, // status is soft-paused, or, requesting soft pause
4263 kmp_hard_paused = 2 // status is hard-paused, or, requesting hard pause
4264} kmp_pause_status_t;
4265
4266// This stores the pause state of the runtime
4267extern kmp_pause_status_t __kmp_pause_status;
4268extern int __kmpc_pause_resource(kmp_pause_status_t level);
4269extern int __kmp_pause_resource(kmp_pause_status_t level);
4270// Soft resume sets __kmp_pause_status, and wakes up all threads.
4271extern void __kmp_resume_if_soft_paused();
4272// Hard resume simply resets the status to not paused. Library will appear to
4273// be uninitialized after hard pause. Let OMP constructs trigger required
4274// initializations.
4275static inline void __kmp_resume_if_hard_paused() {
4276 if (__kmp_pause_status == kmp_hard_paused) {
4277 __kmp_pause_status = kmp_not_paused;
4278 }
4279}
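kmp_pause_status_t is described above as the internal equivalent of omp_pause_resource_t. A minimal user-level sketch of a soft pause via the standard OpenMP 5.0 API (omp_pause_resource_all), not the internal __kmpc_pause_resource entry point:

// Illustrative user-level sketch only; compile with -fopenmp.
#include <omp.h>
#include <cstdio>

int main() {
  #pragma omp parallel
  { /* warm up the thread pool */ }

  // Ask the runtime to release worker resources between phases.
  if (omp_pause_resource_all(omp_pause_soft) != 0)
    std::printf("pause request was not honored\n");

  // The next parallel region re-creates whatever was released.
  #pragma omp parallel
  { }
  return 0;
}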
4280
4281extern void __kmp_omp_display_env(int verbose);
4282
4283// 1: it is initializing hidden helper team
4284extern volatile int __kmp_init_hidden_helper;
4285// 1: the hidden helper team is done
4286extern volatile int __kmp_hidden_helper_team_done;
4287// 1: enable hidden helper task
4288extern kmp_int32 __kmp_enable_hidden_helper;
4289// Main thread of hidden helper team
4290extern kmp_info_t *__kmp_hidden_helper_main_thread;
4291// Descriptors for the hidden helper threads
4292extern kmp_info_t **__kmp_hidden_helper_threads;
4293// Number of hidden helper threads
4294extern kmp_int32 __kmp_hidden_helper_threads_num;
4295// Number of hidden helper tasks that have not been executed yet
4296extern std::atomic<kmp_int32> __kmp_unexecuted_hidden_helper_tasks;
4297
4298extern void __kmp_hidden_helper_initialize();
4299extern void __kmp_hidden_helper_threads_initz_routine();
4300extern void __kmp_do_initialize_hidden_helper_threads();
4301extern void __kmp_hidden_helper_threads_initz_wait();
4302extern void __kmp_hidden_helper_initz_release();
4303extern void __kmp_hidden_helper_threads_deinitz_wait();
4304extern void __kmp_hidden_helper_threads_deinitz_release();
4305extern void __kmp_hidden_helper_main_thread_wait();
4306extern void __kmp_hidden_helper_worker_thread_wait();
4307extern void __kmp_hidden_helper_worker_thread_signal();
4308extern void __kmp_hidden_helper_main_thread_release();
4309
4310// Check whether a given thread is a hidden helper thread
4311#define KMP_HIDDEN_HELPER_THREAD(gtid) \
4312 ((gtid) >= 1 && (gtid) <= __kmp_hidden_helper_threads_num)
4313
4314#define KMP_HIDDEN_HELPER_WORKER_THREAD(gtid) \
4315 ((gtid) > 1 && (gtid) <= __kmp_hidden_helper_threads_num)
4316
4317#define KMP_HIDDEN_HELPER_MAIN_THREAD(gtid) \
4318 ((gtid) == 1 && (gtid) <= __kmp_hidden_helper_threads_num)
4319
4320#define KMP_HIDDEN_HELPER_TEAM(team) \
4321 (team->t.t_threads[0] == __kmp_hidden_helper_main_thread)
4322
4323// Map a gtid to a hidden helper thread. The first hidden helper thread, a.k.a
4324// main thread, is skipped.
4325#define KMP_GTID_TO_SHADOW_GTID(gtid) \
4326 ((gtid) % (__kmp_hidden_helper_threads_num - 1) + 2)
4327
4328// Return the adjusted gtid value by subtracting from gtid the number
4329// of hidden helper threads. This adjusted value is the gtid the thread would
4330// have received if there were no hidden helper threads.
4331static inline int __kmp_adjust_gtid_for_hidden_helpers(int gtid) {
4332 int adjusted_gtid = gtid;
4333 if (__kmp_hidden_helper_threads_num > 0 && gtid > 0 &&
4334 gtid - __kmp_hidden_helper_threads_num >= 0) {
4335 adjusted_gtid -= __kmp_hidden_helper_threads_num;
4336 }
4337 return adjusted_gtid;
4338}
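A minimal standalone sketch of the two mappings above (KMP_GTID_TO_SHADOW_GTID and __kmp_adjust_gtid_for_hidden_helpers), restated with an assumed hidden-helper count of 8 purely for illustration:

// Illustrative sketch only: the same arithmetic as the macro and inline
// function above, with a fixed helper count for the example.
#include <cstdio>

static int hidden_helper_threads_num = 8; // assumption for the example

static int shadow_gtid(int gtid) { // mirrors KMP_GTID_TO_SHADOW_GTID
  return gtid % (hidden_helper_threads_num - 1) + 2;
}

static int adjust_gtid(int gtid) { // mirrors __kmp_adjust_gtid_for_hidden_helpers
  if (hidden_helper_threads_num > 0 && gtid > 0 &&
      gtid - hidden_helper_threads_num >= 0)
    gtid -= hidden_helper_threads_num;
  return gtid;
}

int main() {
  // With helpers occupying gtids 1..8, gtid 9 is the first regular worker, so
  // its adjusted gtid is 1; gtid 10 maps to shadow gtid 10 % 7 + 2 = 5.
  std::printf("adjust(9)=%d shadow(10)=%d\n", adjust_gtid(9), shadow_gtid(10));
}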
4339
4340// Support for error directive
4341typedef enum kmp_severity_t {
4342 severity_warning = 1,
4343 severity_fatal = 2
4344} kmp_severity_t;
4345extern void __kmpc_error(ident_t *loc, int severity, const char *message);
4346
4347// Support for scope directive
4348KMP_EXPORTextern void __kmpc_scope(ident_t *loc, kmp_int32 gtid, void *reserved);
4349KMP_EXPORTextern void __kmpc_end_scope(ident_t *loc, kmp_int32 gtid, void *reserved);
4350
4351#ifdef __cplusplus
4352}
4353#endif
4354
4355template <bool C, bool S>
4356extern void __kmp_suspend_32(int th_gtid, kmp_flag_32<C, S> *flag);
4357template <bool C, bool S>
4358extern void __kmp_suspend_64(int th_gtid, kmp_flag_64<C, S> *flag);
4359template <bool C, bool S>
4360extern void __kmp_atomic_suspend_64(int th_gtid,
4361 kmp_atomic_flag_64<C, S> *flag);
4362extern void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag);
4363#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
4364template <bool C, bool S>
4365extern void __kmp_mwait_32(int th_gtid, kmp_flag_32<C, S> *flag);
4366template <bool C, bool S>
4367extern void __kmp_mwait_64(int th_gtid, kmp_flag_64<C, S> *flag);
4368template <bool C, bool S>
4369extern void __kmp_atomic_mwait_64(int th_gtid, kmp_atomic_flag_64<C, S> *flag);
4370extern void __kmp_mwait_oncore(int th_gtid, kmp_flag_oncore *flag);
4371#endif
4372template <bool C, bool S>
4373extern void __kmp_resume_32(int target_gtid, kmp_flag_32<C, S> *flag);
4374template <bool C, bool S>
4375extern void __kmp_resume_64(int target_gtid, kmp_flag_64<C, S> *flag);
4376template <bool C, bool S>
4377extern void __kmp_atomic_resume_64(int target_gtid,
4378 kmp_atomic_flag_64<C, S> *flag);
4379extern void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag);
4380
4381template <bool C, bool S>
4382int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid,
4383 kmp_flag_32<C, S> *flag, int final_spin,
4384 int *thread_finished,
4385#if USE_ITT_BUILD
4386 void *itt_sync_obj,
4387#endif /* USE_ITT_BUILD */
4388 kmp_int32 is_constrained);
4389template <bool C, bool S>
4390int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid,
4391 kmp_flag_64<C, S> *flag, int final_spin,
4392 int *thread_finished,
4393#if USE_ITT_BUILD
4394 void *itt_sync_obj,
4395#endif /* USE_ITT_BUILD */
4396 kmp_int32 is_constrained);
4397template <bool C, bool S>
4398int __kmp_atomic_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid,
4399 kmp_atomic_flag_64<C, S> *flag,
4400 int final_spin, int *thread_finished,
4401#if USE_ITT_BUILD
4402 void *itt_sync_obj,
4403#endif /* USE_ITT_BUILD */
4404 kmp_int32 is_constrained);
4405int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid,
4406 kmp_flag_oncore *flag, int final_spin,
4407 int *thread_finished,
4408#if USE_ITT_BUILD
4409 void *itt_sync_obj,
4410#endif /* USE_ITT_BUILD */
4411 kmp_int32 is_constrained);
4412
4413extern int __kmp_nesting_mode;
4414extern int __kmp_nesting_mode_nlevels;
4415extern int *__kmp_nesting_nth_level;
4416extern void __kmp_init_nesting_mode();
4417extern void __kmp_set_nesting_mode_threads();
4418
4419/// This class safely opens and closes a C-style FILE* object using RAII
4420/// semantics. There are also methods which allow using stdout or stderr as
4421/// the underlying FILE* object. With the implicit conversion operator to
4422/// FILE*, an object with this type can be used in any function which takes
4423/// a FILE* object e.g., fprintf().
4424/// No close method is needed at use sites.
4425class kmp_safe_raii_file_t {
4426 FILE *f;
4427
4428 void close() {
4429 if (f && f != stdout && f != stderr) {
4430 fclose(f);
4431 f = nullptr;
4432 }
4433 }
4434
4435public:
4436 kmp_safe_raii_file_t() : f(nullptr) {}
4437 kmp_safe_raii_file_t(const char *filename, const char *mode,
4438 const char *env_var = nullptr)
4439 : f(nullptr) {
4440 open(filename, mode, env_var);
4441 }
4442 ~kmp_safe_raii_file_t() { close(); }
4443
4444 /// Open filename using mode. This is automatically closed in the destructor.
4445 /// The env_var parameter indicates the environment variable the filename
4446 /// came from if != nullptr.
4447 void open(const char *filename, const char *mode,
4448 const char *env_var = nullptr) {
4449 KMP_ASSERT(!f);
4450 f = fopen(filename, mode);
4451 if (!f) {
4452 int code = errno;
4453 if (env_var) {
4454 __kmp_fatal(KMP_MSG(CantOpenFileForReading, filename), KMP_ERR(code),
4455 KMP_HNT(CheckEnvVar, env_var, filename), __kmp_msg_null);
4456 } else {
4457 __kmp_fatal(KMP_MSG(CantOpenFileForReading, filename), KMP_ERR(code),
4458 __kmp_msg_null);
4459 }
4460 }
4461 }
4462 /// Instead of erroring out, return a non-zero errno value when
4463 /// fopen() fails for any reason
4464 int try_open(const char *filename, const char *mode) {
4465 KMP_ASSERT(!f);
4466 f = fopen(filename, mode);
4467 if (!f)
4468 return errno;
4469 return 0;
4470 }
4471 /// Set the FILE* object to stdout and output there
4472 /// No open call should happen before this call.
4473 void set_stdout() {
4474 KMP_ASSERT(!f);
4475 f = stdout;
4476 }
4477 /// Set the FILE* object to stderr and output there
4478 /// No open call should happen before this call.
4479 void set_stderr() {
4480 KMP_ASSERT(!f);
4481 f = stderr;
4482 }
4483 operator bool() { return bool(f); }
4484 operator FILE *() { return f; }
4485};
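Given the interface above, a typical use might look like the following sketch (the function and output text are invented for illustration; failure to open is handled by the class itself, which calls __kmp_fatal):

  // The file is closed automatically when 'out' leaves scope; stdout/stderr
  // set via set_stdout()/set_stderr() are never fclose()d by the destructor.
  static void dump_settings_sketch(const char *filename) {
    kmp_safe_raii_file_t out(filename, "w");
    fprintf(out, "runtime settings dump\n"); // implicit conversion to FILE *
  }

  static void dump_to_stdout_sketch() {
    kmp_safe_raii_file_t out;
    out.set_stdout(); // no fopen(); subsequent output goes to stdout
    fprintf(out, "hello from the runtime\n");
  }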
4486
4487template <typename SourceType, typename TargetType,
4488 bool isSourceSmaller = (sizeof(SourceType) < sizeof(TargetType)),
4489 bool isSourceEqual = (sizeof(SourceType) == sizeof(TargetType)),
4490 bool isSourceSigned = std::is_signed<SourceType>::value,
4491 bool isTargetSigned = std::is_signed<TargetType>::value>
4492struct kmp_convert {};
4493
4494// Both types are signed; Source smaller
4495template <typename SourceType, typename TargetType>
4496struct kmp_convert<SourceType, TargetType, true, false, true, true> {
4497 static TargetType to(SourceType src) { return (TargetType)src; }
4498};
4499// Source equal
4500template <typename SourceType, typename TargetType>
4501struct kmp_convert<SourceType, TargetType, false, true, true, true> {
4502 static TargetType to(SourceType src) { return src; }
4503};
4504// Source bigger
4505template <typename SourceType, typename TargetType>
4506struct kmp_convert<SourceType, TargetType, false, false, true, true> {
4507 static TargetType to(SourceType src) {
4508 KMP_ASSERT(src <= static_cast<SourceType>(
4509 (std::numeric_limits<TargetType>::max)()));
4510 KMP_ASSERT(src >= static_cast<SourceType>(
4511 (std::numeric_limits<TargetType>::min)()));
4512 return (TargetType)src;
4513 }
4514};
4515
4516// Source signed, Target unsigned
4517// Source smaller
4518template <typename SourceType, typename TargetType>
4519struct kmp_convert<SourceType, TargetType, true, false, true, false> {
4520 static TargetType to(SourceType src) {
4521 KMP_ASSERT(src >= 0);
4522 return (TargetType)src;
4523 }
4524};
4525// Source equal
4526template <typename SourceType, typename TargetType>
4527struct kmp_convert<SourceType, TargetType, false, true, true, false> {
4528 static TargetType to(SourceType src) {
4529 KMP_ASSERT(src >= 0);
4530 return (TargetType)src;
4531 }
4532};
4533// Source bigger
4534template <typename SourceType, typename TargetType>
4535struct kmp_convert<SourceType, TargetType, false, false, true, false> {
4536 static TargetType to(SourceType src) {
4537 KMP_ASSERT(src >= 0);
4538 KMP_ASSERT(src <= static_cast<SourceType>(
4539 (std::numeric_limits<TargetType>::max)()));
4540 return (TargetType)src;
4541 }
4542};
4543
4544// Source unsigned, Target signed
4545// Source smaller
4546template <typename SourceType, typename TargetType>
4547struct kmp_convert<SourceType, TargetType, true, false, false, true> {
4548 static TargetType to(SourceType src) { return (TargetType)src; }
4549};
4550// Source equal
4551template <typename SourceType, typename TargetType>
4552struct kmp_convert<SourceType, TargetType, false, true, false, true> {
4553 static TargetType to(SourceType src) {
4554 KMP_ASSERT(src <= static_cast<SourceType>(
4555 (std::numeric_limits<TargetType>::max)()));
4556 return (TargetType)src;
4557 }
4558};
4559// Source bigger
4560template <typename SourceType, typename TargetType>
4561struct kmp_convert<SourceType, TargetType, false, false, false, true> {
4562 static TargetType to(SourceType src) {
4563 KMP_ASSERT(src <= static_cast<SourceType>(
4564 (std::numeric_limits<TargetType>::max)()));
4565 return (TargetType)src;
4566 }
4567};
4568
4569// Source unsigned, Target unsigned
4570// Source smaller
4571template <typename SourceType, typename TargetType>
4572struct kmp_convert<SourceType, TargetType, true, false, false, false> {
4573 static TargetType to(SourceType src) { return (TargetType)src; }
4574};
4575// Source equal
4576template <typename SourceType, typename TargetType>
4577struct kmp_convert<SourceType, TargetType, false, true, false, false> {
4578 static TargetType to(SourceType src) { return src; }
4579};
4580// Source bigger
4581template <typename SourceType, typename TargetType>
4582struct kmp_convert<SourceType, TargetType, false, false, false, false> {
4583 static TargetType to(SourceType src) {
4584 KMP_ASSERT(src <= static_cast<SourceType>(
4585 (std::numeric_limits<TargetType>::max)()));
4586 return (TargetType)src;
4587 }
4588};
4589
4590template <typename T1, typename T2>
4591static inline void __kmp_type_convert(T1 src, T2 *dest) {
4592 *dest = kmp_convert<T1, T2>::to(src);
4593}
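A short usage sketch for the helper above (the values are arbitrary; kmp_uint64, kmp_int32 and kmp_int64 are the fixed-width types used throughout this header):

  static void type_convert_sketch() {
    kmp_uint64 nitems = 4096;
    kmp_int32 narrow_count = 0;
    // Unsigned -> signed with a wider source: dispatches to the
    // specialization that asserts the value fits before truncating.
    __kmp_type_convert(nitems, &narrow_count);

    kmp_int32 delta = -7;
    kmp_int64 wide = 0;
    // Signed -> signed widening: the no-check specialization, a plain cast.
    __kmp_type_convert(delta, &wide);
  }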
4594
4595#endif /* KMP_H */