Bug Summary

File: build/source/openmp/runtime/src/kmp_runtime.cpp
Warning: line 1451, column 9
1st function call argument is an uninitialized value
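
This warning is produced by the analyzer's core checkers (core.CallAndMessage) when a value is passed to a function before it has been assigned on at least one feasible path. The flagged code path at kmp_runtime.cpp:1451 is not part of this excerpt, so the following is only a minimal, hypothetical reduction of the bug class the checker reports, not the runtime's actual code:

#include <cstdio>

static void report(int code) { std::printf("code = %d\n", code); }

static void example(bool ready) {
  int code; // only assigned on the 'ready' path
  if (ready)
    code = 42;
  // On the path where 'ready' is false, 'code' is still uninitialized here,
  // so the analyzer reports:
  //   1st function call argument is an uninitialized value
  report(code);
}

int main() {
  example(false);
  return 0;
}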

Annotated Source Code


clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name kmp_runtime.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -resource-dir /usr/lib/llvm-16/lib/clang/16 -I projects/openmp/runtime/src -I /build/source/openmp/runtime/src -I include -I /build/source/llvm/include -I /build/source/openmp/runtime/src/i18n -I /build/source/openmp/runtime/src/include -I /build/source/openmp/runtime/src/thirdparty/ittnotify -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -D omp_EXPORTS -D _FORTIFY_SOURCE=2 -D NDEBUG -D _GNU_SOURCE -D _REENTRANT -D _FORTIFY_SOURCE=2 -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-16/lib/clang/16/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fmacro-prefix-map=/build/source/= -fcoverage-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fcoverage-prefix-map=/build/source/= -source-date-epoch 1670584389 -O2 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -Wno-misleading-indentation -Wno-enum-constexpr-conversion -Wno-extra -Wno-pedantic -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-covered-switch-default -Wno-frame-address -Wno-strict-aliasing -Wno-stringop-truncation -Wno-switch -Wno-uninitialized -Wno-return-type-c-linkage -Wno-cast-qual -Wno-int-to-void-pointer-cast -std=c++17 -fdeprecated-macro -fdebug-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/= -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fno-rtti -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o 
/tmp/scan-build-2022-12-09-134624-15957-1 -x c++ /build/source/openmp/runtime/src/kmp_runtime.cpp

/build/source/openmp/runtime/src/kmp_runtime.cpp

1/*
2 * kmp_runtime.cpp -- KPTS runtime support library
3 */
4
5//===----------------------------------------------------------------------===//
6//
7// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8// See https://llvm.org/LICENSE.txt for license information.
9// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10//
11//===----------------------------------------------------------------------===//
12
13#include "kmp.h"
14#include "kmp_affinity.h"
15#include "kmp_atomic.h"
16#include "kmp_environment.h"
17#include "kmp_error.h"
18#include "kmp_i18n.h"
19#include "kmp_io.h"
20#include "kmp_itt.h"
21#include "kmp_settings.h"
22#include "kmp_stats.h"
23#include "kmp_str.h"
24#include "kmp_wait_release.h"
25#include "kmp_wrapper_getpid.h"
26#include "kmp_dispatch.h"
27#if KMP_USE_HIER_SCHED0
28#include "kmp_dispatch_hier.h"
29#endif
30
31#if OMPT_SUPPORT1
32#include "ompt-specific.h"
33#endif
34#if OMPD_SUPPORT1
35#include "ompd-specific.h"
36#endif
37
38#if OMP_PROFILING_SUPPORT0
39#include "llvm/Support/TimeProfiler.h"
40static char *ProfileTraceFile = nullptr;
41#endif
42
43/* these are temporary issues to be dealt with */
44#define KMP_USE_PRCTL 0
45
46#if KMP_OS_WINDOWS0
47#include <process.h>
48#endif
49
50#if KMP_OS_WINDOWS0
51// windows does not need include files as it doesn't use shared memory
52#else
53#include <sys/mman.h>
54#include <sys/stat.h>
55#include <fcntl.h>
56#define SHM_SIZE 1024
57#endif
58
59#if defined(KMP_GOMP_COMPAT)
60char const __kmp_version_alt_comp[] =
61 KMP_VERSION_PREFIX"\x00@(#) " "LLVM OMP " "alternative compiler support: yes";
62#endif /* defined(KMP_GOMP_COMPAT) */
63
64char const __kmp_version_omp_api[] =
65 KMP_VERSION_PREFIX"\x00@(#) " "LLVM OMP " "API version: 5.0 (201611)";
66
67#ifdef KMP_DEBUG1
68char const __kmp_version_lock[] =
69 KMP_VERSION_PREFIX"\x00@(#) " "LLVM OMP " "lock type: run time selectable";
70#endif /* KMP_DEBUG */
71
72#define KMP_MIN(x, y) ((x) < (y) ? (x) : (y))
73
74/* ------------------------------------------------------------------------ */
75
76#if KMP_USE_MONITOR
77kmp_info_t __kmp_monitor;
78#endif
79
80/* Forward declarations */
81
82void __kmp_cleanup(void);
83
84static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *, int tid,
85 int gtid);
86static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
87 kmp_internal_control_t *new_icvs,
88 ident_t *loc);
89#if KMP_AFFINITY_SUPPORTED1
90static void __kmp_partition_places(kmp_team_t *team,
91 int update_master_only = 0);
92#endif
93static void __kmp_do_serial_initialize(void);
94void __kmp_fork_barrier(int gtid, int tid);
95void __kmp_join_barrier(int gtid);
96void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
97 kmp_internal_control_t *new_icvs, ident_t *loc);
98
99#ifdef USE_LOAD_BALANCE1
100static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc);
101#endif
102
103static int __kmp_expand_threads(int nNeed);
104#if KMP_OS_WINDOWS0
105static int __kmp_unregister_root_other_thread(int gtid);
106#endif
107static void __kmp_reap_thread(kmp_info_t *thread, int is_root);
108kmp_info_t *__kmp_thread_pool_insert_pt = NULL__null;
109
110void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
111 int new_nthreads);
112void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads);
113
114/* Calculate the identifier of the current thread */
115/* fast (and somewhat portable) way to get unique identifier of executing
116 thread. Returns KMP_GTID_DNE if we haven't been assigned a gtid. */
117int __kmp_get_global_thread_id() {
118 int i;
119 kmp_info_t **other_threads;
120 size_t stack_data;
121 char *stack_addr;
122 size_t stack_size;
123 char *stack_base;
124
125 KA_TRACE(
126     1000,
127     ("*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
128      __kmp_nth, __kmp_all_nth));
129
130 /* JPH - to handle the case where __kmpc_end(0) is called immediately prior to
131 a parallel region, made it return KMP_GTID_DNE to force serial_initialize
132 by caller. Had to handle KMP_GTID_DNE at all call-sites, or else guarantee
133 __kmp_init_gtid for this to work. */
134
135 if (!TCR_4(__kmp_init_gtid)(__kmp_init_gtid))
136 return KMP_GTID_DNE(-2);
137
138#ifdef KMP_TDATA_GTID1
139 if (TCR_4(__kmp_gtid_mode)(__kmp_gtid_mode) >= 3) {
140 KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using TDATA\n"));
141 return __kmp_gtid;
142 }
143#endif
144 if (TCR_4(__kmp_gtid_mode)(__kmp_gtid_mode) >= 2) {
145 KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using keyed TLS\n"));
146 return __kmp_gtid_get_specific();
147 }
148 KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using internal alg.\n"));
149
150 stack_addr = (char *)&stack_data;
151 other_threads = __kmp_threads;
152
153 /* ATT: The code below is a source of potential bugs due to unsynchronized
154 access to __kmp_threads array. For example:
155 1. Current thread loads other_threads[i] to thr and checks it, it is
156 non-NULL.
157 2. Current thread is suspended by OS.
158 3. Another thread unregisters and finishes (debug versions of free()
159 may fill memory with something like 0xEF).
160 4. Current thread is resumed.
161 5. Current thread reads junk from *thr.
162 TODO: Fix it. --ln */
163
164 for (i = 0; i < __kmp_threads_capacity; i++) {
165
166 kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i])((void *)(other_threads[i]));
167 if (!thr)
168 continue;
169
170 stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize)((void *)(thr->th.th_info.ds.ds_stacksize));
171 stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase)((void *)(thr->th.th_info.ds.ds_stackbase));
172
173 /* stack grows down -- search through all of the active threads */
174
175 if (stack_addr <= stack_base) {
176 size_t stack_diff = stack_base - stack_addr;
177
178 if (stack_diff <= stack_size) {
179 /* The only way we can be closer than the allocated */
180 /* stack size is if we are running on this thread. */
181 KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == i);
182 return i;
183 }
184 }
185 }
186
187 /* get specific to try and determine our gtid */
188 KA_TRACE(1000,
189          ("*** __kmp_get_global_thread_id: internal alg. failed to find "
190           "thread, using TLS\n"));
191 i = __kmp_gtid_get_specific();
192
193 /*fprintf( stderr, "=== %d\n", i ); */ /* GROO */
194
195 /* if we havn't been assigned a gtid, then return code */
196 if (i < 0)
197 return i;
198
199 /* dynamically updated stack window for uber threads to avoid get_specific
200 call */
201 if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)(other_threads[i]->th.th_info.ds.ds_stackgrow)) {
202 KMP_FATAL(StackOverflow, i);
203 }
204
205 stack_base = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
206 if (stack_addr > stack_base) {
207 TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
208 TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
209         other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr -
210             stack_base);
211 } else {
212 TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
213         stack_base - stack_addr);
214 }
215
216 /* Reprint stack bounds for ubermaster since they have been refined */
217 if (__kmp_storage_map) {
218 char *stack_end = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
219 char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
220 __kmp_print_storage_map_gtid(i, stack_beg, stack_end,
221 other_threads[i]->th.th_info.ds.ds_stacksize,
222 "th_%d stack (refinement)", i);
223 }
224 return i;
225}
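
The function above implements the "internal algorithm" named in the traces: every registered thread records its stack base and stack size, and the runtime identifies the caller by checking which recorded range contains the address of a local variable (stacks grow downward). A simplified, self-contained sketch of that idea, using hypothetical types rather than the runtime's kmp_info_t bookkeeping:

#include <cstddef>

struct StackRecord {
  char *stack_base;  // highest address of the thread's stack
  size_t stack_size; // size of the region below stack_base
};

// Returns the index of the record whose stack range contains the caller's
// stack, or -1 if no registered range matches.
int find_thread_by_stack(const StackRecord *records, int count) {
  char probe; // lives on the caller's stack
  char *stack_addr = &probe;
  for (int i = 0; i < count; ++i) {
    const StackRecord &r = records[i];
    if (r.stack_base == nullptr)
      continue;
    // Stacks grow down: the caller's address must lie within
    // [stack_base - stack_size, stack_base].
    if (stack_addr <= r.stack_base &&
        (size_t)(r.stack_base - stack_addr) <= r.stack_size)
      return i;
  }
  return -1;
}
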
226
227int __kmp_get_global_thread_id_reg() {
228 int gtid;
229
230 if (!__kmp_init_serial) {
231 gtid = KMP_GTID_DNE(-2);
232 } else
233#ifdef KMP_TDATA_GTID1
234 if (TCR_4(__kmp_gtid_mode)(__kmp_gtid_mode) >= 3) {
235 KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using TDATA\n"));
236 gtid = __kmp_gtid;
237 } else
238#endif
239 if (TCR_4(__kmp_gtid_mode)(__kmp_gtid_mode) >= 2) {
240 KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using keyed TLS\n"));
241 gtid = __kmp_gtid_get_specific();
242 } else {
243 KA_TRACE(1000,
244          ("*** __kmp_get_global_thread_id_reg: using internal alg.\n"));
245 gtid = __kmp_get_global_thread_id();
246 }
247
248 /* we must be a new uber master sibling thread */
249 if (gtid == KMP_GTID_DNE(-2)) {
250 KA_TRACE(10,
251          ("__kmp_get_global_thread_id_reg: Encountered new root thread. "
252           "Registering a new gtid.\n"));
253 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
254 if (!__kmp_init_serial) {
255 __kmp_do_serial_initialize();
256 gtid = __kmp_gtid_get_specific();
257 } else {
258 gtid = __kmp_register_root(FALSE0);
259 }
260 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
261 /*__kmp_printf( "+++ %d\n", gtid ); */ /* GROO */
262 }
263
264 KMP_DEBUG_ASSERT(gtid >= 0);
265
266 return gtid;
267}
268
269/* caller must hold forkjoin_lock */
270void __kmp_check_stack_overlap(kmp_info_t *th) {
271 int f;
272 char *stack_beg = NULL__null;
273 char *stack_end = NULL__null;
274 int gtid;
275
276 KA_TRACE(10, ("__kmp_check_stack_overlap: called\n"));
277 if (__kmp_storage_map) {
278 stack_end = (char *)th->th.th_info.ds.ds_stackbase;
279 stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
280
281 gtid = __kmp_gtid_from_thread(th);
282
283 if (gtid == KMP_GTID_MONITOR(-4)) {
284 __kmp_print_storage_map_gtid(
285 gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
286 "th_%s stack (%s)", "mon",
287 (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
288 } else {
289 __kmp_print_storage_map_gtid(
290 gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
291 "th_%d stack (%s)", gtid,
292 (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
293 }
294 }
295
296 /* No point in checking ubermaster threads since they use refinement and
297 * cannot overlap */
298 gtid = __kmp_gtid_from_thread(th);
299 if (__kmp_env_checks == TRUE(!0) && !KMP_UBER_GTID(gtid)) {
300 KA_TRACE(10,
301          ("__kmp_check_stack_overlap: performing extensive checking\n"));
302 if (stack_beg == NULL__null) {
303 stack_end = (char *)th->th.th_info.ds.ds_stackbase;
304 stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
305 }
306
307 for (f = 0; f < __kmp_threads_capacity; f++) {
308 kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f])((void *)(__kmp_threads[f]));
309
310 if (f_th && f_th != th) {
311 char *other_stack_end =
312 (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase)((void *)(f_th->th.th_info.ds.ds_stackbase));
313 char *other_stack_beg =
314 other_stack_end - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize)((void *)(f_th->th.th_info.ds.ds_stacksize));
315 if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
316 (stack_end > other_stack_beg && stack_end < other_stack_end)) {
317
318 /* Print the other stack values before the abort */
319 if (__kmp_storage_map)
320 __kmp_print_storage_map_gtid(
321 -1, other_stack_beg, other_stack_end,
322 (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize)((void *)(f_th->th.th_info.ds.ds_stacksize)),
323 "th_%d stack (overlapped)", __kmp_gtid_from_thread(f_th));
324
325 __kmp_fatal(KMP_MSG(StackOverlap), KMP_HNT(ChangeStackLimit),
326 __kmp_msg_null);
327 }
328 }
329 }
330 }
331 KA_TRACE(10, ("__kmp_check_stack_overlap: returning\n"));
332}
333
334/* ------------------------------------------------------------------------ */
335
336void __kmp_infinite_loop(void) {
337 static int done = FALSE0;
338
339 while (!done) {
340 KMP_YIELD(TRUE);
341 }
342}
343
344#define MAX_MESSAGE 512
345
346void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, size_t size,
347 char const *format, ...) {
348 char buffer[MAX_MESSAGE512];
349 va_list ap;
350
351 va_start(ap, format)__builtin_va_start(ap, format);
352 KMP_SNPRINTFsnprintf(buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1,
353 p2, (unsigned long)size, format);
354 __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
355 __kmp_vprintf(kmp_err, buffer, ap);
356#if KMP_PRINT_DATA_PLACEMENT
357 int node;
358 if (gtid >= 0) {
359 if (p1 <= p2 && (char *)p2 - (char *)p1 == size) {
360 if (__kmp_storage_map_verbose) {
361 node = __kmp_get_host_node(p1);
362 if (node < 0) /* doesn't work, so don't try this next time */
363 __kmp_storage_map_verbose = FALSE0;
364 else {
365 char *last;
366 int lastNode;
367 int localProc = __kmp_get_cpu_from_gtid(gtid);
368
369 const int page_size = KMP_GET_PAGE_SIZE()getpagesize();
370
371 p1 = (void *)((size_t)p1 & ~((size_t)page_size - 1));
372 p2 = (void *)(((size_t)p2 - 1) & ~((size_t)page_size - 1));
373 if (localProc >= 0)
374 __kmp_printf_no_lock(" GTID %d localNode %d\n", gtid,
375 localProc >> 1);
376 else
377 __kmp_printf_no_lock(" GTID %d\n", gtid);
378#if KMP_USE_PRCTL0
379 /* The more elaborate format is disabled for now because of the prctl
380 * hanging bug. */
381 do {
382 last = p1;
383 lastNode = node;
384 /* This loop collates adjacent pages with the same host node. */
385 do {
386 (char *)p1 += page_size;
387 } while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
388 __kmp_printf_no_lock(" %p-%p memNode %d\n", last, (char *)p1 - 1,
389 lastNode);
390 } while (p1 <= p2);
391#else
392 __kmp_printf_no_lock(" %p-%p memNode %d\n", p1,
393 (char *)p1 + (page_size - 1),
394 __kmp_get_host_node(p1));
395 if (p1 < p2) {
396 __kmp_printf_no_lock(" %p-%p memNode %d\n", p2,
397 (char *)p2 + (page_size - 1),
398 __kmp_get_host_node(p2));
399 }
400#endif
401 }
402 }
403 } else
404 __kmp_printf_no_lock(" %s\n", KMP_I18N_STR(StorageMapWarning)__kmp_i18n_catgets(kmp_i18n_str_StorageMapWarning));
405 }
406#endif /* KMP_PRINT_DATA_PLACEMENT */
407 __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
408}
409
410void __kmp_warn(char const *format, ...) {
411 char buffer[MAX_MESSAGE512];
412 va_list ap;
413
414 if (__kmp_generate_warnings == kmp_warnings_off) {
415 return;
416 }
417
418 va_start(ap, format)__builtin_va_start(ap, format);
419
420 KMP_SNPRINTFsnprintf(buffer, sizeof(buffer), "OMP warning: %s\n", format);
421 __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
422 __kmp_vprintf(kmp_err, buffer, ap);
423 __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
424
425 va_end(ap)__builtin_va_end(ap);
426}
427
428void __kmp_abort_process() {
429 // Later threads may stall here, but that's ok because abort() will kill them.
430 __kmp_acquire_bootstrap_lock(&__kmp_exit_lock);
431
432 if (__kmp_debug_buf) {
433 __kmp_dump_debug_buffer();
434 }
435
436 if (KMP_OS_WINDOWS0) {
437 // Let other threads know of abnormal termination and prevent deadlock
438 // if abort happened during library initialization or shutdown
439 __kmp_global.g.g_abort = SIGABRT6;
440
441 /* On Windows* OS by default abort() causes pop-up error box, which stalls
442 nightly testing. Unfortunately, we cannot reliably suppress pop-up error
443 boxes. _set_abort_behavior() works well, but this function is not
444 available in VS7 (this is not problem for DLL, but it is a problem for
445 static OpenMP RTL). SetErrorMode (and so, timelimit utility) does not
446 help, at least in some versions of MS C RTL.
447
448 It seems following sequence is the only way to simulate abort() and
449 avoid pop-up error box. */
450 raise(SIGABRT6);
451 _exit(3); // Just in case, if signal ignored, exit anyway.
452 } else {
453 __kmp_unregister_library();
454 abort();
455 }
456
457 __kmp_infinite_loop();
458 __kmp_release_bootstrap_lock(&__kmp_exit_lock);
459
460} // __kmp_abort_process
461
462void __kmp_abort_thread(void) {
463 // TODO: Eliminate g_abort global variable and this function.
464 // In case of abort just call abort(), it will kill all the threads.
465 __kmp_infinite_loop();
466} // __kmp_abort_thread
467
468/* Print out the storage map for the major kmp_info_t thread data structures
469 that are allocated together. */
470
471static void __kmp_print_thread_storage_map(kmp_info_t *thr, int gtid) {
472 __kmp_print_storage_map_gtid(gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d",
473 gtid);
474
475 __kmp_print_storage_map_gtid(gtid, &thr->th.th_info, &thr->th.th_team,
476 sizeof(kmp_desc_t), "th_%d.th_info", gtid);
477
478 __kmp_print_storage_map_gtid(gtid, &thr->th.th_local, &thr->th.th_pri_head,
479 sizeof(kmp_local_t), "th_%d.th_local", gtid);
480
481 __kmp_print_storage_map_gtid(
482 gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
483 sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid);
484
485 __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_plain_barrier],
486 &thr->th.th_bar[bs_plain_barrier + 1],
487 sizeof(kmp_balign_t), "th_%d.th_bar[plain]",
488 gtid);
489
490 __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_forkjoin_barrier],
491 &thr->th.th_bar[bs_forkjoin_barrier + 1],
492 sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]",
493 gtid);
494
495#if KMP_FAST_REDUCTION_BARRIER1
496 __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_reduction_barrier],
497 &thr->th.th_bar[bs_reduction_barrier + 1],
498 sizeof(kmp_balign_t), "th_%d.th_bar[reduction]",
499 gtid);
500#endif // KMP_FAST_REDUCTION_BARRIER
501}
502
503/* Print out the storage map for the major kmp_team_t team data structures
504 that are allocated together. */
505
506static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team,
507 int team_id, int num_thr) {
508 int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
509 __kmp_print_storage_map_gtid(-1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
510 header, team_id);
511
512 __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0],
513 &team->t.t_bar[bs_last_barrier],
514 sizeof(kmp_balign_team_t) * bs_last_barrier,
515 "%s_%d.t_bar", header, team_id);
516
517 __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_plain_barrier],
518 &team->t.t_bar[bs_plain_barrier + 1],
519 sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]",
520 header, team_id);
521
522 __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_forkjoin_barrier],
523 &team->t.t_bar[bs_forkjoin_barrier + 1],
524 sizeof(kmp_balign_team_t),
525 "%s_%d.t_bar[forkjoin]", header, team_id);
526
527#if KMP_FAST_REDUCTION_BARRIER1
528 __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_reduction_barrier],
529 &team->t.t_bar[bs_reduction_barrier + 1],
530 sizeof(kmp_balign_team_t),
531 "%s_%d.t_bar[reduction]", header, team_id);
532#endif // KMP_FAST_REDUCTION_BARRIER
533
534 __kmp_print_storage_map_gtid(
535 -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
536 sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id);
537
538 __kmp_print_storage_map_gtid(
539 -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
540 sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id);
541
542 __kmp_print_storage_map_gtid(-1, &team->t.t_disp_buffer[0],
543 &team->t.t_disp_buffer[num_disp_buff],
544 sizeof(dispatch_shared_info_t) * num_disp_buff,
545 "%s_%d.t_disp_buffer", header, team_id);
546}
547
548static void __kmp_init_allocator() {
549 __kmp_init_memkind();
550 __kmp_init_target_mem();
551}
552static void __kmp_fini_allocator() { __kmp_fini_memkind(); }
553
554/* ------------------------------------------------------------------------ */
555
556#if KMP_DYNAMIC_LIB1
557#if KMP_OS_WINDOWS0
558
559BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) {
560 //__kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
561
562 switch (fdwReason) {
563
564 case DLL_PROCESS_ATTACH:
565 KA_TRACE(10, ("DllMain: PROCESS_ATTACH\n"));
566
567 return TRUE(!0);
568
569 case DLL_PROCESS_DETACH:
570 KA_TRACE(10, ("DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific()));
571
572 // According to Windows* documentation for DllMain entry point:
573 // for DLL_PROCESS_DETACH, lpReserved is used for telling the difference:
574 // lpReserved == NULL when FreeLibrary() is called,
575 // lpReserved != NULL when the process is terminated.
576 // When FreeLibrary() is called, worker threads remain alive. So the
577 // runtime's state is consistent and executing proper shutdown is OK.
578 // When the process is terminated, worker threads have exited or been
579 // forcefully terminated by the OS and only the shutdown thread remains.
580 // This can leave the runtime in an inconsistent state.
581 // Hence, only attempt proper cleanup when FreeLibrary() is called.
582 // Otherwise, rely on OS to reclaim resources.
583 if (lpReserved == NULL__null)
584 __kmp_internal_end_library(__kmp_gtid_get_specific());
585
586 return TRUE(!0);
587
588 case DLL_THREAD_ATTACH:
589 KA_TRACE(10, ("DllMain: THREAD_ATTACH\n"));
590
591 /* if we want to register new siblings all the time here call
592 * __kmp_get_gtid(); */
593 return TRUE(!0);
594
595 case DLL_THREAD_DETACH:
596 KA_TRACE(10, ("DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific()));
597
598 __kmp_internal_end_thread(__kmp_gtid_get_specific());
599 return TRUE(!0);
600 }
601
602 return TRUE(!0);
603}
604
605#endif /* KMP_OS_WINDOWS */
606#endif /* KMP_DYNAMIC_LIB */
607
608/* __kmp_parallel_deo -- Wait until it's our turn. */
609void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
610 int gtid = *gtid_ref;
611#ifdef BUILD_PARALLEL_ORDERED1
612 kmp_team_t *team = __kmp_team_from_gtid(gtid);
613#endif /* BUILD_PARALLEL_ORDERED */
614
615 if (__kmp_env_consistency_check) {
616 if (__kmp_threads[gtid]->th.th_root->r.r_active)
617#if KMP_USE_DYNAMIC_LOCK1
618 __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL__null, 0);
619#else
620 __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL__null);
621#endif
622 }
623#ifdef BUILD_PARALLEL_ORDERED1
624 if (!team->t.t_serialized) {
625 KMP_MB();
626 KMP_WAIT__kmp_wait_4(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid), KMP_EQ__kmp_eq_4,
627 NULL__null);
628 KMP_MB();
629 }
630#endif /* BUILD_PARALLEL_ORDERED */
631}
632
633/* __kmp_parallel_dxo -- Signal the next task. */
634void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
635 int gtid = *gtid_ref;
636#ifdef BUILD_PARALLEL_ORDERED1
637 int tid = __kmp_tid_from_gtid(gtid);
638 kmp_team_t *team = __kmp_team_from_gtid(gtid);
639#endif /* BUILD_PARALLEL_ORDERED */
640
641 if (__kmp_env_consistency_check) {
642 if (__kmp_threads[gtid]->th.th_root->r.r_active)
643 __kmp_pop_sync(gtid, ct_ordered_in_parallel, loc_ref);
644 }
645#ifdef BUILD_PARALLEL_ORDERED1
646 if (!team->t.t_serialized) {
647 KMP_MB(); /* Flush all pending memory write invalidates. */
648
649 /* use the tid of the next thread in this team */
650 /* TODO replace with general release procedure */
651 team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc);
652
653 KMP_MB(); /* Flush all pending memory write invalidates. */
654 }
655#endif /* BUILD_PARALLEL_ORDERED */
656}
657
658/* ------------------------------------------------------------------------ */
659/* The BARRIER for a SINGLE process section is always explicit */
660
661int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws) {
662 int status;
663 kmp_info_t *th;
664 kmp_team_t *team;
665
666 if (!TCR_4(__kmp_init_parallel)(__kmp_init_parallel))
667 __kmp_parallel_initialize();
668 __kmp_resume_if_soft_paused();
669
670 th = __kmp_threads[gtid];
671 team = th->th.th_team;
672 status = 0;
673
674 th->th.th_ident = id_ref;
675
676 if (team->t.t_serialized) {
677 status = 1;
678 } else {
679 kmp_int32 old_this = th->th.th_local.this_construct;
680
681 ++th->th.th_local.this_construct;
682 /* try to set team count to thread count--success means thread got the
683 single block */
684 /* TODO: Should this be acquire or release? */
685 if (team->t.t_construct == old_this) {
686 status = __kmp_atomic_compare_store_acq(&team->t.t_construct, old_this,
687 th->th.th_local.this_construct);
688 }
689#if USE_ITT_BUILD1
690 if (__itt_metadata_add_ptr__kmp_itt_metadata_add_ptr__3_0 && __kmp_forkjoin_frames_mode == 3 &&
691 KMP_MASTER_GTID(gtid)(0 == __kmp_tid_from_gtid((gtid))) && th->th.th_teams_microtask == NULL__null &&
692 team->t.t_active_level == 1) {
693 // Only report metadata by primary thread of active team at level 1
694 __kmp_itt_metadata_single(id_ref);
695 }
696#endif /* USE_ITT_BUILD */
697 }
698
699 if (__kmp_env_consistency_check) {
700 if (status && push_ws) {
701 __kmp_push_workshare(gtid, ct_psingle, id_ref);
702 } else {
703 __kmp_check_workshare(gtid, ct_psingle, id_ref);
704 }
705 }
706#if USE_ITT_BUILD1
707 if (status) {
708 __kmp_itt_single_start(gtid);
709 }
710#endif /* USE_ITT_BUILD */
711 return status;
712}
713
714void __kmp_exit_single(int gtid) {
715#if USE_ITT_BUILD1
716 __kmp_itt_single_end(gtid);
717#endif /* USE_ITT_BUILD */
718 if (__kmp_env_consistency_check)
719 __kmp_pop_workshare(gtid, ct_psingle, NULL__null);
720}
721
722/* determine if we can go parallel or must use a serialized parallel region and
723 * how many threads we can use
724 * set_nproc is the number of threads requested for the team
725 * returns 0 if we should serialize or only use one thread,
726 * otherwise the number of threads to use
727 * The forkjoin lock is held by the caller. */
728static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
729 int master_tid, int set_nthreads,
730 int enter_teams) {
731 int capacity;
732 int new_nthreads;
733 KMP_DEBUG_ASSERT(__kmp_init_serial);
734 KMP_DEBUG_ASSERT(root && parent_team);
735 kmp_info_t *this_thr = parent_team->t.t_threads[master_tid];
736
737 // If dyn-var is set, dynamically adjust the number of desired threads,
738 // according to the method specified by dynamic_mode.
739 new_nthreads = set_nthreads;
740 if (!get__dynamic_2(parent_team, master_tid)) {
741 ;
742 }
743#ifdef USE_LOAD_BALANCE1
744 else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) {
745 new_nthreads = __kmp_load_balance_nproc(root, set_nthreads);
746 if (new_nthreads == 1) {
747 KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
748               "reservation to 1 thread\n",
749               master_tid));
750 return 1;
751 }
752 if (new_nthreads < set_nthreads) {
753 KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
754               "reservation to %d threads\n",
755               master_tid, new_nthreads));
756 }
757 }
758#endif /* USE_LOAD_BALANCE */
759 else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) {
760 new_nthreads = __kmp_avail_proc - __kmp_nth +
761 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
762 if (new_nthreads <= 1) {
763 KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
764               "reservation to 1 thread\n",
765               master_tid));
766 return 1;
767 }
768 if (new_nthreads < set_nthreads) {
769 KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
770               "reservation to %d threads\n",
771               master_tid, new_nthreads));
772 } else {
773 new_nthreads = set_nthreads;
774 }
775 } else if (__kmp_global.g.g_dynamic_mode == dynamic_random) {
776 if (set_nthreads > 2) {
777 new_nthreads = __kmp_get_random(parent_team->t.t_threads[master_tid]);
778 new_nthreads = (new_nthreads % set_nthreads) + 1;
779 if (new_nthreads == 1) {
780 KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
781               "reservation to 1 thread\n",
782               master_tid));
783 return 1;
784 }
785 if (new_nthreads < set_nthreads) {
786 KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
787               "reservation to %d threads\n",
788               master_tid, new_nthreads));
790 }
791 } else {
792 KMP_ASSERT(0);
793 }
794
795 // Respect KMP_ALL_THREADS/KMP_DEVICE_THREAD_LIMIT.
796 if (__kmp_nth + new_nthreads -
797 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
798 __kmp_max_nth) {
799 int tl_nthreads = __kmp_max_nth - __kmp_nth +
800 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
801 if (tl_nthreads <= 0) {
802 tl_nthreads = 1;
803 }
804
805 // If dyn-var is false, emit a 1-time warning.
806 if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
807 __kmp_reserve_warn = 1;
808 __kmp_msg(kmp_ms_warning,
809           KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
810           KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
811 }
812 if (tl_nthreads == 1) {
813 KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "
814               "reduced reservation to 1 thread\n",
815               master_tid));
816 return 1;
817 }
818 KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "
819               "reservation to %d threads\n",
820               master_tid, tl_nthreads));
821 new_nthreads = tl_nthreads;
822 }
823
824 // Respect OMP_THREAD_LIMIT
825 int cg_nthreads = this_thr->th.th_cg_roots->cg_nthreads;
826 int max_cg_threads = this_thr->th.th_cg_roots->cg_thread_limit;
827 if (cg_nthreads + new_nthreads -
828 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
829 max_cg_threads) {
830 int tl_nthreads = max_cg_threads - cg_nthreads +
831 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
832 if (tl_nthreads <= 0) {
833 tl_nthreads = 1;
834 }
835
836 // If dyn-var is false, emit a 1-time warning.
837 if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
838 __kmp_reserve_warn = 1;
839 __kmp_msg(kmp_ms_warning,
840           KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
841           KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
842 }
843 if (tl_nthreads == 1) {
844 KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "
845               "reduced reservation to 1 thread\n",
846               master_tid));
847 return 1;
848 }
849 KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "
850               "reservation to %d threads\n",
851               master_tid, tl_nthreads));
852 new_nthreads = tl_nthreads;
853 }
854
855 // Check if the threads array is large enough, or needs expanding.
856 // See comment in __kmp_register_root() about the adjustment if
857 // __kmp_threads[0] == NULL.
858 capacity = __kmp_threads_capacity;
859 if (TCR_PTR(__kmp_threads[0])((void *)(__kmp_threads[0])) == NULL__null) {
860 --capacity;
861 }
862 // If it is not for initializing the hidden helper team, we need to take
863 // __kmp_hidden_helper_threads_num out of the capacity because it is included
864 // in __kmp_threads_capacity.
865 if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)(__kmp_init_hidden_helper_threads)) {
866 capacity -= __kmp_hidden_helper_threads_num;
867 }
868 if (__kmp_nth + new_nthreads -
869 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
870 capacity) {
871 // Expand the threads array.
872 int slotsRequired = __kmp_nth + new_nthreads -
873 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) -
874 capacity;
875 int slotsAdded = __kmp_expand_threads(slotsRequired);
876 if (slotsAdded < slotsRequired) {
877 // The threads array was not expanded enough.
878 new_nthreads -= (slotsRequired - slotsAdded);
879 KMP_ASSERT(new_nthreads >= 1);
880
881 // If dyn-var is false, emit a 1-time warning.
882 if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
883 __kmp_reserve_warn = 1;
884 if (__kmp_tp_cached) {
885 __kmp_msg(kmp_ms_warning,
886           KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
887           KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
888           KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
889 } else {
890 __kmp_msg(kmp_ms_warning,
891           KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
892           KMP_HNT(SystemLimitOnThreads), __kmp_msg_null);
893 }
894 }
895 }
896 }
897
898#ifdef KMP_DEBUG1
899 if (new_nthreads == 1) {
900 KC_TRACE(10,
901          ("__kmp_reserve_threads: T#%d serializing team after reclaiming "
902           "dead roots and rechecking; requested %d threads\n",
903           __kmp_get_gtid(), set_nthreads));
904 } else {
905 KC_TRACE(10, ("__kmp_reserve_threads: T#%d allocating %d threads; requested"
906               " %d threads\n",
907               __kmp_get_gtid(), new_nthreads, set_nthreads));
908 }
909#endif // KMP_DEBUG
910 return new_nthreads;
911}
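
Each limit check in __kmp_reserve_threads follows the same pattern: work out how many more threads the limit still allows given the threads already accounted for, clamp the request to that, and never go below one so the region can still run serialized. A hedged, stand-alone sketch of that clamping step with a hypothetical helper name (the real code also credits back the slot the primary thread or hot team reuses):

// Reduce a requested team size so a thread limit is not exceeded.
inline int clamp_team_size(int requested, int already_active, int limit) {
  int allowed = limit - already_active;
  if (allowed < 1)
    allowed = 1; // there is always room to run the region serialized
  return requested > allowed ? allowed : requested;
}
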
912
913/* Allocate threads from the thread pool and assign them to the new team. We are
914 assured that there are enough threads available, because we checked on that
915 earlier within critical section forkjoin */
916static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
917 kmp_info_t *master_th, int master_gtid,
918 int fork_teams_workers) {
919 int i;
920 int use_hot_team;
921
922 KA_TRACE(10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc));
923 KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid());
924 KMP_MB();
925
926 /* first, let's setup the primary thread */
927 master_th->th.th_info.ds.ds_tid = 0;
928 master_th->th.th_team = team;
929 master_th->th.th_team_nproc = team->t.t_nproc;
930 master_th->th.th_team_master = master_th;
931 master_th->th.th_team_serialized = FALSE0;
932 master_th->th.th_dispatch = &team->t.t_dispatch[0];
933
934/* make sure we are not the optimized hot team */
935#if KMP_NESTED_HOT_TEAMS1
936 use_hot_team = 0;
937 kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
938 if (hot_teams) { // hot teams array is not allocated if
939 // KMP_HOT_TEAMS_MAX_LEVEL=0
940 int level = team->t.t_active_level - 1; // index in array of hot teams
941 if (master_th->th.th_teams_microtask) { // are we inside the teams?
942 if (master_th->th.th_teams_size.nteams > 1) {
943 ++level; // level was not increased in teams construct for
944 // team_of_masters
945 }
946 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
947 master_th->th.th_teams_level == team->t.t_level) {
948 ++level; // level was not increased in teams construct for
949 // team_of_workers before the parallel
950 } // team->t.t_level will be increased inside parallel
951 }
952 if (level < __kmp_hot_teams_max_level) {
953 if (hot_teams[level].hot_team) {
954 // hot team has already been allocated for given level
955 KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
956 use_hot_team = 1; // the team is ready to use
957 } else {
958 use_hot_team = 0; // AC: threads are not allocated yet
959 hot_teams[level].hot_team = team; // remember new hot team
960 hot_teams[level].hot_team_nth = team->t.t_nproc;
961 }
962 } else {
963 use_hot_team = 0;
964 }
965 }
966#else
967 use_hot_team = team == root->r.r_hot_team;
968#endif
969 if (!use_hot_team) {
970
971 /* install the primary thread */
972 team->t.t_threads[0] = master_th;
973 __kmp_initialize_info(master_th, team, 0, master_gtid);
974
975 /* now, install the worker threads */
976 for (i = 1; i < team->t.t_nproc; i++) {
977
978 /* fork or reallocate a new thread and install it in team */
979 kmp_info_t *thr = __kmp_allocate_thread(root, team, i);
980 team->t.t_threads[i] = thr;
981 KMP_DEBUG_ASSERT(thr);
982 KMP_DEBUG_ASSERT(thr->th.th_team == team);
983 /* align team and thread arrived states */
984 KA_TRACE(20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
985               "T#%d(%d:%d) join =%llu, plain=%llu\n",
986               __kmp_gtid_from_tid(0, team), team->t.t_id, 0,
987               __kmp_gtid_from_tid(i, team), team->t.t_id, i,
988               team->t.t_bar[bs_forkjoin_barrier].b_arrived,
989               team->t.t_bar[bs_plain_barrier].b_arrived));
990 thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
991 thr->th.th_teams_level = master_th->th.th_teams_level;
992 thr->th.th_teams_size = master_th->th.th_teams_size;
993 { // Initialize threads' barrier data.
994 int b;
995 kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar;
996 for (b = 0; b < bs_last_barrier; ++b) {
997 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
998 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
999#if USE_DEBUGGER0
1000 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
1001#endif
1002 }
1003 }
1004 }
1005
1006#if KMP_AFFINITY_SUPPORTED1
1007 // Do not partition the places list for teams construct workers who
1008 // haven't actually been forked to do real work yet. This partitioning
1009 // will take place in the parallel region nested within the teams construct.
1010 if (!fork_teams_workers) {
1011 __kmp_partition_places(team);
1012 }
1013#endif
1014 }
1015
1016 if (__kmp_display_affinity && team->t.t_display_affinity != 1) {
1017 for (i = 0; i < team->t.t_nproc; i++) {
1018 kmp_info_t *thr = team->t.t_threads[i];
1019 if (thr->th.th_prev_num_threads != team->t.t_nproc ||
1020 thr->th.th_prev_level != team->t.t_level) {
1021 team->t.t_display_affinity = 1;
1022 break;
1023 }
1024 }
1025 }
1026
1027 KMP_MB();
1028}
1029
1030#if KMP_ARCH_X860 || KMP_ARCH_X86_641
1031// Propagate any changes to the floating point control registers out to the team
1032// We try to avoid unnecessary writes to the relevant cache line in the team
1033// structure, so we don't make changes unless they are needed.
1034inline static void propagateFPControl(kmp_team_t *team) {
1035 if (__kmp_inherit_fp_control) {
1036 kmp_int16 x87_fpu_control_word;
1037 kmp_uint32 mxcsr;
1038
1039 // Get primary thread's values of FPU control flags (both X87 and vector)
1040 __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
1041 __kmp_store_mxcsr(&mxcsr);
1042 mxcsr &= KMP_X86_MXCSR_MASK0xffffffc0;
1043
1044 // There is no point looking at t_fp_control_saved here.
1045 // If it is TRUE, we still have to update the values if they are different
1046 // from those we now have. If it is FALSE we didn't save anything yet, but
1047 // our objective is the same. We have to ensure that the values in the team
1048 // are the same as those we have.
1049 // So, this code achieves what we need whether or not t_fp_control_saved is
1050 // true. By checking whether the value needs updating we avoid unnecessary
1051 // writes that would put the cache-line into a written state, causing all
1052 // threads in the team to have to read it again.
1053 KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
1054 KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);
1055 // Although we don't use this value, other code in the runtime wants to know
1056 // whether it should restore them. So we must ensure it is correct.
1057 KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
1058 } else {
1059 // Similarly here. Don't write to this cache-line in the team structure
1060 // unless we have to.
1061 KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);
1062 }
1063}
1064
1065// Do the opposite, setting the hardware registers to the updated values from
1066// the team.
1067inline static void updateHWFPControl(kmp_team_t *team) {
1068 if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) {
1069 // Only reset the fp control regs if they have been changed in the team.
1070 // the parallel region that we are exiting.
1071 kmp_int16 x87_fpu_control_word;
1072 kmp_uint32 mxcsr;
1073 __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
1074 __kmp_store_mxcsr(&mxcsr);
1075 mxcsr &= KMP_X86_MXCSR_MASK0xffffffc0;
1076
1077 if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) {
1078 __kmp_clear_x87_fpu_status_word();
1079 __kmp_load_x87_fpu_control_word(&team->t.t_x87_fpu_control_word);
1080 }
1081
1082 if (team->t.t_mxcsr != mxcsr) {
1083 __kmp_load_mxcsr(&team->t.t_mxcsr);
1084 }
1085 }
1086}
1087#else
1088#define propagateFPControl(x) ((void)0)
1089#define updateHWFPControl(x) ((void)0)
1090#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
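
The inline expansions above spell out the KMP_CHECK_UPDATE idiom: compare first, store only when the value actually changed, so the cache line holding the team's FP state stays shared when nothing moved. A minimal compile-only sketch of that compare-before-store pattern, using stand-in names rather than the runtime's types:

#include <cstdint>

// Stand-ins for the two team fields written by propagateFPControl(); the
// real fields live inside kmp_team_t.
struct fp_state {
  int16_t x87_fpu_control_word;
  uint32_t mxcsr;
};

// Same shape as the expansion shown above: "if ((lhs) != (rhs)) (lhs) = (rhs)".
// The read-only compare leaves the cache line shared; the store happens only
// when the primary thread's registers differ from the team's cached copy.
template <typename T>
static inline void check_update(T &lhs, T rhs) {
  if (lhs != rhs)
    lhs = rhs;
}

void propagate(fp_state &team, int16_t x87_cw, uint32_t mxcsr) {
  check_update(team.x87_fpu_control_word, x87_cw);
  check_update(team.mxcsr, mxcsr);
}
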
1091
1092static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team,
1093 int realloc); // forward declaration
1094
1095/* Run a parallel region that has been serialized, so runs only in a team of the
1096 single primary thread. */
1097void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
1098 kmp_info_t *this_thr;
1099 kmp_team_t *serial_team;
1100
1101 KC_TRACE(10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmpc_serialized_parallel: called by T#%d\n"
, global_tid); }
;
1102
1103 /* Skip all this code for autopar serialized loops since it results in
1104 unacceptable overhead */
1105 if (loc != NULL__null && (loc->flags & KMP_IDENT_AUTOPAR))
1106 return;
1107
1108 if (!TCR_4(__kmp_init_parallel)(__kmp_init_parallel))
1109 __kmp_parallel_initialize();
1110 __kmp_resume_if_soft_paused();
1111
1112 this_thr = __kmp_threads[global_tid];
1113 serial_team = this_thr->th.th_serial_team;
1114
1115 /* utilize the serialized team held by this thread */
1116 KMP_DEBUG_ASSERT(serial_team)if (!(serial_team)) { __kmp_debug_assert("serial_team", "openmp/runtime/src/kmp_runtime.cpp"
, 1116); }
;
1117 KMP_MB();
1118
1119 if (__kmp_tasking_mode != tskm_immediate_exec) {
1120 KMP_DEBUG_ASSERT(if (!(this_thr->th.th_task_team == this_thr->th.th_team
->t.t_task_team[this_thr->th.th_task_state])) { __kmp_debug_assert
("this_thr->th.th_task_team == this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]"
, "openmp/runtime/src/kmp_runtime.cpp", 1122); }
1121 this_thr->th.th_task_team ==if (!(this_thr->th.th_task_team == this_thr->th.th_team
->t.t_task_team[this_thr->th.th_task_state])) { __kmp_debug_assert
("this_thr->th.th_task_team == this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]"
, "openmp/runtime/src/kmp_runtime.cpp", 1122); }
1122 this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state])if (!(this_thr->th.th_task_team == this_thr->th.th_team
->t.t_task_team[this_thr->th.th_task_state])) { __kmp_debug_assert
("this_thr->th.th_task_team == this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]"
, "openmp/runtime/src/kmp_runtime.cpp", 1122); }
;
1123 KMP_DEBUG_ASSERT(serial_team->t.t_task_team[this_thr->th.th_task_state] ==if (!(serial_team->t.t_task_team[this_thr->th.th_task_state
] == __null)) { __kmp_debug_assert("serial_team->t.t_task_team[this_thr->th.th_task_state] == __null"
, "openmp/runtime/src/kmp_runtime.cpp", 1124); }
1124 NULL)if (!(serial_team->t.t_task_team[this_thr->th.th_task_state
] == __null)) { __kmp_debug_assert("serial_team->t.t_task_team[this_thr->th.th_task_state] == __null"
, "openmp/runtime/src/kmp_runtime.cpp", 1124); }
;
1125 KA_TRACE(20, ("__kmpc_serialized_parallel: T#%d pushing task_team %p / "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmpc_serialized_parallel: T#%d pushing task_team %p / "
"team %p, new task_team = NULL\n", global_tid, this_thr->
th.th_task_team, this_thr->th.th_team); }
1126 "team %p, new task_team = NULL\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmpc_serialized_parallel: T#%d pushing task_team %p / "
"team %p, new task_team = NULL\n", global_tid, this_thr->
th.th_task_team, this_thr->th.th_team); }
1127 global_tid, this_thr->th.th_task_team, this_thr->th.th_team))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmpc_serialized_parallel: T#%d pushing task_team %p / "
"team %p, new task_team = NULL\n", global_tid, this_thr->
th.th_task_team, this_thr->th.th_team); }
;
1128 this_thr->th.th_task_team = NULL__null;
1129 }
1130
1131 kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
1132 if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
1133 proc_bind = proc_bind_false;
1134 } else if (proc_bind == proc_bind_default) {
1135 // No proc_bind clause was specified, so use the current value
1136 // of proc-bind-var for this parallel region.
1137 proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
1138 }
1139 // Reset for next parallel region
1140 this_thr->th.th_set_proc_bind = proc_bind_default;
1141
1142#if OMPT_SUPPORT1
1143 ompt_data_t ompt_parallel_data = ompt_data_none{0};
1144 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid)__ompt_load_return_address(global_tid);
1145 if (ompt_enabled.enabled &&
1146 this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
1147
1148 ompt_task_info_t *parent_task_info;
1149 parent_task_info = OMPT_CUR_TASK_INFO(this_thr)(&(this_thr->th.th_current_task->ompt_task_info));
1150
1151 parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0)__builtin_frame_address(0);
1152 if (ompt_enabled.ompt_callback_parallel_begin) {
1153 int team_size = 1;
1154
1155 ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)ompt_callback_parallel_begin_callback(
1156 &(parent_task_info->task_data), &(parent_task_info->frame),
1157 &ompt_parallel_data, team_size,
1158 ompt_parallel_invoker_program | ompt_parallel_team, codeptr);
1159 }
1160 }
1161#endif // OMPT_SUPPORT
1162
1163 if (this_thr->th.th_team != serial_team) {
1164 // Nested level will be an index in the nested nthreads array
1165 int level = this_thr->th.th_team->t.t_level;
1166
1167 if (serial_team->t.t_serialized) {
1168 /* this serial team was already used
1169 TODO increase performance by making these locks more specific */
1170 kmp_team_t *new_team;
1171
1172 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
1173
1174 new_team =
1175 __kmp_allocate_team(this_thr->th.th_root, 1, 1,
1176#if OMPT_SUPPORT1
1177 ompt_parallel_data,
1178#endif
1179 proc_bind, &this_thr->th.th_current_task->td_icvs,
1180 0 USE_NESTED_HOT_ARG(NULL), __null);
1181 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
1182 KMP_ASSERT(new_team)if (!(new_team)) { __kmp_debug_assert("new_team", "openmp/runtime/src/kmp_runtime.cpp"
, 1182); }
;
1183
1184 /* setup new serialized team and install it */
1185 new_team->t.t_threads[0] = this_thr;
1186 new_team->t.t_parent = this_thr->th.th_team;
1187 serial_team = new_team;
1188 this_thr->th.th_serial_team = serial_team;
1189
1190 KF_TRACE(if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n"
, global_tid, serial_team); }
1191 10,if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n"
, global_tid, serial_team); }
1192 ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n"
, global_tid, serial_team); }
1193 global_tid, serial_team))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n"
, global_tid, serial_team); }
;
1194
1195 /* TODO the above breaks the requirement that if we run out of resources,
1196 then we can still guarantee that serialized teams are ok, since we may
1197 need to allocate a new one */
1198 } else {
1199 KF_TRACE(if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n"
, global_tid, serial_team); }
1200 10,if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n"
, global_tid, serial_team); }
1201 ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n"
, global_tid, serial_team); }
1202 global_tid, serial_team))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n"
, global_tid, serial_team); }
;
1203 }
1204
1205 /* we have to initialize this serial team */
1206 KMP_DEBUG_ASSERT(serial_team->t.t_threads)if (!(serial_team->t.t_threads)) { __kmp_debug_assert("serial_team->t.t_threads"
, "openmp/runtime/src/kmp_runtime.cpp", 1206); }
;
1207 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr)if (!(serial_team->t.t_threads[0] == this_thr)) { __kmp_debug_assert
("serial_team->t.t_threads[0] == this_thr", "openmp/runtime/src/kmp_runtime.cpp"
, 1207); }
;
1208 KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team)if (!(this_thr->th.th_team != serial_team)) { __kmp_debug_assert
("this_thr->th.th_team != serial_team", "openmp/runtime/src/kmp_runtime.cpp"
, 1208); }
;
1209 serial_team->t.t_ident = loc;
1210 serial_team->t.t_serialized = 1;
1211 serial_team->t.t_nproc = 1;
1212 serial_team->t.t_parent = this_thr->th.th_team;
1213 serial_team->t.t_sched.sched = this_thr->th.th_team->t.t_sched.sched;
1214 this_thr->th.th_team = serial_team;
1215 serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;
1216
1217 KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d curtask=%p\n", global_tid,if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmpc_serialized_parallel: T#%d curtask=%p\n"
, global_tid, this_thr->th.th_current_task); }
1218 this_thr->th.th_current_task))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmpc_serialized_parallel: T#%d curtask=%p\n"
, global_tid, this_thr->th.th_current_task); }
;
1219 KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1)if (!(this_thr->th.th_current_task->td_flags.executing ==
1)) { __kmp_debug_assert("this_thr->th.th_current_task->td_flags.executing == 1"
, "openmp/runtime/src/kmp_runtime.cpp", 1219); }
;
1220 this_thr->th.th_current_task->td_flags.executing = 0;
1221
1222 __kmp_push_current_task_to_thread(this_thr, serial_team, 0);
1223
1224 /* TODO: GEH: do ICVs work for nested serialized teams? Don't we need an
1225 implicit task for each serialized task represented by
1226 team->t.t_serialized? */
1227 copy_icvs(&this_thr->th.th_current_task->td_icvs,
1228 &this_thr->th.th_current_task->td_parent->td_icvs);
1229
1230 // Thread value exists in the nested nthreads array for the next nested
1231 // level
1232 if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
1233 this_thr->th.th_current_task->td_icvs.nproc =
1234 __kmp_nested_nth.nth[level + 1];
1235 }
1236
1237 if (__kmp_nested_proc_bind.used &&
1238 (level + 1 < __kmp_nested_proc_bind.used)) {
1239 this_thr->th.th_current_task->td_icvs.proc_bind =
1240 __kmp_nested_proc_bind.bind_types[level + 1];
1241 }
1242
1243#if USE_DEBUGGER0
1244 serial_team->t.t_pkfn = (microtask_t)(~0); // For the debugger.
1245#endif
1246 this_thr->th.th_info.ds.ds_tid = 0;
1247
1248 /* set thread cache values */
1249 this_thr->th.th_team_nproc = 1;
1250 this_thr->th.th_team_master = this_thr;
1251 this_thr->th.th_team_serialized = 1;
1252
1253 serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
1254 serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;
1255 serial_team->t.t_def_allocator = this_thr->th.th_def_allocator; // save
1256
1257 propagateFPControl(serial_team);
1258
1259 /* check if we need to allocate dispatch buffers stack */
1260 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch)if (!(serial_team->t.t_dispatch)) { __kmp_debug_assert("serial_team->t.t_dispatch"
, "openmp/runtime/src/kmp_runtime.cpp", 1260); }
;
1261 if (!serial_team->t.t_dispatch->th_disp_buffer) {
1262 serial_team->t.t_dispatch->th_disp_buffer =
1263 (dispatch_private_info_t *)__kmp_allocate(___kmp_allocate((sizeof(dispatch_private_info_t)), "openmp/runtime/src/kmp_runtime.cpp"
, 1264)
1264 sizeof(dispatch_private_info_t))___kmp_allocate((sizeof(dispatch_private_info_t)), "openmp/runtime/src/kmp_runtime.cpp"
, 1264)
;
1265 }
1266 this_thr->th.th_dispatch = serial_team->t.t_dispatch;
1267
1268 KMP_MB();
1269
1270 } else {
1271 /* this serialized team is already being used,
1272 * that's fine, just add another nested level */
1273 KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team)if (!(this_thr->th.th_team == serial_team)) { __kmp_debug_assert
("this_thr->th.th_team == serial_team", "openmp/runtime/src/kmp_runtime.cpp"
, 1273); }
;
1274 KMP_DEBUG_ASSERT(serial_team->t.t_threads)if (!(serial_team->t.t_threads)) { __kmp_debug_assert("serial_team->t.t_threads"
, "openmp/runtime/src/kmp_runtime.cpp", 1274); }
;
1275 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr)if (!(serial_team->t.t_threads[0] == this_thr)) { __kmp_debug_assert
("serial_team->t.t_threads[0] == this_thr", "openmp/runtime/src/kmp_runtime.cpp"
, 1275); }
;
1276 ++serial_team->t.t_serialized;
1277 this_thr->th.th_team_serialized = serial_team->t.t_serialized;
1278
1279 // Nested level will be an index in the nested nthreads array
1280 int level = this_thr->th.th_team->t.t_level;
1281 // Thread value exists in the nested nthreads array for the next nested
1282 // level
1283 if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
1284 this_thr->th.th_current_task->td_icvs.nproc =
1285 __kmp_nested_nth.nth[level + 1];
1286 }
1287 serial_team->t.t_level++;
1288 KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d increasing nesting level "if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmpc_serialized_parallel: T#%d increasing nesting level "
"of serial team %p to %d\n", global_tid, serial_team, serial_team
->t.t_level); }
1289 "of serial team %p to %d\n",if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmpc_serialized_parallel: T#%d increasing nesting level "
"of serial team %p to %d\n", global_tid, serial_team, serial_team
->t.t_level); }
1290 global_tid, serial_team, serial_team->t.t_level))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmpc_serialized_parallel: T#%d increasing nesting level "
"of serial team %p to %d\n", global_tid, serial_team, serial_team
->t.t_level); }
;
1291
1292 /* allocate/push dispatch buffers stack */
1293 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch)if (!(serial_team->t.t_dispatch)) { __kmp_debug_assert("serial_team->t.t_dispatch"
, "openmp/runtime/src/kmp_runtime.cpp", 1293); }
;
1294 {
1295 dispatch_private_info_t *disp_buffer =
1296 (dispatch_private_info_t *)__kmp_allocate(___kmp_allocate((sizeof(dispatch_private_info_t)), "openmp/runtime/src/kmp_runtime.cpp"
, 1297)
1297 sizeof(dispatch_private_info_t))___kmp_allocate((sizeof(dispatch_private_info_t)), "openmp/runtime/src/kmp_runtime.cpp"
, 1297)
;
1298 disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
1299 serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
1300 }
1301 this_thr->th.th_dispatch = serial_team->t.t_dispatch;
1302
1303 KMP_MB();
1304 }
1305 KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq)if ((serial_team->t.t_cancel_request) != (cancel_noreq)) (
serial_team->t.t_cancel_request) = (cancel_noreq)
;
1306
1307 // Perform the display affinity functionality for
1308 // serialized parallel regions
1309 if (__kmp_display_affinity) {
1310 if (this_thr->th.th_prev_level != serial_team->t.t_level ||
1311 this_thr->th.th_prev_num_threads != 1) {
1312 // NULL means use the affinity-format-var ICV
1313 __kmp_aux_display_affinity(global_tid, NULL__null);
1314 this_thr->th.th_prev_level = serial_team->t.t_level;
1315 this_thr->th.th_prev_num_threads = 1;
1316 }
1317 }
1318
1319 if (__kmp_env_consistency_check)
1320 __kmp_push_parallel(global_tid, NULL__null);
1321#if OMPT_SUPPORT1
1322 serial_team->t.ompt_team_info.master_return_address = codeptr;
1323 if (ompt_enabled.enabled &&
1324 this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
1325 OMPT_CUR_TASK_INFO(this_thr)(&(this_thr->th.th_current_task->ompt_task_info))->frame.exit_frame.ptr =
1326 OMPT_GET_FRAME_ADDRESS(0)__builtin_frame_address(0);
1327
1328 ompt_lw_taskteam_t lw_taskteam;
1329 __ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid,
1330 &ompt_parallel_data, codeptr);
1331
1332 __ompt_lw_taskteam_link(&lw_taskteam, this_thr, 1);
1333 // don't use lw_taskteam after linking. content was swapped
1334
1335 /* OMPT implicit task begin */
1336 if (ompt_enabled.ompt_callback_implicit_task) {
1337 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)ompt_callback_implicit_task_callback(
1338 ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr)(&(this_thr->th.th_team->t.ompt_team_info.parallel_data
))
,
1339 OMPT_CUR_TASK_DATA(this_thr)(&(this_thr->th.th_current_task->ompt_task_info.task_data
))
, 1, __kmp_tid_from_gtid(global_tid),
1340 ompt_task_implicit); // TODO: Can this be ompt_task_initial?
1341 OMPT_CUR_TASK_INFO(this_thr)(&(this_thr->th.th_current_task->ompt_task_info))->thread_num =
1342 __kmp_tid_from_gtid(global_tid);
1343 }
1344
1345 /* OMPT state */
1346 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
1347 OMPT_CUR_TASK_INFO(this_thr)(&(this_thr->th.th_current_task->ompt_task_info))->frame.exit_frame.ptr =
1348 OMPT_GET_FRAME_ADDRESS(0)__builtin_frame_address(0);
1349 }
1350#endif
1351}
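
Both branches of __kmp_serialized_parallel above guard the nested-ICV lookups the same way: __kmp_nested_nth (and __kmp_nested_proc_bind) hold one entry per nesting depth, e.g. from a list such as OMP_NUM_THREADS=4,2, and the entry for level + 1, if present, overrides the ICV of the implicit task. A small sketch of that lookup, with hypothetical names standing in for the runtime's globals:

#include <vector>

// Hypothetical stand-in for __kmp_nested_nth; OMP_NUM_THREADS=4,2 would
// populate it as {4, 2}.
struct nested_nth_t {
  std::vector<int> nth;
  int used() const { return static_cast<int>(nth.size()); }
};

// Mirrors the guard used twice above: only override nproc when a value
// exists for the *next* nesting level; otherwise keep the inherited ICV.
int next_level_nproc(const nested_nth_t &nn, int level, int inherited_nproc) {
  if (nn.used() && (level + 1 < nn.used()))
    return nn.nth[level + 1];
  return inherited_nproc;
}
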
1352
1353/* most of the work for a fork */
1354/* return true if we really went parallel, false if serialized */
1355int __kmp_fork_call(ident_t *loc, int gtid,
1356 enum fork_context_e call_context, // Intel, GNU, ...
1357 kmp_int32 argc, microtask_t microtask, launch_t invoker,
1358 kmp_va_list ap) {
1359 void **argv;
1360 int i;
1361 int master_tid;
1362 int master_this_cons;
1363 kmp_team_t *team;
1364 kmp_team_t *parent_team;
1365 kmp_info_t *master_th;
1366 kmp_root_t *root;
1367 int nthreads;
1368 int master_active;
1369 int master_set_numthreads;
1370 int level;
1371 int active_level;
1372 int teams_level;
1373#if KMP_NESTED_HOT_TEAMS1
1374 kmp_hot_team_ptr_t **p_hot_teams;
1375#endif
1376 { // KMP_TIME_BLOCK
1377 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call)((void)0);
1378 KMP_COUNT_VALUE(OMP_PARALLEL_args, argc)((void)0);
1379
1380 KA_TRACE(20, ("__kmp_fork_call: enter T#%d\n", gtid))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_call: enter T#%d\n"
, gtid); }
;
1
Assuming 'kmp_a_debug' is < 20
1381 if (__kmp_stkpadding > 0 && __kmp_root[gtid] != NULL__null) {
2
Assuming '__kmp_stkpadding' is <= 0
1382 /* Some systems prefer the stack for the root thread(s) to start with */
1383 /* some gap from the parent stack to prevent false sharing. */
1384 void *dummy = KMP_ALLOCA(__kmp_stkpadding)__builtin_alloca (__kmp_stkpadding);
1385 /* These 2 lines below are so this does not get optimized out */
1386 if (__kmp_stkpadding > KMP_MAX_STKPADDING(2 * 1024 * 1024))
1387 __kmp_stkpadding += (short)((kmp_int64)dummy);
1388 }
1389
1390 /* initialize if needed */
1391 KMP_DEBUG_ASSERT(if (!(__kmp_init_serial)) { __kmp_debug_assert("__kmp_init_serial"
, "openmp/runtime/src/kmp_runtime.cpp", 1392); }
3
Assuming '__kmp_init_serial' is not equal to 0
4
Taking false branch
1392 __kmp_init_serial)if (!(__kmp_init_serial)) { __kmp_debug_assert("__kmp_init_serial"
, "openmp/runtime/src/kmp_runtime.cpp", 1392); }
; // AC: potentially unsafe, not in sync with shutdown
1393 if (!TCR_4(__kmp_init_parallel)(__kmp_init_parallel))
5
Assuming '__kmp_init_parallel' is not equal to 0
6
Taking false branch
1394 __kmp_parallel_initialize();
1395 __kmp_resume_if_soft_paused();
1396
1397 /* setup current data */
1398 master_th = __kmp_threads[gtid]; // AC: potentially unsafe, not in sync with
1399 // shutdown
1400 parent_team = master_th->th.th_team;
1401 master_tid = master_th->th.th_info.ds.ds_tid;
1402 master_this_cons = master_th->th.th_local.this_construct;
1403 root = master_th->th.th_root;
1404 master_active = root->r.r_active;
1405 master_set_numthreads = master_th->th.th_set_nproc;
1406
1407#if OMPT_SUPPORT1
1408 ompt_data_t ompt_parallel_data = ompt_data_none{0};
1409 ompt_data_t *parent_task_data;
7
'parent_task_data' declared without an initial value
1410 ompt_frame_t *ompt_frame;
1411 ompt_data_t *implicit_task_data;
1412 void *return_address = NULL__null;
1413
1414 if (ompt_enabled.enabled) {
8
Assuming field 'enabled' is 0
9
Taking false branch
1415 __ompt_get_task_info_internal(0, NULL__null, &parent_task_data, &ompt_frame,
1416 NULL__null, NULL__null);
1417 return_address = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
1418 }
1419#endif
1420
1421 // Assign affinity to root thread if it hasn't happened yet
1422 __kmp_assign_root_init_mask();
10
Calling '__kmp_assign_root_init_mask'
14
Returning from '__kmp_assign_root_init_mask'
1423
1424 // Nested level will be an index in the nested nthreads array
1425 level = parent_team->t.t_level;
1426 // used to launch non-serial teams even if nested is not allowed
1427 active_level = parent_team->t.t_active_level;
1428 // needed to check nesting inside the teams
1429 teams_level = master_th->th.th_teams_level;
1430#if KMP_NESTED_HOT_TEAMS1
1431 p_hot_teams = &master_th->th.th_hot_teams;
1432 if (*p_hot_teams == NULL__null && __kmp_hot_teams_max_level > 0) {
15
Assuming the condition is false
1433 *p_hot_teams = (kmp_hot_team_ptr_t *)__kmp_allocate(___kmp_allocate((sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level
), "openmp/runtime/src/kmp_runtime.cpp", 1434)
1434 sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level)___kmp_allocate((sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level
), "openmp/runtime/src/kmp_runtime.cpp", 1434)
;
1435 (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
1436 // it is either actual or not needed (when active_level > 0)
1437 (*p_hot_teams)[0].hot_team_nth = 1;
1438 }
1439#endif
1440
1441#if OMPT_SUPPORT1
1442 if (ompt_enabled.enabled) {
16
Assuming field 'enabled' is not equal to 0
17
Taking true branch
1443 if (ompt_enabled.ompt_callback_parallel_begin) {
18
Assuming field 'ompt_callback_parallel_begin' is not equal to 0
1444 int team_size = master_set_numthreads
19
Taking true branch
20
Assuming 'master_set_numthreads' is not equal to 0
21
'?' condition is true
1445 ? master_set_numthreads
1446 : get__nproc_2(parent_team, master_tid)((parent_team)->t.t_threads[(master_tid)]->th.th_current_task
->td_icvs.nproc)
;
1447 int flags = OMPT_INVOKER(call_context)((call_context == fork_context_gnu) ? ompt_parallel_invoker_program
: ompt_parallel_invoker_runtime)
|
22
Assuming 'call_context' is not equal to fork_context_gnu
23
'?' condition is false
1448 ((microtask == (microtask_t)__kmp_teams_master)
24
Assuming 'microtask' is not equal to __kmp_teams_master
25
'?' condition is false
1449 ? ompt_parallel_league
1450 : ompt_parallel_team);
1451 ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)ompt_callback_parallel_begin_callback(
26
1st function call argument is an uninitialized value
1452 parent_task_data, ompt_frame, &ompt_parallel_data, team_size, flags,
1453 return_address);
1454 }
1455 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1456 }
1457#endif
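
The path that produces the warning at line 1451 rests on two independent reads of ompt_enabled.enabled: step 8 assumes it is 0, so __ompt_get_task_info_internal at line 1415 never writes parent_task_data (declared without an initial value at line 1409, step 7), while step 16 assumes it is non-zero and reaches the parallel_begin callback with that pointer still uninitialized; the opaque calls in between (e.g. __kmp_assign_root_init_mask, step 10) are what let the analyzer treat the second read as fresh. A stripped-down, compile-only reduction of that shape, with hypothetical names, is:

struct config { int enabled; };
config cfg;                  // stands in for the global ompt_enabled
void fill(int **data);       // stands in for __ompt_get_task_info_internal
void other_work();           // opaque call between the two checks; as far as
                             // the analyzer knows it may modify cfg
void use(int *data);         // stands in for the parallel_begin callback

void repro() {
  int *data;                 // declared without an initial value (step 7)
  if (cfg.enabled)           // first check: assumed false (steps 8-9)
    fill(&data);
  other_work();              // invalidates what is known about cfg.enabled
  if (cfg.enabled)           // second check: assumed true (steps 16-17)
    use(data);               // 1st argument is an uninitialized value (step 26)
}

Initializing the pointer at its declaration (for instance, ompt_data_t *parent_task_data = NULL;) would close this particular path; whether that, or caching the first read of ompt_enabled.enabled, is the appropriate change for kmp_runtime.cpp is a call for the maintainers.
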
1458
1459 master_th->th.th_ident = loc;
1460
1461 if (master_th->th.th_teams_microtask && ap &&
1462 microtask != (microtask_t)__kmp_teams_master && level == teams_level) {
1463 // AC: This is start of parallel that is nested inside teams construct.
1464 // The team is actual (hot), all workers are ready at the fork barrier.
1465 // No lock needed to initialize the team a bit, then free workers.
1466 parent_team->t.t_ident = loc;
1467 __kmp_alloc_argv_entries(argc, parent_team, TRUE(!0));
1468 parent_team->t.t_argc = argc;
1469 argv = (void **)parent_team->t.t_argv;
1470 for (i = argc - 1; i >= 0; --i)
1471 *argv++ = va_arg(kmp_va_deref(ap), void *)__builtin_va_arg((*(ap)), void *);
1472 // Increment our nested depth levels, but not increase the serialization
1473 if (parent_team == master_th->th.th_serial_team) {
1474 // AC: we are in serialized parallel
1475 __kmpc_serialized_parallel(loc, gtid);
1476 KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1)if (!(parent_team->t.t_serialized > 1)) { __kmp_debug_assert
("parent_team->t.t_serialized > 1", "openmp/runtime/src/kmp_runtime.cpp"
, 1476); }
;
1477
1478 if (call_context == fork_context_gnu) {
1479 // AC: need to decrement t_serialized for enquiry functions to work
1480 // correctly, will restore at join time
1481 parent_team->t.t_serialized--;
1482 return TRUE(!0);
1483 }
1484
1485#if OMPD_SUPPORT1
1486 parent_team->t.t_pkfn = microtask;
1487#endif
1488
1489#if OMPT_SUPPORT1
1490 void *dummy;
1491 void **exit_frame_p;
1492
1493 ompt_lw_taskteam_t lw_taskteam;
1494
1495 if (ompt_enabled.enabled) {
1496 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1497 &ompt_parallel_data, return_address);
1498 exit_frame_p = &(lw_taskteam.ompt_task_info.frame.exit_frame.ptr);
1499
1500 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
1501 // don't use lw_taskteam after linking. content was swapped
1502
1503 /* OMPT implicit task begin */
1504 implicit_task_data = OMPT_CUR_TASK_DATA(master_th)(&(master_th->th.th_current_task->ompt_task_info.task_data
))
;
1505 if (ompt_enabled.ompt_callback_implicit_task) {
1506 OMPT_CUR_TASK_INFO(master_th)(&(master_th->th.th_current_task->ompt_task_info))->thread_num =
1507 __kmp_tid_from_gtid(gtid);
1508 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)ompt_callback_implicit_task_callback(
1509 ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th)(&(master_th->th.th_team->t.ompt_team_info.parallel_data
))
,
1510 implicit_task_data, 1,
1511 OMPT_CUR_TASK_INFO(master_th)(&(master_th->th.th_current_task->ompt_task_info))->thread_num, ompt_task_implicit);
1512 }
1513
1514 /* OMPT state */
1515 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1516 } else {
1517 exit_frame_p = &dummy;
1518 }
1519#endif
1520 // AC: need to decrement t_serialized for enquiry functions to work
1521 // correctly, will restore at join time
1522 parent_team->t.t_serialized--;
1523
1524 {
1525 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel)((void)0);
1526 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK)((void)0);
1527 __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
1528#if OMPT_SUPPORT1
1529 ,
1530 exit_frame_p
1531#endif
1532 );
1533 }
1534
1535#if OMPT_SUPPORT1
1536 if (ompt_enabled.enabled) {
1537 *exit_frame_p = NULL__null;
1538 OMPT_CUR_TASK_INFO(master_th)(&(master_th->th.th_current_task->ompt_task_info))->frame.exit_frame = ompt_data_none{0};
1539 if (ompt_enabled.ompt_callback_implicit_task) {
1540 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)ompt_callback_implicit_task_callback(
1541 ompt_scope_end, NULL__null, implicit_task_data, 1,
1542 OMPT_CUR_TASK_INFO(master_th)(&(master_th->th.th_current_task->ompt_task_info))->thread_num, ompt_task_implicit);
1543 }
1544 ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th)(&(master_th->th.th_team->t.ompt_team_info.parallel_data
))
;
1545 __ompt_lw_taskteam_unlink(master_th);
1546 if (ompt_enabled.ompt_callback_parallel_end) {
1547 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)ompt_callback_parallel_end_callback(
1548 &ompt_parallel_data, OMPT_CUR_TASK_DATA(master_th)(&(master_th->th.th_current_task->ompt_task_info.task_data
))
,
1549 OMPT_INVOKER(call_context)((call_context == fork_context_gnu) ? ompt_parallel_invoker_program
: ompt_parallel_invoker_runtime)
| ompt_parallel_team,
1550 return_address);
1551 }
1552 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1553 }
1554#endif
1555 return TRUE(!0);
1556 }
1557
1558 parent_team->t.t_pkfn = microtask;
1559 parent_team->t.t_invoke = invoker;
1560 KMP_ATOMIC_INC(&root->r.r_in_parallel)(&root->r.r_in_parallel)->fetch_add(1, std::memory_order_acq_rel
)
;
1561 parent_team->t.t_active_level++;
1562 parent_team->t.t_level++;
1563 parent_team->t.t_def_allocator = master_th->th.th_def_allocator; // save
1564
1565#if OMPT_SUPPORT1
1566 if (ompt_enabled.enabled) {
1567 ompt_lw_taskteam_t lw_taskteam;
1568 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1569 &ompt_parallel_data, return_address);
1570 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 1, true);
1571 }
1572#endif
1573
1574 /* Change number of threads in the team if requested */
1575 if (master_set_numthreads) { // The parallel has num_threads clause
1576 if (master_set_numthreads <= master_th->th.th_teams_size.nth) {
1577 // AC: only can reduce number of threads dynamically, can't increase
1578 kmp_info_t **other_threads = parent_team->t.t_threads;
1579 // NOTE: if using distributed barrier, we need to run this code block
1580 // even when the team size appears not to have changed from the max.
1581 int old_proc = master_th->th.th_teams_size.nth;
1582 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] ==
1583 bp_dist_bar) {
1584 __kmp_resize_dist_barrier(parent_team, old_proc,
1585 master_set_numthreads);
1586 __kmp_add_threads_to_team(parent_team, master_set_numthreads);
1587 }
1588 parent_team->t.t_nproc = master_set_numthreads;
1589 for (i = 0; i < master_set_numthreads; ++i) {
1590 other_threads[i]->th.th_team_nproc = master_set_numthreads;
1591 }
1592 }
1593 // Keep extra threads hot in the team for possible next parallels
1594 master_th->th.th_set_nproc = 0;
1595 }
1596
1597#if USE_DEBUGGER0
1598 if (__kmp_debugging) { // Let debugger override number of threads.
1599 int nth = __kmp_omp_num_threads(loc);
1600 if (nth > 0) { // 0 means debugger doesn't want to change num threads
1601 master_set_numthreads = nth;
1602 }
1603 }
1604#endif
1605
1606 // Figure out the proc_bind policy for the nested parallel within teams
1607 kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
1608 // proc_bind_default means don't update
1609 kmp_proc_bind_t proc_bind_icv = proc_bind_default;
1610 if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
1611 proc_bind = proc_bind_false;
1612 } else {
1613 // No proc_bind clause specified; use current proc-bind-var
1614 if (proc_bind == proc_bind_default) {
1615 proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
1616 }
1617 /* else: The proc_bind policy was specified explicitly on parallel
1618 clause.
1619 This overrides proc-bind-var for this parallel region, but does not
1620 change proc-bind-var. */
1621 // Figure the value of proc-bind-var for the child threads.
1622 if ((level + 1 < __kmp_nested_proc_bind.used) &&
1623 (__kmp_nested_proc_bind.bind_types[level + 1] !=
1624 master_th->th.th_current_task->td_icvs.proc_bind)) {
1625 proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
1626 }
1627 }
1628 KMP_CHECK_UPDATE(parent_team->t.t_proc_bind, proc_bind)if ((parent_team->t.t_proc_bind) != (proc_bind)) (parent_team
->t.t_proc_bind) = (proc_bind)
;
1629 // Need to change the bind-var ICV to correct value for each implicit task
1630 if (proc_bind_icv != proc_bind_default &&
1631 master_th->th.th_current_task->td_icvs.proc_bind != proc_bind_icv) {
1632 kmp_info_t **other_threads = parent_team->t.t_threads;
1633 for (i = 0; i < master_th->th.th_team_nproc; ++i) {
1634 other_threads[i]->th.th_current_task->td_icvs.proc_bind =
1635 proc_bind_icv;
1636 }
1637 }
1638 // Reset for next parallel region
1639 master_th->th.th_set_proc_bind = proc_bind_default;
1640
1641#if USE_ITT_BUILD1 && USE_ITT_NOTIFY1
1642 if (((__itt_frame_submit_v3_ptr__kmp_itt_frame_submit_v3_ptr__3_0 && __itt_get_timestamp_ptr__kmp_itt_get_timestamp_ptr__3_0) ||
1643 KMP_ITT_DEBUG0) &&
1644 __kmp_forkjoin_frames_mode == 3 &&
1645 parent_team->t.t_active_level == 1 // only report frames at level 1
1646 && master_th->th.th_teams_size.nteams == 1) {
1647 kmp_uint64 tmp_time = __itt_get_timestamp(!__kmp_itt_get_timestamp_ptr__3_0) ? 0 : __kmp_itt_get_timestamp_ptr__3_0();
1648 master_th->th.th_frame_time = tmp_time;
1649 parent_team->t.t_region_time = tmp_time;
1650 }
1651 if (__itt_stack_caller_create_ptr__kmp_itt_stack_caller_create_ptr__3_0) {
1652 KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL)if (!(parent_team->t.t_stack_id == __null)) { __kmp_debug_assert
("parent_team->t.t_stack_id == __null", "openmp/runtime/src/kmp_runtime.cpp"
, 1652); }
;
1653 // create new stack stitching id before entering fork barrier
1654 parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();
1655 }
1656#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1657#if KMP_AFFINITY_SUPPORTED1
1658 __kmp_partition_places(parent_team);
1659#endif
1660
1661 KF_TRACE(10, ("__kmp_fork_call: before internal fork: root=%p, team=%p, "if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_fork_call: before internal fork: root=%p, team=%p, "
"master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid
); }
1662 "master_th=%p, gtid=%d\n",if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_fork_call: before internal fork: root=%p, team=%p, "
"master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid
); }
1663 root, parent_team, master_th, gtid))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_fork_call: before internal fork: root=%p, team=%p, "
"master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid
); }
;
1664 __kmp_internal_fork(loc, gtid, parent_team);
1665 KF_TRACE(10, ("__kmp_fork_call: after internal fork: root=%p, team=%p, "if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_fork_call: after internal fork: root=%p, team=%p, "
"master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid
); }
1666 "master_th=%p, gtid=%d\n",if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_fork_call: after internal fork: root=%p, team=%p, "
"master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid
); }
1667 root, parent_team, master_th, gtid))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_fork_call: after internal fork: root=%p, team=%p, "
"master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid
); }
;
1668
1669 if (call_context == fork_context_gnu)
1670 return TRUE(!0);
1671
1672 /* Invoke microtask for PRIMARY thread */
1673 KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n"
, gtid, parent_team->t.t_id, parent_team->t.t_pkfn); }
1674 parent_team->t.t_id, parent_team->t.t_pkfn))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n"
, gtid, parent_team->t.t_id, parent_team->t.t_pkfn); }
;
1675
1676 if (!parent_team->t.t_invoke(gtid)) {
1677 KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread")if (!(0)) { __kmp_debug_assert(("cannot invoke microtask for PRIMARY thread"
), "openmp/runtime/src/kmp_runtime.cpp", 1677); }
;
1678 }
1679 KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n"
, gtid, parent_team->t.t_id, parent_team->t.t_pkfn); }
1680 parent_team->t.t_id, parent_team->t.t_pkfn))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n"
, gtid, parent_team->t.t_id, parent_team->t.t_pkfn); }
;
1681 KMP_MB(); /* Flush all pending memory write invalidates. */
1682
1683 KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_call: parallel exit T#%d\n"
, gtid); }
;
1684
1685 return TRUE(!0);
1686 } // Parallel closely nested in teams construct
1687
1688#if KMP_DEBUG1
1689 if (__kmp_tasking_mode != tskm_immediate_exec) {
1690 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==if (!(master_th->th.th_task_team == parent_team->t.t_task_team
[master_th->th.th_task_state])) { __kmp_debug_assert("master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]"
, "openmp/runtime/src/kmp_runtime.cpp", 1691); }
1691 parent_team->t.t_task_team[master_th->th.th_task_state])if (!(master_th->th.th_task_team == parent_team->t.t_task_team
[master_th->th.th_task_state])) { __kmp_debug_assert("master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]"
, "openmp/runtime/src/kmp_runtime.cpp", 1691); }
;
1692 }
1693#endif
1694
1695 // Need this to happen before we determine the number of threads, not while
1696 // we are allocating the team
1697 //__kmp_push_current_task_to_thread(master_th, parent_team, 0);
1698 int enter_teams = 0;
1699 if (parent_team->t.t_active_level >=
1700 master_th->th.th_current_task->td_icvs.max_active_levels) {
1701 nthreads = 1;
1702 } else {
1703 enter_teams = ((ap == NULL__null && active_level == 0) ||
1704 (ap && teams_level > 0 && teams_level == level));
1705 nthreads = master_set_numthreads
1706 ? master_set_numthreads
1707 // TODO: get nproc directly from current task
1708 : get__nproc_2(parent_team, master_tid)((parent_team)->t.t_threads[(master_tid)]->th.th_current_task
->td_icvs.nproc)
;
1709 // Check if we need to take forkjoin lock? (no need for serialized
1710 // parallel out of teams construct). This code moved here from
1711 // __kmp_reserve_threads() to speedup nested serialized parallels.
1712 if (nthreads > 1) {
1713 if ((get__max_active_levels(master_th)((master_th)->th.th_current_task->td_icvs.max_active_levels
)
== 1 &&
1714 (root->r.r_in_parallel && !enter_teams)) ||
1715 (__kmp_library == library_serial)) {
1716 KC_TRACE(10, ("__kmp_fork_call: T#%d serializing team; requested %d"if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_fork_call: T#%d serializing team; requested %d"
" threads\n", gtid, nthreads); }
1717 " threads\n",if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_fork_call: T#%d serializing team; requested %d"
" threads\n", gtid, nthreads); }
1718 gtid, nthreads))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_fork_call: T#%d serializing team; requested %d"
" threads\n", gtid, nthreads); }
;
1719 nthreads = 1;
1720 }
1721 }
1722 if (nthreads > 1) {
1723 /* determine how many new threads we can use */
1724 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
1725 /* AC: If we execute teams from parallel region (on host), then teams
1726 should be created but each can only have 1 thread if nesting is
1727 disabled. If teams called from serial region, then teams and their
1728 threads should be created regardless of the nesting setting. */
1729 nthreads = __kmp_reserve_threads(root, parent_team, master_tid,
1730 nthreads, enter_teams);
1731 if (nthreads == 1) {
1732 // Free lock for single thread execution here; for multi-thread
1733 // execution it will be freed later after team of threads created
1734 // and initialized
1735 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
1736 }
1737 }
1738 }
1739 KMP_DEBUG_ASSERT(nthreads > 0)if (!(nthreads > 0)) { __kmp_debug_assert("nthreads > 0"
, "openmp/runtime/src/kmp_runtime.cpp", 1739); }
;
1740
1741 // If we temporarily changed the set number of threads then restore it now
1742 master_th->th.th_set_nproc = 0;
1743
1744 /* create a serialized parallel region? */
1745 if (nthreads == 1) {
1746/* josh todo: hypothetical question: what do we do for OS X*? */
1747#if KMP_OS_LINUX1 && \
1748 (KMP_ARCH_X860 || KMP_ARCH_X86_641 || KMP_ARCH_ARM || KMP_ARCH_AARCH640)
1749 void *args[argc];
1750#else
1751 void **args = (void **)KMP_ALLOCA(argc * sizeof(void *))__builtin_alloca (argc * sizeof(void *));
1752#endif /* KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || \
1753 KMP_ARCH_AARCH64) */
1754
1755 KA_TRACE(20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_call: T#%d serializing parallel region\n"
, gtid); }
1756 ("__kmp_fork_call: T#%d serializing parallel region\n", gtid))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_call: T#%d serializing parallel region\n"
, gtid); }
;
1757
1758 __kmpc_serialized_parallel(loc, gtid);
1759
1760#if OMPD_SUPPORT1
1761 master_th->th.th_serial_team->t.t_pkfn = microtask;
1762#endif
1763
1764 if (call_context == fork_context_intel) {
1765 /* TODO this sucks, use the compiler itself to pass args! :) */
1766 master_th->th.th_serial_team->t.t_ident = loc;
1767 if (!ap) {
1768 // revert change made in __kmpc_serialized_parallel()
1769 master_th->th.th_serial_team->t.t_level--;
1770 // Get args from parent team for teams construct
1771
1772#if OMPT_SUPPORT1
1773 void *dummy;
1774 void **exit_frame_p;
1775 ompt_task_info_t *task_info;
1776
1777 ompt_lw_taskteam_t lw_taskteam;
1778
1779 if (ompt_enabled.enabled) {
1780 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1781 &ompt_parallel_data, return_address);
1782
1783 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
1784 // don't use lw_taskteam after linking. content was swapped
1785
1786 task_info = OMPT_CUR_TASK_INFO(master_th)(&(master_th->th.th_current_task->ompt_task_info));
1787 exit_frame_p = &(task_info->frame.exit_frame.ptr);
1788 if (ompt_enabled.ompt_callback_implicit_task) {
1789 OMPT_CUR_TASK_INFO(master_th)(&(master_th->th.th_current_task->ompt_task_info))->thread_num =
1790 __kmp_tid_from_gtid(gtid);
1791 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)ompt_callback_implicit_task_callback(
1792 ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th)(&(master_th->th.th_team->t.ompt_team_info.parallel_data
))
,
1793 &(task_info->task_data), 1,
1794 OMPT_CUR_TASK_INFO(master_th)(&(master_th->th.th_current_task->ompt_task_info))->thread_num,
1795 ompt_task_implicit);
1796 }
1797
1798 /* OMPT state */
1799 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1800 } else {
1801 exit_frame_p = &dummy;
1802 }
1803#endif
1804
1805 {
1806 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel)((void)0);
1807 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK)((void)0);
1808 __kmp_invoke_microtask(microtask, gtid, 0, argc,
1809 parent_team->t.t_argv
1810#if OMPT_SUPPORT1
1811 ,
1812 exit_frame_p
1813#endif
1814 );
1815 }
1816
1817#if OMPT_SUPPORT1
1818 if (ompt_enabled.enabled) {
1819 *exit_frame_p = NULL__null;
1820 if (ompt_enabled.ompt_callback_implicit_task) {
1821 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)ompt_callback_implicit_task_callback(
1822 ompt_scope_end, NULL__null, &(task_info->task_data), 1,
1823 OMPT_CUR_TASK_INFO(master_th)(&(master_th->th.th_current_task->ompt_task_info))->thread_num,
1824 ompt_task_implicit);
1825 }
1826 ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th)(&(master_th->th.th_team->t.ompt_team_info.parallel_data
))
;
1827 __ompt_lw_taskteam_unlink(master_th);
1828 if (ompt_enabled.ompt_callback_parallel_end) {
1829 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)ompt_callback_parallel_end_callback(
1830 &ompt_parallel_data, parent_task_data,
1831 OMPT_INVOKER(call_context)((call_context == fork_context_gnu) ? ompt_parallel_invoker_program
: ompt_parallel_invoker_runtime)
| ompt_parallel_team,
1832 return_address);
1833 }
1834 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1835 }
1836#endif
1837 } else if (microtask == (microtask_t)__kmp_teams_master) {
1838 KMP_DEBUG_ASSERT(master_th->th.th_team ==if (!(master_th->th.th_team == master_th->th.th_serial_team
)) { __kmp_debug_assert("master_th->th.th_team == master_th->th.th_serial_team"
, "openmp/runtime/src/kmp_runtime.cpp", 1839); }
1839 master_th->th.th_serial_team)if (!(master_th->th.th_team == master_th->th.th_serial_team
)) { __kmp_debug_assert("master_th->th.th_team == master_th->th.th_serial_team"
, "openmp/runtime/src/kmp_runtime.cpp", 1839); }
;
1840 team = master_th->th.th_team;
1841 // team->t.t_pkfn = microtask;
1842 team->t.t_invoke = invoker;
1843 __kmp_alloc_argv_entries(argc, team, TRUE(!0));
1844 team->t.t_argc = argc;
1845 argv = (void **)team->t.t_argv;
1846 if (ap) {
1847 for (i = argc - 1; i >= 0; --i)
1848 *argv++ = va_arg(kmp_va_deref(ap), void *)__builtin_va_arg((*(ap)), void *);
1849 } else {
1850 for (i = 0; i < argc; ++i)
1851 // Get args from parent team for teams construct
1852 argv[i] = parent_team->t.t_argv[i];
1853 }
1854 // AC: revert change made in __kmpc_serialized_parallel()
1855 // because initial code in teams should have level=0
1856 team->t.t_level--;
1857 // AC: call special invoker for outer "parallel" of teams construct
1858 invoker(gtid);
1859#if OMPT_SUPPORT1
1860 if (ompt_enabled.enabled) {
1861 ompt_task_info_t *task_info = OMPT_CUR_TASK_INFO(master_th)(&(master_th->th.th_current_task->ompt_task_info));
1862 if (ompt_enabled.ompt_callback_implicit_task) {
1863 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)ompt_callback_implicit_task_callback(
1864 ompt_scope_end, NULL__null, &(task_info->task_data), 0,
1865 OMPT_CUR_TASK_INFO(master_th)(&(master_th->th.th_current_task->ompt_task_info))->thread_num, ompt_task_initial);
1866 }
1867 if (ompt_enabled.ompt_callback_parallel_end) {
1868 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)ompt_callback_parallel_end_callback(
1869 &ompt_parallel_data, parent_task_data,
1870 OMPT_INVOKER(call_context)((call_context == fork_context_gnu) ? ompt_parallel_invoker_program
: ompt_parallel_invoker_runtime)
| ompt_parallel_league,
1871 return_address);
1872 }
1873 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1874 }
1875#endif
1876 } else {
1877 argv = args;
1878 for (i = argc - 1; i >= 0; --i)
1879 *argv++ = va_arg(kmp_va_deref(ap), void *)__builtin_va_arg((*(ap)), void *);
1880 KMP_MB();
1881
1882#if OMPT_SUPPORT1
1883 void *dummy;
1884 void **exit_frame_p;
1885 ompt_task_info_t *task_info;
1886
1887 ompt_lw_taskteam_t lw_taskteam;
1888
1889 if (ompt_enabled.enabled) {
1890 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1891 &ompt_parallel_data, return_address);
1892 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
1893 // don't use lw_taskteam after linking. content was swapped
1894 task_info = OMPT_CUR_TASK_INFO(master_th)(&(master_th->th.th_current_task->ompt_task_info));
1895 exit_frame_p = &(task_info->frame.exit_frame.ptr);
1896
1897 /* OMPT implicit task begin */
1898 implicit_task_data = OMPT_CUR_TASK_DATA(master_th)(&(master_th->th.th_current_task->ompt_task_info.task_data
))
;
1899 if (ompt_enabled.ompt_callback_implicit_task) {
1900 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)ompt_callback_implicit_task_callback(
1901 ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th)(&(master_th->th.th_team->t.ompt_team_info.parallel_data
))
,
1902 implicit_task_data, 1, __kmp_tid_from_gtid(gtid),
1903 ompt_task_implicit);
1904 OMPT_CUR_TASK_INFO(master_th)(&(master_th->th.th_current_task->ompt_task_info))->thread_num =
1905 __kmp_tid_from_gtid(gtid);
1906 }
1907
1908 /* OMPT state */
1909 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1910 } else {
1911 exit_frame_p = &dummy;
1912 }
1913#endif
1914
1915 {
1916 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel)((void)0);
1917 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK)((void)0);
1918 __kmp_invoke_microtask(microtask, gtid, 0, argc, args
1919#if OMPT_SUPPORT1
1920 ,
1921 exit_frame_p
1922#endif
1923 );
1924 }
1925
1926#if OMPT_SUPPORT1
1927 if (ompt_enabled.enabled) {
1928 *exit_frame_p = NULL__null;
1929 if (ompt_enabled.ompt_callback_implicit_task) {
1930 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)ompt_callback_implicit_task_callback(
1931 ompt_scope_end, NULL__null, &(task_info->task_data), 1,
1932 OMPT_CUR_TASK_INFO(master_th)(&(master_th->th.th_current_task->ompt_task_info))->thread_num,
1933 ompt_task_implicit);
1934 }
1935
1936 ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th)(&(master_th->th.th_team->t.ompt_team_info.parallel_data
))
;
1937 __ompt_lw_taskteam_unlink(master_th);
1938 if (ompt_enabled.ompt_callback_parallel_end) {
1939 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)ompt_callback_parallel_end_callback(
1940 &ompt_parallel_data, parent_task_data,
1941 OMPT_INVOKER(call_context)((call_context == fork_context_gnu) ? ompt_parallel_invoker_program
: ompt_parallel_invoker_runtime)
| ompt_parallel_team,
1942 return_address);
1943 }
1944 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1945 }
1946#endif
1947 }
1948 } else if (call_context == fork_context_gnu) {
1949#if OMPT_SUPPORT1
1950 if (ompt_enabled.enabled) {
1951 ompt_lw_taskteam_t lwt;
1952 __ompt_lw_taskteam_init(&lwt, master_th, gtid, &ompt_parallel_data,
1953 return_address);
1954
1955 lwt.ompt_task_info.frame.exit_frame = ompt_data_none{0};
1956 __ompt_lw_taskteam_link(&lwt, master_th, 1);
1957 }
1958// don't use lw_taskteam after linking. content was swapped
1959#endif
1960
1961 // we were called from GNU native code
1962 KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_call: T#%d serial exit\n"
, gtid); }
;
1963 return FALSE0;
1964 } else {
1965 KMP_ASSERT2(call_context < fork_context_last,if (!(call_context < fork_context_last)) { __kmp_debug_assert
(("__kmp_fork_call: unknown fork_context parameter"), "openmp/runtime/src/kmp_runtime.cpp"
, 1966); }
1966 "__kmp_fork_call: unknown fork_context parameter")if (!(call_context < fork_context_last)) { __kmp_debug_assert
(("__kmp_fork_call: unknown fork_context parameter"), "openmp/runtime/src/kmp_runtime.cpp"
, 1966); }
;
1967 }
1968
1969 KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_call: T#%d serial exit\n"
, gtid); }
;
1970 KMP_MB();
1971 return FALSE0;
1972 } // if (nthreads == 1)
1973
1974 // GEH: only modify the executing flag in the case when not serialized
1975 // serialized case is handled in kmpc_serialized_parallel
1976 KF_TRACE(10, ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
"curtask=%p, curtask_max_aclevel=%d\n", parent_team->t.t_active_level
, master_th, master_th->th.th_current_task, master_th->
th.th_current_task->td_icvs.max_active_levels); }
1977 "curtask=%p, curtask_max_aclevel=%d\n",if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
"curtask=%p, curtask_max_aclevel=%d\n", parent_team->t.t_active_level
, master_th, master_th->th.th_current_task, master_th->
th.th_current_task->td_icvs.max_active_levels); }
1978 parent_team->t.t_active_level, master_th,if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
"curtask=%p, curtask_max_aclevel=%d\n", parent_team->t.t_active_level
, master_th, master_th->th.th_current_task, master_th->
th.th_current_task->td_icvs.max_active_levels); }
1979 master_th->th.th_current_task,if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
"curtask=%p, curtask_max_aclevel=%d\n", parent_team->t.t_active_level
, master_th, master_th->th.th_current_task, master_th->
th.th_current_task->td_icvs.max_active_levels); }
1980 master_th->th.th_current_task->td_icvs.max_active_levels))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
"curtask=%p, curtask_max_aclevel=%d\n", parent_team->t.t_active_level
, master_th, master_th->th.th_current_task, master_th->
th.th_current_task->td_icvs.max_active_levels); }
;
1981 // TODO: GEH - cannot do this assertion because root thread not set up as
1982 // executing
1983 // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 1 );
1984 master_th->th.th_current_task->td_flags.executing = 0;
1985
1986 if (!master_th->th.th_teams_microtask || level > teams_level) {
1987 /* Increment our nested depth level */
1988 KMP_ATOMIC_INC(&root->r.r_in_parallel)(&root->r.r_in_parallel)->fetch_add(1, std::memory_order_acq_rel
)
;
1989 }
1990
1991 // See if we need to make a copy of the ICVs.
1992 int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
1993 if ((level + 1 < __kmp_nested_nth.used) &&
1994 (__kmp_nested_nth.nth[level + 1] != nthreads_icv)) {
1995 nthreads_icv = __kmp_nested_nth.nth[level + 1];
1996 } else {
1997 nthreads_icv = 0; // don't update
1998 }
1999
2000 // Figure out the proc_bind_policy for the new team.
2001 kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
2002 // proc_bind_default means don't update
2003 kmp_proc_bind_t proc_bind_icv = proc_bind_default;
2004 if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
2005 proc_bind = proc_bind_false;
2006 } else {
2007 // No proc_bind clause specified; use current proc-bind-var for this
2008 // parallel region
2009 if (proc_bind == proc_bind_default) {
2010 proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
2011 }
2012 // Have teams construct take proc_bind value from KMP_TEAMS_PROC_BIND
2013 if (master_th->th.th_teams_microtask &&
2014 microtask == (microtask_t)__kmp_teams_master) {
2015 proc_bind = __kmp_teams_proc_bind;
2016 }
2017 /* else: The proc_bind policy was specified explicitly on parallel clause.
2018 This overrides proc-bind-var for this parallel region, but does not
2019 change proc-bind-var. */
2020 // Figure the value of proc-bind-var for the child threads.
2021 if ((level + 1 < __kmp_nested_proc_bind.used) &&
2022 (__kmp_nested_proc_bind.bind_types[level + 1] !=
2023 master_th->th.th_current_task->td_icvs.proc_bind)) {
2024 // Do not modify the proc bind icv for the two teams construct forks
2025 // They just let the proc bind icv pass through
2026 if (!master_th->th.th_teams_microtask ||
2027 !(microtask == (microtask_t)__kmp_teams_master || ap == NULL__null))
2028 proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
2029 }
2030 }
2031
2032 // Reset for next parallel region
2033 master_th->th.th_set_proc_bind = proc_bind_default;
2034
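
The proc_bind selection just above follows the same three-way rule used earlier in the serialized path: a proc-bind-var of false wins outright, an absent clause (proc_bind_default) falls back to proc-bind-var, and an explicit clause overrides proc-bind-var for this region only, with the teams primary additionally taking its value from KMP_TEAMS_PROC_BIND. A compact sketch of the base resolution, using an illustrative enum rather than the runtime's kmp_proc_bind_t:

// Illustrative enum; kmp_proc_bind_t in the runtime has more members.
enum proc_bind_t { pb_default, pb_false, pb_true, pb_primary, pb_close, pb_spread };

// Resolve the effective binding for a parallel region from the clause value
// and the current proc-bind-var ICV, mirroring the branch structure above.
proc_bind_t resolve_proc_bind(proc_bind_t clause, proc_bind_t icv) {
  if (icv == pb_false)
    return pb_false;        // binding disabled: nothing to partition
  if (clause == pb_default)
    return icv;             // no clause on the directive: inherit the ICV
  return clause;            // explicit clause overrides, for this region only
}
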
2035 if ((nthreads_icv > 0) || (proc_bind_icv != proc_bind_default)) {
2036 kmp_internal_control_t new_icvs;
2037 copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
2038 new_icvs.next = NULL__null;
2039 if (nthreads_icv > 0) {
2040 new_icvs.nproc = nthreads_icv;
2041 }
2042 if (proc_bind_icv != proc_bind_default) {
2043 new_icvs.proc_bind = proc_bind_icv;
2044 }
2045
2046 /* allocate a new parallel team */
2047 KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_fork_call: before __kmp_allocate_team\n"
); }
;
2048 team = __kmp_allocate_team(root, nthreads, nthreads,
2049#if OMPT_SUPPORT1
2050 ompt_parallel_data,
2051#endif
2052 proc_bind, &new_icvs,
2053 argc USE_NESTED_HOT_ARG(master_th), master_th);
2054 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
2055 copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs, &new_icvs);
2056 } else {
2057 /* allocate a new parallel team */
2058 KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_fork_call: before __kmp_allocate_team\n"
); }
;
2059 team = __kmp_allocate_team(root, nthreads, nthreads,
2060#if OMPT_SUPPORT1
2061 ompt_parallel_data,
2062#endif
2063 proc_bind,
2064 &master_th->th.th_current_task->td_icvs,
2065 argc USE_NESTED_HOT_ARG(master_th), master_th);
2066 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
2067 copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs,
2068 &master_th->th.th_current_task->td_icvs);
2069 }
2070 KF_TRACE(if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n"
, team); }
2071 10, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n"
, team); }
;
2072
2073 /* setup the new team */
2074 KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid)if ((team->t.t_master_tid) != (master_tid)) (team->t.t_master_tid
) = (master_tid)
;
2075 KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons)if ((team->t.t_master_this_cons) != (master_this_cons)) (team
->t.t_master_this_cons) = (master_this_cons)
;
2076 KMP_CHECK_UPDATE(team->t.t_ident, loc)if ((team->t.t_ident) != (loc)) (team->t.t_ident) = (loc
)
;
2077 KMP_CHECK_UPDATE(team->t.t_parent, parent_team)if ((team->t.t_parent) != (parent_team)) (team->t.t_parent
) = (parent_team)
;
2078 KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask)if ((team->t.t_pkfn) != (microtask)) (((team->t.t_pkfn)
)) = (((microtask)))
;
2079#if OMPT_SUPPORT1
2080 KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.master_return_address,if ((team->t.ompt_team_info.master_return_address) != (return_address
)) (((team->t.ompt_team_info.master_return_address))) = ((
(return_address)))
2081 return_address)if ((team->t.ompt_team_info.master_return_address) != (return_address
)) (((team->t.ompt_team_info.master_return_address))) = ((
(return_address)))
;
2082#endif
2083 KMP_CHECK_UPDATE(team->t.t_invoke, invoker)if ((team->t.t_invoke) != (invoker)) (team->t.t_invoke)
= (invoker)
; // TODO move to root, maybe
2084 // TODO: parent_team->t.t_level == INT_MAX ???
2085 if (!master_th->th.th_teams_microtask || level > teams_level) {
2086 int new_level = parent_team->t.t_level + 1;
2087 KMP_CHECK_UPDATE(team->t.t_level, new_level)if ((team->t.t_level) != (new_level)) (team->t.t_level)
= (new_level)
;
2088 new_level = parent_team->t.t_active_level + 1;
2089 KMP_CHECK_UPDATE(team->t.t_active_level, new_level)if ((team->t.t_active_level) != (new_level)) (team->t.t_active_level
) = (new_level)
;
2090 } else {
2091 // AC: Do not increase parallel level at start of the teams construct
2092 int new_level = parent_team->t.t_level;
2093 KMP_CHECK_UPDATE(team->t.t_level, new_level)if ((team->t.t_level) != (new_level)) (team->t.t_level)
= (new_level)
;
2094 new_level = parent_team->t.t_active_level;
2095 KMP_CHECK_UPDATE(team->t.t_active_level, new_level)if ((team->t.t_active_level) != (new_level)) (team->t.t_active_level
) = (new_level)
;
2096 }
2097 kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid)((parent_team)->t.t_threads[(master_tid)]->th.th_current_task
->td_icvs.sched)
;
2098 // set primary thread's schedule as new run-time schedule
2099 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched)if ((team->t.t_sched.sched) != (new_sched.sched)) (team->
t.t_sched.sched) = (new_sched.sched)
;
2100
2101 KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq)if ((team->t.t_cancel_request) != (cancel_noreq)) (team->
t.t_cancel_request) = (cancel_noreq)
;
2102 KMP_CHECK_UPDATE(team->t.t_def_allocator, master_th->th.th_def_allocator)if ((team->t.t_def_allocator) != (master_th->th.th_def_allocator
)) (team->t.t_def_allocator) = (master_th->th.th_def_allocator
)
;
2103
2104 // Update the floating point rounding in the team if required.
2105 propagateFPControl(team);
2106#if OMPD_SUPPORT1
2107 if (ompd_state & OMPD_ENABLE_BP0x1)
2108 ompd_bp_parallel_begin();
2109#endif
2110
2111 if (__kmp_tasking_mode != tskm_immediate_exec) {
2112 // Set the primary thread's task team to the team's task team. Unless this is
2113 // a hot team, it should be NULL.
2114 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==if (!(master_th->th.th_task_team == parent_team->t.t_task_team
[master_th->th.th_task_state])) { __kmp_debug_assert("master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]"
, "openmp/runtime/src/kmp_runtime.cpp", 2115); }
2115 parent_team->t.t_task_team[master_th->th.th_task_state])if (!(master_th->th.th_task_team == parent_team->t.t_task_team
[master_th->th.th_task_state])) { __kmp_debug_assert("master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]"
, "openmp/runtime/src/kmp_runtime.cpp", 2115); }
;
2116 KA_TRACE(20, ("__kmp_fork_call: Primary T#%d pushing task_team %p / team "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_call: Primary T#%d pushing task_team %p / team "
"%p, new task_team %p / team %p\n", __kmp_gtid_from_thread(master_th
), master_th->th.th_task_team, parent_team, team->t.t_task_team
[master_th->th.th_task_state], team); }
2117 "%p, new task_team %p / team %p\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_call: Primary T#%d pushing task_team %p / team "
"%p, new task_team %p / team %p\n", __kmp_gtid_from_thread(master_th
), master_th->th.th_task_team, parent_team, team->t.t_task_team
[master_th->th.th_task_state], team); }
2118 __kmp_gtid_from_thread(master_th),if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_call: Primary T#%d pushing task_team %p / team "
"%p, new task_team %p / team %p\n", __kmp_gtid_from_thread(master_th
), master_th->th.th_task_team, parent_team, team->t.t_task_team
[master_th->th.th_task_state], team); }
2119 master_th->th.th_task_team, parent_team,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_call: Primary T#%d pushing task_team %p / team "
"%p, new task_team %p / team %p\n", __kmp_gtid_from_thread(master_th
), master_th->th.th_task_team, parent_team, team->t.t_task_team
[master_th->th.th_task_state], team); }
2120 team->t.t_task_team[master_th->th.th_task_state], team))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_call: Primary T#%d pushing task_team %p / team "
"%p, new task_team %p / team %p\n", __kmp_gtid_from_thread(master_th
), master_th->th.th_task_team, parent_team, team->t.t_task_team
[master_th->th.th_task_state], team); }
;
2121
2122 if (active_level || master_th->th.th_task_team) {
2123 // Take a memo of primary thread's task_state
2124 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack)if (!(master_th->th.th_task_state_memo_stack)) { __kmp_debug_assert
("master_th->th.th_task_state_memo_stack", "openmp/runtime/src/kmp_runtime.cpp"
, 2124); }
;
2125 if (master_th->th.th_task_state_top >=
2126 master_th->th.th_task_state_stack_sz) { // increase size
2127 kmp_uint32 new_size = 2 * master_th->th.th_task_state_stack_sz;
2128 kmp_uint8 *old_stack, *new_stack;
2129 kmp_uint32 i;
2130 new_stack = (kmp_uint8 *)__kmp_allocate(new_size)___kmp_allocate((new_size), "openmp/runtime/src/kmp_runtime.cpp"
, 2130)
;
2131 for (i = 0; i < master_th->th.th_task_state_stack_sz; ++i) {
2132 new_stack[i] = master_th->th.th_task_state_memo_stack[i];
2133 }
2134 for (i = master_th->th.th_task_state_stack_sz; i < new_size;
2135 ++i) { // zero-init rest of stack
2136 new_stack[i] = 0;
2137 }
2138 old_stack = master_th->th.th_task_state_memo_stack;
2139 master_th->th.th_task_state_memo_stack = new_stack;
2140 master_th->th.th_task_state_stack_sz = new_size;
2141 __kmp_free(old_stack)___kmp_free((old_stack), "openmp/runtime/src/kmp_runtime.cpp"
, 2141)
;
2142 }
2143 // Store primary thread's task_state on stack
2144 master_th->th
2145 .th_task_state_memo_stack[master_th->th.th_task_state_top] =
2146 master_th->th.th_task_state;
2147 master_th->th.th_task_state_top++;
2148#if KMP_NESTED_HOT_TEAMS1
2149 if (master_th->th.th_hot_teams &&
2150 active_level < __kmp_hot_teams_max_level &&
2151 team == master_th->th.th_hot_teams[active_level].hot_team) {
2152 // Restore primary thread's nested state if nested hot team
2153 master_th->th.th_task_state =
2154 master_th->th
2155 .th_task_state_memo_stack[master_th->th.th_task_state_top];
2156 } else {
2157#endif
2158 master_th->th.th_task_state = 0;
2159#if KMP_NESTED_HOT_TEAMS1
2160 }
2161#endif
2162 }
2163#if !KMP_NESTED_HOT_TEAMS1
2164 KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) ||if (!((master_th->th.th_task_team == __null) || (team == root
->r.r_hot_team))) { __kmp_debug_assert("(master_th->th.th_task_team == __null) || (team == root->r.r_hot_team)"
, "openmp/runtime/src/kmp_runtime.cpp", 2165); }
2165 (team == root->r.r_hot_team))if (!((master_th->th.th_task_team == __null) || (team == root
->r.r_hot_team))) { __kmp_debug_assert("(master_th->th.th_task_team == __null) || (team == root->r.r_hot_team)"
, "openmp/runtime/src/kmp_runtime.cpp", 2165); }
;
2166#endif
2167 }
2168
2169 KA_TRACE(if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n"
, gtid, parent_team->t.t_id, team->t.t_master_tid, team
->t.t_id, team->t.t_nproc); }
2170 20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n"
, gtid, parent_team->t.t_id, team->t.t_master_tid, team
->t.t_id, team->t.t_nproc); }
2171 ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n"
, gtid, parent_team->t.t_id, team->t.t_master_tid, team
->t.t_id, team->t.t_nproc); }
2172 gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n"
, gtid, parent_team->t.t_id, team->t.t_master_tid, team
->t.t_id, team->t.t_nproc); }
2173 team->t.t_nproc))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n"
, gtid, parent_team->t.t_id, team->t.t_master_tid, team
->t.t_id, team->t.t_nproc); }
;
2174 KMP_DEBUG_ASSERT(team != root->r.r_hot_team ||if (!(team != root->r.r_hot_team || (team->t.t_master_tid
== 0 && (team->t.t_parent == root->r.r_root_team
|| team->t.t_parent->t.t_serialized)))) { __kmp_debug_assert
("team != root->r.r_hot_team || (team->t.t_master_tid == 0 && (team->t.t_parent == root->r.r_root_team || team->t.t_parent->t.t_serialized))"
, "openmp/runtime/src/kmp_runtime.cpp", 2177); }
2175 (team->t.t_master_tid == 0 &&if (!(team != root->r.r_hot_team || (team->t.t_master_tid
== 0 && (team->t.t_parent == root->r.r_root_team
|| team->t.t_parent->t.t_serialized)))) { __kmp_debug_assert
("team != root->r.r_hot_team || (team->t.t_master_tid == 0 && (team->t.t_parent == root->r.r_root_team || team->t.t_parent->t.t_serialized))"
, "openmp/runtime/src/kmp_runtime.cpp", 2177); }
2176 (team->t.t_parent == root->r.r_root_team ||if (!(team != root->r.r_hot_team || (team->t.t_master_tid
== 0 && (team->t.t_parent == root->r.r_root_team
|| team->t.t_parent->t.t_serialized)))) { __kmp_debug_assert
("team != root->r.r_hot_team || (team->t.t_master_tid == 0 && (team->t.t_parent == root->r.r_root_team || team->t.t_parent->t.t_serialized))"
, "openmp/runtime/src/kmp_runtime.cpp", 2177); }
2177 team->t.t_parent->t.t_serialized)))if (!(team != root->r.r_hot_team || (team->t.t_master_tid
== 0 && (team->t.t_parent == root->r.r_root_team
|| team->t.t_parent->t.t_serialized)))) { __kmp_debug_assert
("team != root->r.r_hot_team || (team->t.t_master_tid == 0 && (team->t.t_parent == root->r.r_root_team || team->t.t_parent->t.t_serialized))"
, "openmp/runtime/src/kmp_runtime.cpp", 2177); }
;
2178 KMP_MB();
2179
2180 /* now, setup the arguments */
2181 argv = (void **)team->t.t_argv;
2182 if (ap) {
2183 for (i = argc - 1; i >= 0; --i) {
2184 void *new_argv = va_arg(kmp_va_deref(ap), void *)__builtin_va_arg((*(ap)), void *);
2185 KMP_CHECK_UPDATE(*argv, new_argv)if ((*argv) != (new_argv)) (*argv) = (new_argv);
2186 argv++;
2187 }
2188 } else {
2189 for (i = 0; i < argc; ++i) {
2190 // Get args from parent team for teams construct
2191 KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i])if ((argv[i]) != (team->t.t_parent->t.t_argv[i])) (argv
[i]) = (team->t.t_parent->t.t_argv[i])
;
2192 }
2193 }
2194
2195 /* now actually fork the threads */
2196 KMP_CHECK_UPDATE(team->t.t_master_active, master_active)if ((team->t.t_master_active) != (master_active)) (team->
t.t_master_active) = (master_active)
;
2197 if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong
2198 root->r.r_active = TRUE(!0);
2199
2200 __kmp_fork_team_threads(root, team, master_th, gtid, !ap);
2201 __kmp_setup_icv_copy(team, nthreads,
2202 &master_th->th.th_current_task->td_icvs, loc);
2203
2204#if OMPT_SUPPORT1
2205 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
2206#endif
2207
2208 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2209
2210#if USE_ITT_BUILD1
2211 if (team->t.t_active_level == 1 // only report frames at level 1
2212 && !master_th->th.th_teams_microtask) { // not in teams construct
2213#if USE_ITT_NOTIFY1
2214 if ((__itt_frame_submit_v3_ptr__kmp_itt_frame_submit_v3_ptr__3_0 || KMP_ITT_DEBUG0) &&
2215 (__kmp_forkjoin_frames_mode == 3 ||
2216 __kmp_forkjoin_frames_mode == 1)) {
2217 kmp_uint64 tmp_time = 0;
2218 if (__itt_get_timestamp_ptr__kmp_itt_get_timestamp_ptr__3_0)
2219 tmp_time = __itt_get_timestamp();
2220 // Internal fork - report frame begin
2221 master_th->th.th_frame_time = tmp_time;
2222 if (__kmp_forkjoin_frames_mode == 3)
2223 team->t.t_region_time = tmp_time;
2224 } else
2225// only one notification scheme (either "submit" or "forking/joined", not both)
2226#endif /* USE_ITT_NOTIFY */
2227 if ((__itt_frame_begin_v3_ptr__kmp_itt_frame_begin_v3_ptr__3_0 || KMP_ITT_DEBUG0) &&
2228 __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) {
2229 // Mark start of "parallel" region for Intel(R) VTune(TM) analyzer.
2230 __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
2231 }
2232 }
2233#endif /* USE_ITT_BUILD */
2234
2235 /* now go on and do the work */
2236 KMP_DEBUG_ASSERT(team == __kmp_threads[gtid]->th.th_team)if (!(team == __kmp_threads[gtid]->th.th_team)) { __kmp_debug_assert
("team == __kmp_threads[gtid]->th.th_team", "openmp/runtime/src/kmp_runtime.cpp"
, 2236); }
;
2237 KMP_MB();
2238 KF_TRACE(10,if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n"
, root, team, master_th, gtid); }
2239 ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n"
, root, team, master_th, gtid); }
2240 root, team, master_th, gtid))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n"
, root, team, master_th, gtid); }
;
2241
2242#if USE_ITT_BUILD1
2243 if (__itt_stack_caller_create_ptr__kmp_itt_stack_caller_create_ptr__3_0) {
2244 // create new stack stitching id before entering fork barrier
2245 if (!enter_teams) {
2246 KMP_DEBUG_ASSERT(team->t.t_stack_id == NULL)if (!(team->t.t_stack_id == __null)) { __kmp_debug_assert(
"team->t.t_stack_id == __null", "openmp/runtime/src/kmp_runtime.cpp"
, 2246); }
;
2247 team->t.t_stack_id = __kmp_itt_stack_caller_create();
2248 } else if (parent_team->t.t_serialized) {
2249 // keep stack stitching id in the serialized parent_team;
2250 // current team will be used for parallel inside the teams;
2251 // if parent_team is active, then it already keeps stack stitching id
2252 // for the league of teams
2253 KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL)if (!(parent_team->t.t_stack_id == __null)) { __kmp_debug_assert
("parent_team->t.t_stack_id == __null", "openmp/runtime/src/kmp_runtime.cpp"
, 2253); }
;
2254 parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();
2255 }
2256 }
2257#endif /* USE_ITT_BUILD */
2258
2259 // AC: skip __kmp_internal_fork at teams construct, let only primary
2260 // threads execute
2261 if (ap) {
2262 __kmp_internal_fork(loc, gtid, team);
2263 KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, "if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_internal_fork : after : root=%p, team=%p, "
"master_th=%p, gtid=%d\n", root, team, master_th, gtid); }
2264 "master_th=%p, gtid=%d\n",if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_internal_fork : after : root=%p, team=%p, "
"master_th=%p, gtid=%d\n", root, team, master_th, gtid); }
2265 root, team, master_th, gtid))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_internal_fork : after : root=%p, team=%p, "
"master_th=%p, gtid=%d\n", root, team, master_th, gtid); }
;
2266 }
2267
2268 if (call_context == fork_context_gnu) {
2269 KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_call: parallel exit T#%d\n"
, gtid); }
;
2270 return TRUE(!0);
2271 }
2272
2273 /* Invoke microtask for PRIMARY thread */
2274 KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n"
, gtid, team->t.t_id, team->t.t_pkfn); }
2275 team->t.t_id, team->t.t_pkfn))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n"
, gtid, team->t.t_id, team->t.t_pkfn); }
;
2276 } // END of timer KMP_fork_call block
2277
2278#if KMP_STATS_ENABLED0
2279 // If beginning a teams construct, then change thread state
2280 stats_state_e previous_state = KMP_GET_THREAD_STATE()((void)0);
2281 if (!ap) {
2282 KMP_SET_THREAD_STATE(stats_state_e::TEAMS_REGION)((void)0);
2283 }
2284#endif
2285
2286 if (!team->t.t_invoke(gtid)) {
2287 KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread")if (!(0)) { __kmp_debug_assert(("cannot invoke microtask for PRIMARY thread"
), "openmp/runtime/src/kmp_runtime.cpp", 2287); }
;
2288 }
2289
2290#if KMP_STATS_ENABLED0
2291 // If was beginning of a teams construct, then reset thread state
2292 if (!ap) {
2293 KMP_SET_THREAD_STATE(previous_state)((void)0);
2294 }
2295#endif
2296
2297 KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n"
, gtid, team->t.t_id, team->t.t_pkfn); }
2298 team->t.t_id, team->t.t_pkfn))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n"
, gtid, team->t.t_id, team->t.t_pkfn); }
;
2299 KMP_MB(); /* Flush all pending memory write invalidates. */
2300
2301 KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_call: parallel exit T#%d\n"
, gtid); }
;
2302#if OMPT_SUPPORT1
2303 if (ompt_enabled.enabled) {
2304 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2305 }
2306#endif
2307
2308 return TRUE(!0);
2309}
2310
2311#if OMPT_SUPPORT1
2312static inline void __kmp_join_restore_state(kmp_info_t *thread,
2313 kmp_team_t *team) {
2314 // restore state outside the region
2315 thread->th.ompt_thread_info.state =
2316 ((team->t.t_serialized) ? ompt_state_work_serial
2317 : ompt_state_work_parallel);
2318}
2319
2320static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread,
2321 kmp_team_t *team, ompt_data_t *parallel_data,
2322 int flags, void *codeptr) {
2323 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2324 if (ompt_enabled.ompt_callback_parallel_end) {
2325 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)ompt_callback_parallel_end_callback(
2326 parallel_data, &(task_info->task_data), flags, codeptr);
2327 }
2328
2329 task_info->frame.enter_frame = ompt_data_none{0};
2330 __kmp_join_restore_state(thread, team);
2331}
2332#endif
2333
2334void __kmp_join_call(ident_t *loc, int gtid
2335#if OMPT_SUPPORT1
2336 ,
2337 enum fork_context_e fork_context
2338#endif
2339 ,
2340 int exit_teams) {
2341 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call)((void)0);
2342 kmp_team_t *team;
2343 kmp_team_t *parent_team;
2344 kmp_info_t *master_th;
2345 kmp_root_t *root;
2346 int master_active;
2347
2348 KA_TRACE(20, ("__kmp_join_call: enter T#%d\n", gtid))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_join_call: enter T#%d\n"
, gtid); }
;
2349
2350 /* setup current data */
2351 master_th = __kmp_threads[gtid];
2352 root = master_th->th.th_root;
2353 team = master_th->th.th_team;
2354 parent_team = team->t.t_parent;
2355
2356 master_th->th.th_ident = loc;
2357
2358#if OMPT_SUPPORT1
2359 void *team_microtask = (void *)team->t.t_pkfn;
2360 // For GOMP interface with serialized parallel, need the
2361 // __kmpc_end_serialized_parallel to call hooks for OMPT end-implicit-task
2362 // and end-parallel events.
2363 if (ompt_enabled.enabled &&
2364 !(team->t.t_serialized && fork_context == fork_context_gnu)) {
2365 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2366 }
2367#endif
2368
2369#if KMP_DEBUG1
2370 if (__kmp_tasking_mode != tskm_immediate_exec && !exit_teams) {
2371 KA_TRACE(20, ("__kmp_join_call: T#%d, old team = %p old task_team = %p, "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_join_call: T#%d, old team = %p old task_team = %p, "
"th_task_team = %p\n", __kmp_gtid_from_thread(master_th), team
, team->t.t_task_team[master_th->th.th_task_state], master_th
->th.th_task_team); }
2372 "th_task_team = %p\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_join_call: T#%d, old team = %p old task_team = %p, "
"th_task_team = %p\n", __kmp_gtid_from_thread(master_th), team
, team->t.t_task_team[master_th->th.th_task_state], master_th
->th.th_task_team); }
2373 __kmp_gtid_from_thread(master_th), team,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_join_call: T#%d, old team = %p old task_team = %p, "
"th_task_team = %p\n", __kmp_gtid_from_thread(master_th), team
, team->t.t_task_team[master_th->th.th_task_state], master_th
->th.th_task_team); }
2374 team->t.t_task_team[master_th->th.th_task_state],if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_join_call: T#%d, old team = %p old task_team = %p, "
"th_task_team = %p\n", __kmp_gtid_from_thread(master_th), team
, team->t.t_task_team[master_th->th.th_task_state], master_th
->th.th_task_team); }
2375 master_th->th.th_task_team))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_join_call: T#%d, old team = %p old task_team = %p, "
"th_task_team = %p\n", __kmp_gtid_from_thread(master_th), team
, team->t.t_task_team[master_th->th.th_task_state], master_th
->th.th_task_team); }
;
2376 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==if (!(master_th->th.th_task_team == team->t.t_task_team
[master_th->th.th_task_state])) { __kmp_debug_assert("master_th->th.th_task_team == team->t.t_task_team[master_th->th.th_task_state]"
, "openmp/runtime/src/kmp_runtime.cpp", 2377); }
2377 team->t.t_task_team[master_th->th.th_task_state])if (!(master_th->th.th_task_team == team->t.t_task_team
[master_th->th.th_task_state])) { __kmp_debug_assert("master_th->th.th_task_team == team->t.t_task_team[master_th->th.th_task_state]"
, "openmp/runtime/src/kmp_runtime.cpp", 2377); }
;
2378 }
2379#endif
2380
2381 if (team->t.t_serialized) {
2382 if (master_th->th.th_teams_microtask) {
2383 // We are in teams construct
2384 int level = team->t.t_level;
2385 int tlevel = master_th->th.th_teams_level;
2386 if (level == tlevel) {
2387 // AC: we haven't incremented it earlier at start of teams construct,
2388 // so do it here - at the end of teams construct
2389 team->t.t_level++;
2390 } else if (level == tlevel + 1) {
2391 // AC: we are exiting parallel inside teams, need to increment
2392 // serialization in order to restore it in the next call to
2393 // __kmpc_end_serialized_parallel
2394 team->t.t_serialized++;
2395 }
2396 }
2397 __kmpc_end_serialized_parallel(loc, gtid);
2398
2399#if OMPT_SUPPORT1
2400 if (ompt_enabled.enabled) {
2401 if (fork_context == fork_context_gnu) {
2402 __ompt_lw_taskteam_unlink(master_th);
2403 }
2404 __kmp_join_restore_state(master_th, parent_team);
2405 }
2406#endif
2407
2408 return;
2409 }
2410
2411 master_active = team->t.t_master_active;
2412
2413 if (!exit_teams) {
2414 // AC: No barrier for internal teams at exit from teams construct.
2415 // But there is barrier for external team (league).
2416 __kmp_internal_join(loc, gtid, team);
2417#if USE_ITT_BUILD1
2418 if (__itt_stack_caller_create_ptr__kmp_itt_stack_caller_create_ptr__3_0) {
2419 KMP_DEBUG_ASSERT(team->t.t_stack_id != NULL)if (!(team->t.t_stack_id != __null)) { __kmp_debug_assert(
"team->t.t_stack_id != __null", "openmp/runtime/src/kmp_runtime.cpp"
, 2419); }
;
2420 // destroy the stack stitching id after join barrier
2421 __kmp_itt_stack_caller_destroy((__itt_caller)team->t.t_stack_id);
2422 team->t.t_stack_id = NULL__null;
2423 }
2424#endif
2425 } else {
2426 master_th->th.th_task_state =
2427 0; // AC: no tasking in teams (out of any parallel)
2428#if USE_ITT_BUILD1
2429 if (__itt_stack_caller_create_ptr__kmp_itt_stack_caller_create_ptr__3_0 && parent_team->t.t_serialized) {
2430 KMP_DEBUG_ASSERT(parent_team->t.t_stack_id != NULL)if (!(parent_team->t.t_stack_id != __null)) { __kmp_debug_assert
("parent_team->t.t_stack_id != __null", "openmp/runtime/src/kmp_runtime.cpp"
, 2430); }
;
2431 // destroy the stack stitching id on exit from the teams construct
2432 // if parent_team is active, then the id will be destroyed later on
2433 // by master of the league of teams
2434 __kmp_itt_stack_caller_destroy((__itt_caller)parent_team->t.t_stack_id);
2435 parent_team->t.t_stack_id = NULL__null;
2436 }
2437#endif
2438
2439 if (team->t.t_nproc > 1 &&
2440 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
2441 team->t.b->update_num_threads(team->t.t_nproc);
2442 __kmp_add_threads_to_team(team, team->t.t_nproc);
2443 }
2444 }
2445
2446 KMP_MB();
2447
2448#if OMPT_SUPPORT1
2449 ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data);
2450 void *codeptr = team->t.ompt_team_info.master_return_address;
2451#endif
2452
2453#if USE_ITT_BUILD1
2454 // Mark end of "parallel" region for Intel(R) VTune(TM) analyzer.
2455 if (team->t.t_active_level == 1 &&
2456 (!master_th->th.th_teams_microtask || /* not in teams construct */
2457 master_th->th.th_teams_size.nteams == 1)) {
2458 master_th->th.th_ident = loc;
2459 // only one notification scheme (either "submit" or "forking/joined", not
2460 // both)
2461 if ((__itt_frame_submit_v3_ptr__kmp_itt_frame_submit_v3_ptr__3_0 || KMP_ITT_DEBUG0) &&
2462 __kmp_forkjoin_frames_mode == 3)
2463 __kmp_itt_frame_submit(gtid, team->t.t_region_time,
2464 master_th->th.th_frame_time, 0, loc,
2465 master_th->th.th_team_nproc, 1);
2466 else if ((__itt_frame_end_v3_ptr__kmp_itt_frame_end_v3_ptr__3_0 || KMP_ITT_DEBUG0) &&
2467 !__kmp_forkjoin_frames_mode && __kmp_forkjoin_frames)
2468 __kmp_itt_region_joined(gtid);
2469 } // active_level == 1
2470#endif /* USE_ITT_BUILD */
2471
2472#if KMP_AFFINITY_SUPPORTED1
2473 if (!exit_teams) {
2474 // Restore master thread's partition.
2475 master_th->th.th_first_place = team->t.t_first_place;
2476 master_th->th.th_last_place = team->t.t_last_place;
2477 }
2478#endif // KMP_AFFINITY_SUPPORTED
2479
2480 if (master_th->th.th_teams_microtask && !exit_teams &&
2481 team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
2482 team->t.t_level == master_th->th.th_teams_level + 1) {
2483// AC: We need to leave the team structure intact at the end of a parallel
2484// region inside the teams construct, so that the same (hot) team is reused by
2485// the next parallel region; only the nesting levels are adjusted here.
2486#if OMPT_SUPPORT1
2487 ompt_data_t ompt_parallel_data = ompt_data_none{0};
2488 if (ompt_enabled.enabled) {
2489 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2490 if (ompt_enabled.ompt_callback_implicit_task) {
2491 int ompt_team_size = team->t.t_nproc;
2492 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)ompt_callback_implicit_task_callback(
2493 ompt_scope_end, NULL__null, &(task_info->task_data), ompt_team_size,
2494 OMPT_CUR_TASK_INFO(master_th)(&(master_th->th.th_current_task->ompt_task_info))->thread_num, ompt_task_implicit);
2495 }
2496 task_info->frame.exit_frame = ompt_data_none{0};
2497 task_info->task_data = ompt_data_none{0};
2498 ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th)(&(master_th->th.th_team->t.ompt_team_info.parallel_data
))
;
2499 __ompt_lw_taskteam_unlink(master_th);
2500 }
2501#endif
2502 /* Decrement our nested depth level */
2503 team->t.t_level--;
2504 team->t.t_active_level--;
2505 KMP_ATOMIC_DEC(&root->r.r_in_parallel)(&root->r.r_in_parallel)->fetch_sub(1, std::memory_order_acq_rel
)
;
2506
2507 // Restore number of threads in the team if needed. This code relies on
2508 // the proper adjustment of th_teams_size.nth after the fork in
2509 // __kmp_teams_master on each teams primary thread in the case that
2510 // __kmp_reserve_threads reduced it.
2511 if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) {
2512 int old_num = master_th->th.th_team_nproc;
2513 int new_num = master_th->th.th_teams_size.nth;
2514 kmp_info_t **other_threads = team->t.t_threads;
2515 team->t.t_nproc = new_num;
2516 for (int i = 0; i < old_num; ++i) {
2517 other_threads[i]->th.th_team_nproc = new_num;
2518 }
2519 // Adjust the states of the team's unused threads
2520 for (int i = old_num; i < new_num; ++i) {
2521 // Re-initialize thread's barrier data.
2522 KMP_DEBUG_ASSERT(other_threads[i])if (!(other_threads[i])) { __kmp_debug_assert("other_threads[i]"
, "openmp/runtime/src/kmp_runtime.cpp", 2522); }
;
2523 kmp_balign_t *balign = other_threads[i]->th.th_bar;
2524 for (int b = 0; b < bs_last_barrier; ++b) {
2525 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
2526 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG)if (!(balign[b].bb.wait_flag != 2)) { __kmp_debug_assert("balign[b].bb.wait_flag != 2"
, "openmp/runtime/src/kmp_runtime.cpp", 2526); }
;
2527#if USE_DEBUGGER0
2528 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
2529#endif
2530 }
2531 if (__kmp_tasking_mode != tskm_immediate_exec) {
2532 // Synchronize thread's task state
2533 other_threads[i]->th.th_task_state = master_th->th.th_task_state;
2534 }
2535 }
2536 }
2537
2538#if OMPT_SUPPORT1
2539 if (ompt_enabled.enabled) {
2540 __kmp_join_ompt(gtid, master_th, parent_team, &ompt_parallel_data,
2541 OMPT_INVOKER(fork_context)((fork_context == fork_context_gnu) ? ompt_parallel_invoker_program
: ompt_parallel_invoker_runtime)
| ompt_parallel_team, codeptr);
2542 }
2543#endif
2544
2545 return;
2546 }
2547
2548 /* do cleanup and restore the parent team */
2549 master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
2550 master_th->th.th_local.this_construct = team->t.t_master_this_cons;
2551
2552 master_th->th.th_dispatch = &parent_team->t.t_dispatch[team->t.t_master_tid];
2553
2554 /* jc: The following lock has instructions with REL and ACQ semantics,
2555 separating the parallel user code called in this parallel region
2556 from the serial user code called after this function returns. */
2557 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2558
2559 if (!master_th->th.th_teams_microtask ||
2560 team->t.t_level > master_th->th.th_teams_level) {
2561 /* Decrement our nested depth level */
2562 KMP_ATOMIC_DEC(&root->r.r_in_parallel)(&root->r.r_in_parallel)->fetch_sub(1, std::memory_order_acq_rel
)
;
2563 }
2564 KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0)if (!(root->r.r_in_parallel >= 0)) { __kmp_debug_assert
("root->r.r_in_parallel >= 0", "openmp/runtime/src/kmp_runtime.cpp"
, 2564); }
;
2565
2566#if OMPT_SUPPORT1
2567 if (ompt_enabled.enabled) {
2568 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2569 if (ompt_enabled.ompt_callback_implicit_task) {
2570 int flags = (team_microtask == (void *)__kmp_teams_master)
2571 ? ompt_task_initial
2572 : ompt_task_implicit;
2573 int ompt_team_size = (flags == ompt_task_initial) ? 0 : team->t.t_nproc;
2574 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)ompt_callback_implicit_task_callback(
2575 ompt_scope_end, NULL__null, &(task_info->task_data), ompt_team_size,
2576 OMPT_CUR_TASK_INFO(master_th)(&(master_th->th.th_current_task->ompt_task_info))->thread_num, flags);
2577 }
2578 task_info->frame.exit_frame = ompt_data_none{0};
2579 task_info->task_data = ompt_data_none{0};
2580 }
2581#endif
2582
2583 KF_TRACE(10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 0,if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n"
, 0, master_th, team); }
2584 master_th, team))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n"
, 0, master_th, team); }
;
2585 __kmp_pop_current_task_from_thread(master_th);
2586
2587 master_th->th.th_def_allocator = team->t.t_def_allocator;
2588
2589#if OMPD_SUPPORT1
2590 if (ompd_state & OMPD_ENABLE_BP0x1)
2591 ompd_bp_parallel_end();
2592#endif
2593 updateHWFPControl(team);
2594
2595 if (root->r.r_active != master_active)
2596 root->r.r_active = master_active;
2597
2598 __kmp_free_team(root, team USE_NESTED_HOT_ARG(
2599 master_th)); // this will free worker threads
2600
2601 /* This race was subtle to find. Keep the following inside the critical
2602 region; otherwise assertions may fail occasionally, because the old team may
2603 be reallocated and the hierarchy then appears inconsistent. Running it outside
2604 the lock is actually safe and causes no bugs, only those assertion failures;
2605 it is a single dereference and assignment, so keep it in the critical region. */
2606 master_th->th.th_team = parent_team;
2607 master_th->th.th_team_nproc = parent_team->t.t_nproc;
2608 master_th->th.th_team_master = parent_team->t.t_threads[0];
2609 master_th->th.th_team_serialized = parent_team->t.t_serialized;
2610
2611 /* restore serialized team, if need be */
2612 if (parent_team->t.t_serialized &&
2613 parent_team != master_th->th.th_serial_team &&
2614 parent_team != root->r.r_root_team) {
2615 __kmp_free_team(root,
2616 master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL));
2617 master_th->th.th_serial_team = parent_team;
2618 }
2619
2620 if (__kmp_tasking_mode != tskm_immediate_exec) {
2621 if (master_th->th.th_task_state_top >
2622 0) { // Restore task state from memo stack
2623 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack)if (!(master_th->th.th_task_state_memo_stack)) { __kmp_debug_assert
("master_th->th.th_task_state_memo_stack", "openmp/runtime/src/kmp_runtime.cpp"
, 2623); }
;
2624 // Remember primary thread's state if we re-use this nested hot team
2625 master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] =
2626 master_th->th.th_task_state;
2627 --master_th->th.th_task_state_top; // pop
2628 // Now restore state at this level
2629 master_th->th.th_task_state =
2630 master_th->th
2631 .th_task_state_memo_stack[master_th->th.th_task_state_top];
2632 }
2633 // Copy the task team from the parent team to the primary thread
2634 master_th->th.th_task_team =
2635 parent_team->t.t_task_team[master_th->th.th_task_state];
2636 KA_TRACE(20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_join_call: Primary T#%d restoring task_team %p, team %p\n"
, __kmp_gtid_from_thread(master_th), master_th->th.th_task_team
, parent_team); }
2637 ("__kmp_join_call: Primary T#%d restoring task_team %p, team %p\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_join_call: Primary T#%d restoring task_team %p, team %p\n"
, __kmp_gtid_from_thread(master_th), master_th->th.th_task_team
, parent_team); }
2638 __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_join_call: Primary T#%d restoring task_team %p, team %p\n"
, __kmp_gtid_from_thread(master_th), master_th->th.th_task_team
, parent_team); }
2639 parent_team))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_join_call: Primary T#%d restoring task_team %p, team %p\n"
, __kmp_gtid_from_thread(master_th), master_th->th.th_task_team
, parent_team); }
;
2640 }
2641
2642 // TODO: GEH - cannot do this assertion because root thread not set up as
2643 // executing
2644 // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 0 );
2645 master_th->th.th_current_task->td_flags.executing = 1;
2646
2647 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2648
2649#if KMP_AFFINITY_SUPPORTED1
2650 if (master_th->th.th_team->t.t_level == 0 && __kmp_affinity.flags.reset) {
2651 __kmp_reset_root_init_mask(gtid);
2652 }
2653#endif
2654#if OMPT_SUPPORT1
2655 int flags =
2656 OMPT_INVOKER(fork_context)((fork_context == fork_context_gnu) ? ompt_parallel_invoker_program
: ompt_parallel_invoker_runtime)
|
2657 ((team_microtask == (void *)__kmp_teams_master) ? ompt_parallel_league
2658 : ompt_parallel_team);
2659 if (ompt_enabled.enabled) {
2660 __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, flags,
2661 codeptr);
2662 }
2663#endif
2664
2665 KMP_MB();
2666 KA_TRACE(20, ("__kmp_join_call: exit T#%d\n", gtid))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_join_call: exit T#%d\n"
, gtid); }
;
2667}
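
For orientation, the fork/join machinery above is what runs underneath an ordinary parallel region. The following stand-alone program is illustrative only and is not part of kmp_runtime.cpp; it assumes a libomp-based toolchain (for example, clang -fopenmp).

#include <omp.h>
#include <stdio.h>

int main(void) {
  // The compiler outlines this region into a microtask; the runtime forks a
  // team, each thread invokes the microtask, and the primary thread then
  // joins the team again on exit from the region.
  #pragma omp parallel num_threads(4)
  {
    printf("hello from thread %d of %d\n",
           omp_get_thread_num(), omp_get_num_threads());
  }
  return 0;
}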
2668
2669/* Check whether we should push an internal control record onto the
2670 serial team stack. If so, do it. */
2671void __kmp_save_internal_controls(kmp_info_t *thread) {
2672
2673 if (thread->th.th_team != thread->th.th_serial_team) {
2674 return;
2675 }
2676 if (thread->th.th_team->t.t_serialized > 1) {
2677 int push = 0;
2678
2679 if (thread->th.th_team->t.t_control_stack_top == NULL__null) {
2680 push = 1;
2681 } else {
2682 if (thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
2683 thread->th.th_team->t.t_serialized) {
2684 push = 1;
2685 }
2686 }
2687 if (push) { /* push a record on the serial team's stack */
2688 kmp_internal_control_t *control =
2689 (kmp_internal_control_t *)__kmp_allocate(___kmp_allocate((sizeof(kmp_internal_control_t)), "openmp/runtime/src/kmp_runtime.cpp"
, 2690)
2690 sizeof(kmp_internal_control_t))___kmp_allocate((sizeof(kmp_internal_control_t)), "openmp/runtime/src/kmp_runtime.cpp"
, 2690)
;
2691
2692 copy_icvs(control, &thread->th.th_current_task->td_icvs);
2693
2694 control->serial_nesting_level = thread->th.th_team->t.t_serialized;
2695
2696 control->next = thread->th.th_team->t.t_control_stack_top;
2697 thread->th.th_team->t.t_control_stack_top = control;
2698 }
2699 }
2700}
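
The pushed records matter when internal control variables are changed inside nested, serialized parallel regions. The sketch below is illustrative only (not part of kmp_runtime.cpp) and shows the user-level situation such a stack has to cope with: an ICV changed inside an inner, serialized region should not leak into the enclosing region.

#include <omp.h>
#include <stdio.h>

int main(void) {
  omp_set_max_active_levels(1);   // the inner region below will be serialized
  #pragma omp parallel num_threads(2)
  {
    #pragma omp parallel          // inactive/serialized: runs on the encountering thread
    {
      omp_set_num_threads(8);     // ICV change scoped to this nesting level
    }
    #pragma omp critical
    printf("thread %d sees num-threads ICV = %d after the inner region\n",
           omp_get_thread_num(), omp_get_max_threads());
  }
  return 0;
}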
2701
2702/* Changes set_nproc */
2703void __kmp_set_num_threads(int new_nth, int gtid) {
2704 kmp_info_t *thread;
2705 kmp_root_t *root;
2706
2707 KF_TRACE(10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_set_num_threads: new __kmp_nth = %d\n"
, new_nth); }
;
2708 KMP_DEBUG_ASSERT(__kmp_init_serial)if (!(__kmp_init_serial)) { __kmp_debug_assert("__kmp_init_serial"
, "openmp/runtime/src/kmp_runtime.cpp", 2708); }
;
2709
2710 if (new_nth < 1)
2711 new_nth = 1;
2712 else if (new_nth > __kmp_max_nth)
2713 new_nth = __kmp_max_nth;
2714
2715 KMP_COUNT_VALUE(OMP_set_numthreads, new_nth)((void)0);
2716 thread = __kmp_threads[gtid];
2717 if (thread->th.th_current_task->td_icvs.nproc == new_nth)
2718 return; // nothing to do
2719
2720 __kmp_save_internal_controls(thread);
2721
2722 set__nproc(thread, new_nth)(((thread)->th.th_current_task->td_icvs.nproc) = (new_nth
))
;
2723
2724 // If this omp_set_num_threads() call will cause the hot team size to be
2725 // reduced (in the absence of a num_threads clause), then reduce it now,
2726 // rather than waiting for the next parallel region.
2727 root = thread->th.th_root;
2728 if (__kmp_init_parallel && (!root->r.r_active) &&
2729 (root->r.r_hot_team->t.t_nproc > new_nth)
2730#if KMP_NESTED_HOT_TEAMS1
2731 && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
2732#endif
2733 ) {
2734 kmp_team_t *hot_team = root->r.r_hot_team;
2735 int f;
2736
2737 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2738
2739 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
2740 __kmp_resize_dist_barrier(hot_team, hot_team->t.t_nproc, new_nth);
2741 }
2742 // Release the extra threads we don't need any more.
2743 for (f = new_nth; f < hot_team->t.t_nproc; f++) {
2744 KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL)if (!(hot_team->t.t_threads[f] != __null)) { __kmp_debug_assert
("hot_team->t.t_threads[f] != __null", "openmp/runtime/src/kmp_runtime.cpp"
, 2744); }
;
2745 if (__kmp_tasking_mode != tskm_immediate_exec) {
2746 // When decreasing team size, threads no longer in the team should unref
2747 // task team.
2748 hot_team->t.t_threads[f]->th.th_task_team = NULL__null;
2749 }
2750 __kmp_free_thread(hot_team->t.t_threads[f]);
2751 hot_team->t.t_threads[f] = NULL__null;
2752 }
2753 hot_team->t.t_nproc = new_nth;
2754#if KMP_NESTED_HOT_TEAMS1
2755 if (thread->th.th_hot_teams) {
2756 KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team)if (!(hot_team == thread->th.th_hot_teams[0].hot_team)) { __kmp_debug_assert
("hot_team == thread->th.th_hot_teams[0].hot_team", "openmp/runtime/src/kmp_runtime.cpp"
, 2756); }
;
2757 thread->th.th_hot_teams[0].hot_team_nth = new_nth;
2758 }
2759#endif
2760
2761 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
2762 hot_team->t.b->update_num_threads(new_nth);
2763 __kmp_add_threads_to_team(hot_team, new_nth);
2764 }
2765
2766 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2767
2768 // Update the t_nproc field in the threads that are still active.
2769 for (f = 0; f < new_nth; f++) {
2770 KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL)if (!(hot_team->t.t_threads[f] != __null)) { __kmp_debug_assert
("hot_team->t.t_threads[f] != __null", "openmp/runtime/src/kmp_runtime.cpp"
, 2770); }
;
2771 hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
2772 }
2773 // Special flag to mark that the size change came from an omp_set_num_threads() call
2774 hot_team->t.t_size_changed = -1;
2775 }
2776}
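
A hedged usage sketch (not part of kmp_runtime.cpp): the user-facing omp_set_num_threads() appears to reach the runtime through a path like the one above, and reducing the value between regions is exactly the case where the hot team is trimmed immediately rather than at the next fork.

#include <omp.h>
#include <stdio.h>

int main(void) {
  omp_set_num_threads(8);
  #pragma omp parallel
  { /* first region: a hot team of up to 8 threads is created */ }

  omp_set_num_threads(2);   // smaller value: surplus hot-team threads can be freed now
  #pragma omp parallel
  {
    #pragma omp single
    printf("second region ran with %d threads\n", omp_get_num_threads());
  }
  return 0;
}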
2777
2778/* Changes max_active_levels */
2779void __kmp_set_max_active_levels(int gtid, int max_active_levels) {
2780 kmp_info_t *thread;
2781
2782 KF_TRACE(10, ("__kmp_set_max_active_levels: new max_active_levels for thread "if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_set_max_active_levels: new max_active_levels for thread "
"%d = (%d)\n", gtid, max_active_levels); }
2783 "%d = (%d)\n",if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_set_max_active_levels: new max_active_levels for thread "
"%d = (%d)\n", gtid, max_active_levels); }
2784 gtid, max_active_levels))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_set_max_active_levels: new max_active_levels for thread "
"%d = (%d)\n", gtid, max_active_levels); }
;
2785 KMP_DEBUG_ASSERT(__kmp_init_serial)if (!(__kmp_init_serial)) { __kmp_debug_assert("__kmp_init_serial"
, "openmp/runtime/src/kmp_runtime.cpp", 2785); }
;
2786
2787 // validate max_active_levels
2788 if (max_active_levels < 0) {
2789 KMP_WARNING(ActiveLevelsNegative, max_active_levels)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_ActiveLevelsNegative
, max_active_levels), __kmp_msg_null)
;
2790 // We ignore this call if the user has specified a negative value.
2791 // The current setting won't be changed. The last valid setting will be
2792 // used. A warning will be issued (if warnings are allowed as controlled by
2793 // the KMP_WARNINGS env var).
2794 KF_TRACE(10, ("__kmp_set_max_active_levels: the call is ignored: new "if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_set_max_active_levels: the call is ignored: new "
"max_active_levels for thread %d = (%d)\n", gtid, max_active_levels
); }
2795 "max_active_levels for thread %d = (%d)\n",if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_set_max_active_levels: the call is ignored: new "
"max_active_levels for thread %d = (%d)\n", gtid, max_active_levels
); }
2796 gtid, max_active_levels))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_set_max_active_levels: the call is ignored: new "
"max_active_levels for thread %d = (%d)\n", gtid, max_active_levels
); }
;
2797 return;
2798 }
2799 if (max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT2147483647) {
2800 // it's OK, the max_active_levels is within the valid range: [ 0;
2801 // KMP_MAX_ACTIVE_LEVELS_LIMIT ]
2802 // We allow a zero value. (implementation defined behavior)
2803 } else {
2804 KMP_WARNING(ActiveLevelsExceedLimit, max_active_levels,__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_ActiveLevelsExceedLimit
, max_active_levels, 2147483647), __kmp_msg_null)
2805 KMP_MAX_ACTIVE_LEVELS_LIMIT)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_ActiveLevelsExceedLimit
, max_active_levels, 2147483647), __kmp_msg_null)
;
2806 max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT2147483647;
2807 // Current upper limit is MAX_INT. (implementation defined behavior)
2808 // If the input exceeds the upper limit, we correct the input to be the
2809 // upper limit. (implementation defined behavior)
2810 // In practice, this branch is unreachable as long as the limit is MAX_INT.
2811 }
2812 KF_TRACE(10, ("__kmp_set_max_active_levels: after validation: new "if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_set_max_active_levels: after validation: new "
"max_active_levels for thread %d = (%d)\n", gtid, max_active_levels
); }
2813 "max_active_levels for thread %d = (%d)\n",if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_set_max_active_levels: after validation: new "
"max_active_levels for thread %d = (%d)\n", gtid, max_active_levels
); }
2814 gtid, max_active_levels))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_set_max_active_levels: after validation: new "
"max_active_levels for thread %d = (%d)\n", gtid, max_active_levels
); }
;
2815
2816 thread = __kmp_threads[gtid];
2817
2818 __kmp_save_internal_controls(thread);
2819
2820 set__max_active_levels(thread, max_active_levels)(((thread)->th.th_current_task->td_icvs.max_active_levels
) = (max_active_levels))
;
2821}
2822
2823/* Gets max_active_levels */
2824int __kmp_get_max_active_levels(int gtid) {
2825 kmp_info_t *thread;
2826
2827 KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d\n", gtid))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_get_max_active_levels: thread %d\n"
, gtid); }
;
2828 KMP_DEBUG_ASSERT(__kmp_init_serial)if (!(__kmp_init_serial)) { __kmp_debug_assert("__kmp_init_serial"
, "openmp/runtime/src/kmp_runtime.cpp", 2828); }
;
2829
2830 thread = __kmp_threads[gtid];
2831 KMP_DEBUG_ASSERT(thread->th.th_current_task)if (!(thread->th.th_current_task)) { __kmp_debug_assert("thread->th.th_current_task"
, "openmp/runtime/src/kmp_runtime.cpp", 2831); }
;
2832 KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d, curtask=%p, "if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_get_max_active_levels: thread %d, curtask=%p, "
"curtask_maxaclevel=%d\n", gtid, thread->th.th_current_task
, thread->th.th_current_task->td_icvs.max_active_levels
); }
2833 "curtask_maxaclevel=%d\n",if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_get_max_active_levels: thread %d, curtask=%p, "
"curtask_maxaclevel=%d\n", gtid, thread->th.th_current_task
, thread->th.th_current_task->td_icvs.max_active_levels
); }
2834 gtid, thread->th.th_current_task,if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_get_max_active_levels: thread %d, curtask=%p, "
"curtask_maxaclevel=%d\n", gtid, thread->th.th_current_task
, thread->th.th_current_task->td_icvs.max_active_levels
); }
2835 thread->th.th_current_task->td_icvs.max_active_levels))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_get_max_active_levels: thread %d, curtask=%p, "
"curtask_maxaclevel=%d\n", gtid, thread->th.th_current_task
, thread->th.th_current_task->td_icvs.max_active_levels
); }
;
2836 return thread->th.th_current_task->td_icvs.max_active_levels;
2837}
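
A usage sketch, illustrative only (not part of kmp_runtime.cpp): the setter and getter above appear to back the standard max-active-levels routines, including the handling of out-of-range arguments.

#include <omp.h>
#include <stdio.h>

int main(void) {
  omp_set_max_active_levels(2);   // allow two levels of active (non-serialized) parallelism
  printf("max active levels = %d\n", omp_get_max_active_levels());

  omp_set_max_active_levels(-1);  // negative input: ignored, previous setting kept
  printf("after negative input = %d\n", omp_get_max_active_levels());
  return 0;
}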
2838
2839// nteams-var per-device ICV
2840void __kmp_set_num_teams(int num_teams) {
2841 if (num_teams > 0)
2842 __kmp_nteams = num_teams;
2843}
2844int __kmp_get_max_teams(void) { return __kmp_nteams; }
2845// teams-thread-limit-var per-device ICV
2846void __kmp_set_teams_thread_limit(int limit) {
2847 if (limit > 0)
2848 __kmp_teams_thread_limit = limit;
2849}
2850int __kmp_get_teams_thread_limit(void) { return __kmp_teams_thread_limit; }
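
A usage sketch, illustrative only (not part of kmp_runtime.cpp): the nteams-var and teams-thread-limit-var helpers above appear to back the OpenMP 5.1 routines used below; non-positive arguments are ignored, mirroring the checks above.

#include <omp.h>
#include <stdio.h>

int main(void) {
  omp_set_num_teams(4);           // upper bound on the number of teams
  omp_set_teams_thread_limit(8);  // upper bound on threads per team
  printf("max teams = %d, teams thread limit = %d\n",
         omp_get_max_teams(), omp_get_teams_thread_limit());
  return 0;
}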
2851
2852KMP_BUILD_ASSERT(sizeof(kmp_sched_t) == sizeof(int))static_assert(sizeof(kmp_sched_t) == sizeof(int), "Build condition error"
)
;
2853KMP_BUILD_ASSERT(sizeof(enum sched_type) == sizeof(int))static_assert(sizeof(enum sched_type) == sizeof(int), "Build condition error"
)
;
2854
2855/* Changes def_sched_var ICV values (run-time schedule kind and chunk) */
2856void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) {
2857 kmp_info_t *thread;
2858 kmp_sched_t orig_kind;
2859 // kmp_team_t *team;
2860
2861 KF_TRACE(10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n",if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n"
, gtid, (int)kind, chunk); }
2862 gtid, (int)kind, chunk))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n"
, gtid, (int)kind, chunk); }
;
2863 KMP_DEBUG_ASSERT(__kmp_init_serial)if (!(__kmp_init_serial)) { __kmp_debug_assert("__kmp_init_serial"
, "openmp/runtime/src/kmp_runtime.cpp", 2863); }
;
2864
2865 // Check if the kind parameter is valid, correct if needed.
2866 // Valid parameters should fit in one of two intervals - standard or extended:
2867 // <lower>, <valid>, <upper_std>, <lower_ext>, <valid>, <upper>
2868 // 2008-01-25: 0, 1 - 4, 5, 100, 101 - 102, 103
2869 orig_kind = kind;
2870 kind = __kmp_sched_without_mods(kind);
2871
2872 if (kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
2873 (kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std)) {
2874 // TODO: Hint needs attention in case we change the default schedule.
2875 __kmp_msg(kmp_ms_warning, KMP_MSG(ScheduleKindOutOfRange, kind)__kmp_msg_format(kmp_i18n_msg_ScheduleKindOutOfRange, kind),
2876 KMP_HNT(DefaultScheduleKindUsed, "static, no chunk")__kmp_msg_format(kmp_i18n_hnt_DefaultScheduleKindUsed, "static, no chunk"
)
,
2877 __kmp_msg_null);
2878 kind = kmp_sched_default;
2879 chunk = 0; // ignore chunk value in case of bad kind
2880 }
2881
2882 thread = __kmp_threads[gtid];
2883
2884 __kmp_save_internal_controls(thread);
2885
2886 if (kind < kmp_sched_upper_std) {
2887 if (kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK1) {
2888 // differentiate static chunked vs. unchunked: the chunk should be invalid
2889 // to indicate an unchunked schedule (which is the default)
2890 thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
2891 } else {
2892 thread->th.th_current_task->td_icvs.sched.r_sched_type =
2893 __kmp_sch_map[kind - kmp_sched_lower - 1];
2894 }
2895 } else {
2896 // __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std -
2897 // kmp_sched_lower - 2 ];
2898 thread->th.th_current_task->td_icvs.sched.r_sched_type =
2899 __kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std -
2900 kmp_sched_lower - 2];
2901 }
2902 __kmp_sched_apply_mods_intkind(
2903 orig_kind, &(thread->th.th_current_task->td_icvs.sched.r_sched_type));
2904 if (kind == kmp_sched_auto || chunk < 1) {
2905 // ignore parameter chunk for schedule auto
2906 thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK1;
2907 } else {
2908 thread->th.th_current_task->td_icvs.sched.chunk = chunk;
2909 }
2910}
2911
2912/* Gets def_sched_var ICV values */
2913void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk) {
2914 kmp_info_t *thread;
2915 enum sched_type th_type;
2916
2917 KF_TRACE(10, ("__kmp_get_schedule: thread %d\n", gtid))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_get_schedule: thread %d\n"
, gtid); }
;
2918 KMP_DEBUG_ASSERT(__kmp_init_serial)if (!(__kmp_init_serial)) { __kmp_debug_assert("__kmp_init_serial"
, "openmp/runtime/src/kmp_runtime.cpp", 2918); }
;
2919
2920 thread = __kmp_threads[gtid];
2921
2922 th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
2923 switch (SCHEDULE_WITHOUT_MODIFIERS(th_type)(enum sched_type)( (th_type) & ~(kmp_sch_modifier_nonmonotonic
| kmp_sch_modifier_monotonic))
) {
2924 case kmp_sch_static:
2925 case kmp_sch_static_greedy:
2926 case kmp_sch_static_balanced:
2927 *kind = kmp_sched_static;
2928 __kmp_sched_apply_mods_stdkind(kind, th_type);
2929 *chunk = 0; // chunk was not set, try to show this fact via zero value
2930 return;
2931 case kmp_sch_static_chunked:
2932 *kind = kmp_sched_static;
2933 break;
2934 case kmp_sch_dynamic_chunked:
2935 *kind = kmp_sched_dynamic;
2936 break;
2937 case kmp_sch_guided_chunked:
2938 case kmp_sch_guided_iterative_chunked:
2939 case kmp_sch_guided_analytical_chunked:
2940 *kind = kmp_sched_guided;
2941 break;
2942 case kmp_sch_auto:
2943 *kind = kmp_sched_auto;
2944 break;
2945 case kmp_sch_trapezoidal:
2946 *kind = kmp_sched_trapezoidal;
2947 break;
2948#if KMP_STATIC_STEAL_ENABLED1
2949 case kmp_sch_static_steal:
2950 *kind = kmp_sched_static_steal;
2951 break;
2952#endif
2953 default:
2954 KMP_FATAL(UnknownSchedulingType, th_type)__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_UnknownSchedulingType
, th_type), __kmp_msg_null)
;
2955 }
2956
2957 __kmp_sched_apply_mods_stdkind(kind, th_type);
2958 *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
2959}
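
A usage sketch, illustrative only (not part of kmp_runtime.cpp): the internal schedule setter/getter above appear to sit behind omp_set_schedule()/omp_get_schedule(); note that a chunk below 1 selects the unchunked variant, as in the code above.

#include <omp.h>
#include <stdio.h>

int main(void) {
  omp_sched_t kind;
  int chunk;

  omp_set_schedule(omp_sched_dynamic, 16);  // run-time schedule: dynamic with chunk 16
  omp_get_schedule(&kind, &chunk);
  printf("kind = %d, chunk = %d\n", (int)kind, chunk);

  omp_set_schedule(omp_sched_static, 0);    // chunk < 1: unchunked static
  omp_get_schedule(&kind, &chunk);
  printf("kind = %d, chunk = %d\n", (int)kind, chunk);
  return 0;
}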
2960
2961int __kmp_get_ancestor_thread_num(int gtid, int level) {
2962
2963 int ii, dd;
2964 kmp_team_t *team;
2965 kmp_info_t *thr;
2966
2967 KF_TRACE(10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_get_ancestor_thread_num: thread %d %d\n"
, gtid, level); }
;
2968 KMP_DEBUG_ASSERT(__kmp_init_serial)if (!(__kmp_init_serial)) { __kmp_debug_assert("__kmp_init_serial"
, "openmp/runtime/src/kmp_runtime.cpp", 2968); }
;
2969
2970 // validate level
2971 if (level == 0)
2972 return 0;
2973 if (level < 0)
2974 return -1;
2975 thr = __kmp_threads[gtid];
2976 team = thr->th.th_team;
2977 ii = team->t.t_level;
2978 if (level > ii)
2979 return -1;
2980
2981 if (thr->th.th_teams_microtask) {
2982 // AC: we are in teams region where multiple nested teams have same level
2983 int tlevel = thr->th.th_teams_level; // the level of the teams construct
2984 if (level <=
2985 tlevel) { // otherwise usual algorithm works (will not touch the teams)
2986 KMP_DEBUG_ASSERT(ii >= tlevel)if (!(ii >= tlevel)) { __kmp_debug_assert("ii >= tlevel"
, "openmp/runtime/src/kmp_runtime.cpp", 2986); }
;
2987 // AC: Since we need to step over the league of teams, artificially
2988 // increase ii
2989 if (ii == tlevel) {
2990 ii += 2; // three teams have same level
2991 } else {
2992 ii++; // two teams have same level
2993 }
2994 }
2995 }
2996
2997 if (ii == level)
2998 return __kmp_tid_from_gtid(gtid);
2999
3000 dd = team->t.t_serialized;
3001 level++;
3002 while (ii > level) {
3003 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
3004 }
3005 if ((team->t.t_serialized) && (!dd)) {
3006 team = team->t.t_parent;
3007 continue;
3008 }
3009 if (ii > level) {
3010 team = team->t.t_parent;
3011 dd = team->t.t_serialized;
3012 ii--;
3013 }
3014 }
3015
3016 return (dd > 1) ? (0) : (team->t.t_master_tid);
3017}
3018
3019int __kmp_get_team_size(int gtid, int level) {
3020
3021 int ii, dd;
3022 kmp_team_t *team;
3023 kmp_info_t *thr;
3024
3025 KF_TRACE(10, ("__kmp_get_team_size: thread %d %d\n", gtid, level))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_get_team_size: thread %d %d\n"
, gtid, level); }
;
3026 KMP_DEBUG_ASSERT(__kmp_init_serial)if (!(__kmp_init_serial)) { __kmp_debug_assert("__kmp_init_serial"
, "openmp/runtime/src/kmp_runtime.cpp", 3026); }
;
3027
3028 // validate level
3029 if (level == 0)
3030 return 1;
3031 if (level < 0)
3032 return -1;
3033 thr = __kmp_threads[gtid];
3034 team = thr->th.th_team;
3035 ii = team->t.t_level;
3036 if (level > ii)
3037 return -1;
3038
3039 if (thr->th.th_teams_microtask) {
3040 // AC: we are in teams region where multiple nested teams have same level
3041 int tlevel = thr->th.th_teams_level; // the level of the teams construct
3042 if (level <=
3043 tlevel) { // otherwise usual algorithm works (will not touch the teams)
3044 KMP_DEBUG_ASSERT(ii >= tlevel)if (!(ii >= tlevel)) { __kmp_debug_assert("ii >= tlevel"
, "openmp/runtime/src/kmp_runtime.cpp", 3044); }
;
3045 // AC: Since we need to step over the league of teams, artificially
3046 // increase ii
3047 if (ii == tlevel) {
3048 ii += 2; // three teams have same level
3049 } else {
3050 ii++; // two teams have same level
3051 }
3052 }
3053 }
3054
3055 while (ii > level) {
3056 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
3057 }
3058 if (team->t.t_serialized && (!dd)) {
3059 team = team->t.t_parent;
3060 continue;
3061 }
3062 if (ii > level) {
3063 team = team->t.t_parent;
3064 ii--;
3065 }
3066 }
3067
3068 return team->t.t_nproc;
3069}
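
A usage sketch, illustrative only (not part of kmp_runtime.cpp): the two level-walking routines above appear to back omp_get_ancestor_thread_num() and omp_get_team_size(), which are queried here from a nested region.

#include <omp.h>
#include <stdio.h>

int main(void) {
  omp_set_max_active_levels(2);   // allow two active levels so the inner region is not serialized
  #pragma omp parallel num_threads(2)
  #pragma omp parallel num_threads(3)
  {
    int level = omp_get_level();  // nesting level of the current region
    #pragma omp critical
    printf("level %d: ancestor tid at level 1 = %d, team size at level %d = %d\n",
           level, omp_get_ancestor_thread_num(1), level, omp_get_team_size(level));
  }
  return 0;
}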
3070
3071kmp_r_sched_t __kmp_get_schedule_global() {
3072 // This routine was created because the pairs (__kmp_sched, __kmp_chunk) and
3073 // (__kmp_static, __kmp_guided) may be changed independently by
3074 // kmp_set_defaults, so the updated schedule can be obtained here.
3075
3076 kmp_r_sched_t r_sched;
3077
3078 // create schedule from 4 globals: __kmp_sched, __kmp_chunk, __kmp_static,
3079 // __kmp_guided. __kmp_sched should keep original value, so that user can set
3080 // KMP_SCHEDULE multiple times, and thus have different run-time schedules in
3081 // different roots (even in OMP 2.5)
3082 enum sched_type s = SCHEDULE_WITHOUT_MODIFIERS(__kmp_sched)(enum sched_type)( (__kmp_sched) & ~(kmp_sch_modifier_nonmonotonic
| kmp_sch_modifier_monotonic))
;
3083 enum sched_type sched_modifiers = SCHEDULE_GET_MODIFIERS(__kmp_sched)((enum sched_type)( (__kmp_sched) & (kmp_sch_modifier_nonmonotonic
| kmp_sch_modifier_monotonic)))
;
3084 if (s == kmp_sch_static) {
3085 // replace STATIC with more detailed schedule (balanced or greedy)
3086 r_sched.r_sched_type = __kmp_static;
3087 } else if (s == kmp_sch_guided_chunked) {
3088 // replace GUIDED with more detailed schedule (iterative or analytical)
3089 r_sched.r_sched_type = __kmp_guided;
3090 } else { // (STATIC_CHUNKED), or (DYNAMIC_CHUNKED), or other
3091 r_sched.r_sched_type = __kmp_sched;
3092 }
3093 SCHEDULE_SET_MODIFIERS(r_sched.r_sched_type, sched_modifiers)(r_sched.r_sched_type = (enum sched_type)((kmp_int32)r_sched.
r_sched_type | (kmp_int32)sched_modifiers))
;
3094
3095 if (__kmp_chunk < KMP_DEFAULT_CHUNK1) {
3096 // __kmp_chunk may be wrong here (if it was not ever set)
3097 r_sched.chunk = KMP_DEFAULT_CHUNK1;
3098 } else {
3099 r_sched.chunk = __kmp_chunk;
3100 }
3101
3102 return r_sched;
3103}
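
Illustrative only (not part of kmp_runtime.cpp): the global run-time schedule assembled above appears to feed the default that a loop with schedule(runtime) eventually picks up, typically seeded from the OMP_SCHEDULE environment variable (for example OMP_SCHEDULE="guided,4").

#include <omp.h>
#include <stdio.h>

int main(void) {
  double sum = 0.0;
  // The loop schedule is resolved at run time from run-sched-var instead of
  // being fixed at compile time.
  #pragma omp parallel for schedule(runtime) reduction(+ : sum)
  for (int i = 0; i < 1000; ++i)
    sum += 0.5 * i;
  printf("sum = %f\n", sum);
  return 0;
}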
3104
3105/* Allocate (realloc == FALSE) or reallocate (realloc == TRUE)
3106 at least argc *t_argv entries for the requested team. */
3107static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc) {
3108
3109 KMP_DEBUG_ASSERT(team);
3110 if (!realloc || argc > team->t.t_max_argc) {
3111
3112 KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: needed entries=%d, "
3113 "current entries=%d\n",
3114 team->t.t_id, argc, (realloc) ? team->t.t_max_argc : 0));
3115 /* if previously allocated heap space for args, free them */
3116 if (realloc && team->t.t_argv != &team->t.t_inline_argv[0])
3117 __kmp_free((void *)team->t.t_argv);
3118
3119 if (argc <= KMP_INLINE_ARGV_ENTRIES) {
3120 /* use unused space in the cache line for arguments */
3121 team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
3122 KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: inline allocate %d "
3123 "argv entries\n",
3124 team->t.t_id, team->t.t_max_argc));
3125 team->t.t_argv = &team->t.t_inline_argv[0];
3126 if (__kmp_storage_map) {
3127 __kmp_print_storage_map_gtid(
3128 -1, &team->t.t_inline_argv[0],
3129 &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
3130 (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES), "team_%d.t_inline_argv",
3131 team->t.t_id);
3132 }
3133 } else {
3134 /* allocate space for arguments in the heap */
3135 team->t.t_max_argc = (argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1))
3136 ? KMP_MIN_MALLOC_ARGV_ENTRIES
3137 : 2 * argc;
3138 KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: dynamic allocate %d "
3139 "argv entries\n",
3140 team->t.t_id, team->t.t_max_argc));
3141 team->t.t_argv =
3142 (void **)__kmp_page_allocate(sizeof(void *) * team->t.t_max_argc);
3143 if (__kmp_storage_map) {
3144 __kmp_print_storage_map_gtid(-1, &team->t.t_argv[0],
3145 &team->t.t_argv[team->t.t_max_argc],
3146 sizeof(void *) * team->t.t_max_argc,
3147 "team_%d.t_argv", team->t.t_id);
3148 }
3149 }
3150 }
3151}
3152
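The sizing policy above is easy to miss among the trace macros: argument lists that fit in KMP_INLINE_ARGV_ENTRIES reuse spare space in the team structure's cache lines, and heap-backed lists start at KMP_MIN_MALLOC_ARGV_ENTRIES and then grow to twice the requested argc. A standalone sketch of just that decision, with placeholder constants rather than the real cache-line-derived values:

    #include <cstdio>

    // Placeholder limits for illustration; the real values come from the layout
    // of kmp_base_team_t (KMP_INLINE_ARGV_ENTRIES) and KMP_MIN_MALLOC_ARGV_ENTRIES.
    constexpr int kInlineEntries = 28;
    constexpr int kMinMallocEntries = 100;

    // Capacity the runtime would reserve for `argc` argument slots.
    int argv_capacity(int argc) {
      if (argc <= kInlineEntries)
        return kInlineEntries; // in-struct storage, no allocation
      return (argc <= (kMinMallocEntries >> 1)) ? kMinMallocEntries : 2 * argc;
    }

    int main() {
      const int cases[] = {4, 28, 40, 60, 200};
      for (int argc : cases)
        std::printf("argc=%3d -> capacity=%3d\n", argc, argv_capacity(argc));
      return 0;
    }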
3153static void __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) {
3154 int i;
3155 int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
3156 team->t.t_threads =
3157 (kmp_info_t **)__kmp_allocate(sizeof(kmp_info_t *) * max_nth);
3158 team->t.t_disp_buffer = (dispatch_shared_info_t *)__kmp_allocate(
3159 sizeof(dispatch_shared_info_t) * num_disp_buff);
3160 team->t.t_dispatch =
3161 (kmp_disp_t *)__kmp_allocate(sizeof(kmp_disp_t) * max_nth);
3162 team->t.t_implicit_task_taskdata =
3163 (kmp_taskdata_t *)__kmp_allocate(sizeof(kmp_taskdata_t) * max_nth);
3164 team->t.t_max_nproc = max_nth;
3165
3166 /* setup dispatch buffers */
3167 for (i = 0; i < num_disp_buff; ++i) {
3168 team->t.t_disp_buffer[i].buffer_index = i;
3169 team->t.t_disp_buffer[i].doacross_buf_idx = i;
3170 }
3171}
3172
3173static void __kmp_free_team_arrays(kmp_team_t *team) {
3174 /* Note: this does not free the threads in t_threads (__kmp_free_threads) */
3175 int i;
3176 for (i = 0; i < team->t.t_max_nproc; ++i) {
3177 if (team->t.t_dispatch[i].th_disp_buffer != NULL) {
3178 __kmp_free(team->t.t_dispatch[i].th_disp_buffer);
3179 team->t.t_dispatch[i].th_disp_buffer = NULL;
3180 }
3181 }
3182#if KMP_USE_HIER_SCHED
3183 __kmp_dispatch_free_hierarchies(team);
3184#endif
3185 __kmp_free(team->t.t_threads);
3186 __kmp_free(team->t.t_disp_buffer);
3187 __kmp_free(team->t.t_dispatch);
3188 __kmp_free(team->t.t_implicit_task_taskdata);
3189 team->t.t_threads = NULL;
3190 team->t.t_disp_buffer = NULL;
3191 team->t.t_dispatch = NULL;
3192 team->t.t_implicit_task_taskdata = 0;
3193}
3194
3195static void __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
3196 kmp_info_t **oldThreads = team->t.t_threads;
3197
3198 __kmp_free(team->t.t_disp_buffer);
3199 __kmp_free(team->t.t_dispatch);
3200 __kmp_free(team->t.t_implicit_task_taskdata);
3201 __kmp_allocate_team_arrays(team, max_nth);
3202
3203 KMP_MEMCPY(team->t.t_threads, oldThreads,
3204 team->t.t_nproc * sizeof(kmp_info_t *));
3205
3206 __kmp_free(oldThreads);
3207}
3208
3209static kmp_internal_control_t __kmp_get_global_icvs(void) {
3210
3211 kmp_r_sched_t r_sched =
3212 __kmp_get_schedule_global(); // get current state of scheduling globals
3213
3214 KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.used > 0);
3215
3216 kmp_internal_control_t g_icvs = {
3217 0, // int serial_nesting_level; //corresponds to value of th_team_serialized
3218 (kmp_int8)__kmp_global.g.g_dynamic, // internal control for dynamic
3219 // adjustment of threads (per thread)
3220 (kmp_int8)__kmp_env_blocktime, // int bt_set; //internal control for
3221 // whether blocktime is explicitly set
3222 __kmp_dflt_blocktime, // int blocktime; //internal control for blocktime
3223#if KMP_USE_MONITOR
3224 __kmp_bt_intervals, // int bt_intervals; //internal control for blocktime
3225// intervals
3226#endif
3227 __kmp_dflt_team_nth, // int nproc; //internal control for # of threads for
3228 // next parallel region (per thread)
3229 // (use a max ub on value if __kmp_parallel_initialize not called yet)
3230 __kmp_cg_max_nth, // int thread_limit;
3231 __kmp_dflt_max_active_levels, // int max_active_levels; //internal control
3232 // for max_active_levels
3233 r_sched, // kmp_r_sched_t sched; //internal control for runtime schedule
3234 // {sched,chunk} pair
3235 __kmp_nested_proc_bind.bind_types[0],
3236 __kmp_default_device,
3237 NULL__null // struct kmp_internal_control *next;
3238 };
3239
3240 return g_icvs;
3241}
3242
3243static kmp_internal_control_t __kmp_get_x_global_icvs(const kmp_team_t *team) {
3244
3245 kmp_internal_control_t gx_icvs;
3246 gx_icvs.serial_nesting_level =
3247 0; // probably =team->t.t_serial like in save_inter_controls
3248 copy_icvs(&gx_icvs, &team->t.t_threads[0]->th.th_current_task->td_icvs);
3249 gx_icvs.next = NULL__null;
3250
3251 return gx_icvs;
3252}
3253
3254static void __kmp_initialize_root(kmp_root_t *root) {
3255 int f;
3256 kmp_team_t *root_team;
3257 kmp_team_t *hot_team;
3258 int hot_team_max_nth;
3259 kmp_r_sched_t r_sched =
3260 __kmp_get_schedule_global(); // get current state of scheduling globals
3261 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3262 KMP_DEBUG_ASSERT(root);
3263 KMP_ASSERT(!root->r.r_begin);
3264
3265 /* setup the root state structure */
3266 __kmp_init_lock(&root->r.r_begin_lock);
3267 root->r.r_begin = FALSE0;
3268 root->r.r_active = FALSE0;
3269 root->r.r_in_parallel = 0;
3270 root->r.r_blocktime = __kmp_dflt_blocktime;
3271#if KMP_AFFINITY_SUPPORTED1
3272 root->r.r_affinity_assigned = FALSE0;
3273#endif
3274
3275 /* setup the root team for this task */
3276 /* allocate the root team structure */
3277 KF_TRACE(10, ("__kmp_initialize_root: before root_team\n"));
3278
3279 root_team =
3280 __kmp_allocate_team(root,
3281 1, // new_nproc
3282 1, // max_nproc
3283#if OMPT_SUPPORT
3284 ompt_data_none, // root parallel id
3285#endif
3286 __kmp_nested_proc_bind.bind_types[0], &r_icvs,
3287 0 // argc
3288 USE_NESTED_HOT_ARG(NULL) // primary thread is unknown
3289 );
3290#if USE_DEBUGGER
3291 // Non-NULL value should be assigned to make the debugger display the root
3292 // team.
3293 TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)(~0));
3294#endif
3295
3296 KF_TRACE(10, ("__kmp_initialize_root: after root_team = %p\n", root_team));
3297
3298 root->r.r_root_team = root_team;
3299 root_team->t.t_control_stack_top = NULL__null;
3300
3301 /* initialize root team */
3302 root_team->t.t_threads[0] = NULL__null;
3303 root_team->t.t_nproc = 1;
3304 root_team->t.t_serialized = 1;
3305 // TODO???: root_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
3306 root_team->t.t_sched.sched = r_sched.sched;
3307 KA_TRACE(
3308 20,
3309 ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
3310 root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
3311
3312 /* setup the hot team for this task */
3313 /* allocate the hot team structure */
3314 KF_TRACE(10, ("__kmp_initialize_root: before hot_team\n"));
3315
3316 hot_team =
3317 __kmp_allocate_team(root,
3318 1, // new_nproc
3319 __kmp_dflt_team_nth_ub * 2, // max_nproc
3320#if OMPT_SUPPORT
3321 ompt_data_none, // root parallel id
3322#endif
3323 __kmp_nested_proc_bind.bind_types[0], &r_icvs,
3324 0 // argc
3325 USE_NESTED_HOT_ARG(NULL) // primary thread is unknown
3326 );
3327 KF_TRACE(10, ("__kmp_initialize_root: after hot_team = %p\n", hot_team));
3328
3329 root->r.r_hot_team = hot_team;
3330 root_team->t.t_control_stack_top = NULL__null;
3331
3332 /* first-time initialization */
3333 hot_team->t.t_parent = root_team;
3334
3335 /* initialize hot team */
3336 hot_team_max_nth = hot_team->t.t_max_nproc;
3337 for (f = 0; f < hot_team_max_nth; ++f) {
3338 hot_team->t.t_threads[f] = NULL__null;
3339 }
3340 hot_team->t.t_nproc = 1;
3341 // TODO???: hot_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
3342 hot_team->t.t_sched.sched = r_sched.sched;
3343 hot_team->t.t_size_changed = 0;
3344}
3345
3346#ifdef KMP_DEBUG1
3347
3348typedef struct kmp_team_list_item {
3349 kmp_team_p const *entry;
3350 struct kmp_team_list_item *next;
3351} kmp_team_list_item_t;
3352typedef kmp_team_list_item_t *kmp_team_list_t;
3353
3354static void __kmp_print_structure_team_accum( // Add team to list of teams.
3355 kmp_team_list_t list, // List of teams.
3356 kmp_team_p const *team // Team to add.
3357) {
3358
3359 // List must terminate with item where both entry and next are NULL.
3360 // Team is added to the list only once.
3361 // List is sorted in ascending order by team id.
3362 // Team id is *not* a key.
3363
3364 kmp_team_list_t l;
3365
3366 KMP_DEBUG_ASSERT(list != NULL);
3367 if (team == NULL__null) {
3368 return;
3369 }
3370
3371 __kmp_print_structure_team_accum(list, team->t.t_parent);
3372 __kmp_print_structure_team_accum(list, team->t.t_next_pool);
3373
3374 // Search list for the team.
3375 l = list;
3376 while (l->next != NULL__null && l->entry != team) {
3377 l = l->next;
3378 }
3379 if (l->next != NULL__null) {
3380 return; // Team has been added before, exit.
3381 }
3382
3383 // Team is not found. Search list again for insertion point.
3384 l = list;
3385 while (l->next != NULL__null && l->entry->t.t_id <= team->t.t_id) {
3386 l = l->next;
3387 }
3388
3389 // Insert team.
3390 {
3391 kmp_team_list_item_t *item = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
3392 sizeof(kmp_team_list_item_t));
3393 *item = *l;
3394 l->entry = team;
3395 l->next = item;
3396 }
3397}
3398
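The accumulator above maintains a singly linked list terminated by a sentinel whose entry and next are both NULL: it first scans for a duplicate, then finds the insertion point by t_id, and inserts by copying the current node into a fresh one. A self-contained sketch of that idiom, using a simplified stand-in for kmp_team_p rather than the real team type:

    #include <cstdio>
    #include <cstdlib>

    struct team { int id; };                        // stand-in for kmp_team_p
    struct item { const team *entry; item *next; }; // stand-in for kmp_team_list_item_t

    // Insert `t` keeping the list sorted by id; duplicates are ignored.
    // The trailing sentinel has entry == nullptr and next == nullptr.
    void accum(item *list, const team *t) {
      if (!t)
        return;
      item *l = list;
      while (l->next && l->entry != t) // search for an existing entry
        l = l->next;
      if (l->next)
        return; // already in the list
      l = list;
      while (l->next && l->entry->id <= t->id) // find the insertion point
        l = l->next;
      item *copy = (item *)std::malloc(sizeof(item));
      *copy = *l;     // push the current node (possibly the sentinel) down
      l->entry = t;   // and reuse it for the new team
      l->next = copy;
    }

    int main() {
      item *list = (item *)std::malloc(sizeof(item));
      list->entry = nullptr;
      list->next = nullptr;
      team a{3}, b{1}, c{2};
      const team *order[] = {&a, &b, &c, &a}; // the duplicate &a is dropped
      for (const team *t : order)
        accum(list, t);
      for (item *l = list; l->next; l = l->next)
        std::printf("team %d\n", l->entry->id);
      while (list) { item *dead = list; list = list->next; std::free(dead); }
      return 0;
    }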
3399static void __kmp_print_structure_team(char const *title, kmp_team_p const *team
3400
3401) {
3402 __kmp_printf("%s", title);
3403 if (team != NULL__null) {
3404 __kmp_printf("%2x %p\n", team->t.t_id, team);
3405 } else {
3406 __kmp_printf(" - (nil)\n");
3407 }
3408}
3409
3410static void __kmp_print_structure_thread(char const *title,
3411 kmp_info_p const *thread) {
3412 __kmp_printf("%s", title);
3413 if (thread != NULL__null) {
3414 __kmp_printf("%2d %p\n", thread->th.th_info.ds.ds_gtid, thread);
3415 } else {
3416 __kmp_printf(" - (nil)\n");
3417 }
3418}
3419
3420void __kmp_print_structure(void) {
3421
3422 kmp_team_list_t list;
3423
3424 // Initialize list of teams.
3425 list =
3426 (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(sizeof(kmp_team_list_item_t));
3427 list->entry = NULL;
3428 list->next = NULL;
3429
3430 __kmp_printf("\n------------------------------\nGlobal Thread "
3431 "Table\n------------------------------\n");
3432 {
3433 int gtid;
3434 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3435 __kmp_printf("%2d", gtid);
3436 if (__kmp_threads != NULL__null) {
3437 __kmp_printf(" %p", __kmp_threads[gtid]);
3438 }
3439 if (__kmp_root != NULL__null) {
3440 __kmp_printf(" %p", __kmp_root[gtid]);
3441 }
3442 __kmp_printf("\n");
3443 }
3444 }
3445
3446 // Print out __kmp_threads array.
3447 __kmp_printf("\n------------------------------\nThreads\n--------------------"
3448 "----------\n");
3449 if (__kmp_threads != NULL__null) {
3450 int gtid;
3451 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3452 kmp_info_t const *thread = __kmp_threads[gtid];
3453 if (thread != NULL__null) {
3454 __kmp_printf("GTID %2d %p:\n", gtid, thread);
3455 __kmp_printf(" Our Root: %p\n", thread->th.th_root);
3456 __kmp_print_structure_team(" Our Team: ", thread->th.th_team);
3457 __kmp_print_structure_team(" Serial Team: ",
3458 thread->th.th_serial_team);
3459 __kmp_printf(" Threads: %2d\n", thread->th.th_team_nproc);
3460 __kmp_print_structure_thread(" Primary: ",
3461 thread->th.th_team_master);
3462 __kmp_printf(" Serialized?: %2d\n", thread->th.th_team_serialized);
3463 __kmp_printf(" Set NProc: %2d\n", thread->th.th_set_nproc);
3464 __kmp_printf(" Set Proc Bind: %2d\n", thread->th.th_set_proc_bind);
3465 __kmp_print_structure_thread(" Next in pool: ",
3466 thread->th.th_next_pool);
3467 __kmp_printf("\n");
3468 __kmp_print_structure_team_accum(list, thread->th.th_team);
3469 __kmp_print_structure_team_accum(list, thread->th.th_serial_team);
3470 }
3471 }
3472 } else {
3473 __kmp_printf("Threads array is not allocated.\n");
3474 }
3475
3476 // Print out __kmp_root array.
3477 __kmp_printf("\n------------------------------\nUbers\n----------------------"
3478 "--------\n");
3479 if (__kmp_root != NULL__null) {
3480 int gtid;
3481 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3482 kmp_root_t const *root = __kmp_root[gtid];
3483 if (root != NULL__null) {
3484 __kmp_printf("GTID %2d %p:\n", gtid, root);
3485 __kmp_print_structure_team(" Root Team: ", root->r.r_root_team);
3486 __kmp_print_structure_team(" Hot Team: ", root->r.r_hot_team);
3487 __kmp_print_structure_thread(" Uber Thread: ",
3488 root->r.r_uber_thread);
3489 __kmp_printf(" Active?: %2d\n", root->r.r_active);
3490 __kmp_printf(" In Parallel: %2d\n",
3491 KMP_ATOMIC_LD_RLX(&root->r.r_in_parallel));
3492 __kmp_printf("\n");
3493 __kmp_print_structure_team_accum(list, root->r.r_root_team);
3494 __kmp_print_structure_team_accum(list, root->r.r_hot_team);
3495 }
3496 }
3497 } else {
3498 __kmp_printf("Ubers array is not allocated.\n");
3499 }
3500
3501 __kmp_printf("\n------------------------------\nTeams\n----------------------"
3502 "--------\n");
3503 while (list->next != NULL__null) {
3504 kmp_team_p const *team = list->entry;
3505 int i;
3506 __kmp_printf("Team %2x %p:\n", team->t.t_id, team);
3507 __kmp_print_structure_team(" Parent Team: ", team->t.t_parent);
3508 __kmp_printf(" Primary TID: %2d\n", team->t.t_master_tid);
3509 __kmp_printf(" Max threads: %2d\n", team->t.t_max_nproc);
3510 __kmp_printf(" Levels of serial: %2d\n", team->t.t_serialized);
3511 __kmp_printf(" Number threads: %2d\n", team->t.t_nproc);
3512 for (i = 0; i < team->t.t_nproc; ++i) {
3513 __kmp_printf(" Thread %2d: ", i);
3514 __kmp_print_structure_thread("", team->t.t_threads[i]);
3515 }
3516 __kmp_print_structure_team(" Next in pool: ", team->t.t_next_pool);
3517 __kmp_printf("\n");
3518 list = list->next;
3519 }
3520
3521 // Print out __kmp_thread_pool and __kmp_team_pool.
3522 __kmp_printf("\n------------------------------\nPools\n----------------------"
3523 "--------\n");
3524 __kmp_print_structure_thread("Thread pool: ",
3525 CCAST(kmp_info_t *, __kmp_thread_pool));
3526 __kmp_print_structure_team("Team pool: ",
3527 CCAST(kmp_team_t *, __kmp_team_pool));
3528 __kmp_printf("\n");
3529
3530 // Free team list.
3531 while (list != NULL__null) {
3532 kmp_team_list_item_t *item = list;
3533 list = list->next;
3534 KMP_INTERNAL_FREE(item);
3535 }
3536}
3537
3538#endif
3539
3540//---------------------------------------------------------------------------
3541// Stuff for per-thread fast random number generator
3542// Table of primes
3543static const unsigned __kmp_primes[] = {
3544 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, 0xba5703f5, 0xb495a877,
3545 0xe1626741, 0x79695e6b, 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
3546 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, 0xbe4d6fe9, 0x5f15e201,
3547 0x99afc3fd, 0xf3f16801, 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
3548 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, 0x085a3d61, 0x46eb5ea7,
3549 0x3d9910ed, 0x2e687b5b, 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
3550 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, 0x54581edb, 0xf2480f45,
3551 0x0bb9288f, 0xef1affc7, 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
3552 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, 0xfc411073, 0xc3749363,
3553 0xb892d829, 0x3549366b, 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
3554 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f};
3555
3556//---------------------------------------------------------------------------
3557// __kmp_get_random: Get a random number using a linear congruential method.
3558unsigned short __kmp_get_random(kmp_info_t *thread) {
3559 unsigned x = thread->th.th_x;
3560 unsigned short r = (unsigned short)(x >> 16);
3561
3562 thread->th.th_x = x * thread->th.th_a + 1;
3563
3564 KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
3565 thread->th.th_info.ds.ds_tid, r));
3566
3567 return r;
3568}
3569//--------------------------------------------------------
3570// __kmp_init_random: Initialize a random number generator
3571void __kmp_init_random(kmp_info_t *thread) {
3572 unsigned seed = thread->th.th_info.ds.ds_tid;
3573
3574 thread->th.th_a =
3575 __kmp_primes[seed % (sizeof(__kmp_primes) / sizeof(__kmp_primes[0]))];
3576 thread->th.th_x = (seed + 1) * thread->th.th_a + 1;
3577 KA_TRACE(30,
3578 ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a));
3579}
3580
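Stripped of the trace macros, the generator above is a per-thread linear congruential generator: each call returns the upper 16 bits of the state and then updates x <- a*x + 1 (mod 2^32 via unsigned wrap-around), with the multiplier a taken from the __kmp_primes table indexed by the thread id. A standalone sketch of the same update rule, using the table's first prime as a fixed multiplier:

    #include <cstdint>
    #include <cstdio>

    struct tiny_lcg {
      uint32_t a; // multiplier; the runtime picks one per thread from __kmp_primes
      uint32_t x; // current state

      explicit tiny_lcg(uint32_t seed, uint32_t mult = 0x9e3779b1u)
          : a(mult), x((seed + 1) * mult + 1) {} // mirrors __kmp_init_random

      uint16_t next() {
        uint16_t r = (uint16_t)(x >> 16); // high half of the state is the result
        x = x * a + 1;                    // mirrors __kmp_get_random
        return r;
      }
    };

    int main() {
      tiny_lcg rng(/*seed=*/0); // the seed corresponds to ds_tid in the runtime
      for (int i = 0; i < 4; ++i)
        std::printf("%u\n", (unsigned)rng.next());
      return 0;
    }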
3581#if KMP_OS_WINDOWS0
3582/* reclaim array entries for root threads that are already dead, returns number
3583 * reclaimed */
3584static int __kmp_reclaim_dead_roots(void) {
3585 int i, r = 0;
3586
3587 for (i = 0; i < __kmp_threads_capacity; ++i) {
3588 if (KMP_UBER_GTID(i) &&
3589 !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
3590 !__kmp_root[i]
3591 ->r.r_active) { // AC: reclaim only roots died in non-active state
3592 r += __kmp_unregister_root_other_thread(i);
3593 }
3594 }
3595 return r;
3596}
3597#endif
3598
3599/* This function attempts to create free entries in __kmp_threads and
3600 __kmp_root, and returns the number of free entries generated.
3601
3602 For Windows* OS static library, the first mechanism used is to reclaim array
3603 entries for root threads that are already dead.
3604
3605 On all platforms, expansion is attempted on the arrays __kmp_threads_ and
3606 __kmp_root, with appropriate update to __kmp_threads_capacity. Array
3607 capacity is increased by doubling with clipping to __kmp_tp_capacity, if
3608 threadprivate cache array has been created. Synchronization with
3609 __kmpc_threadprivate_cached is done using __kmp_tp_cached_lock.
3610
3611 After any dead root reclamation, if the clipping value allows array expansion
3612 to result in the generation of a total of nNeed free slots, the function does
3613 that expansion. If not, nothing is done beyond the possible initial root
3614 thread reclamation.
3615
3616 If any argument is negative, the behavior is undefined. */
3617static int __kmp_expand_threads(int nNeed) {
3618 int added = 0;
3619 int minimumRequiredCapacity;
3620 int newCapacity;
3621 kmp_info_t **newThreads;
3622 kmp_root_t **newRoot;
3623
3624 // All calls to __kmp_expand_threads should be under __kmp_forkjoin_lock, so
3625 // resizing __kmp_threads does not need additional protection if foreign
3626 // threads are present
3627
3628#if KMP_OS_WINDOWS0 && !KMP_DYNAMIC_LIB1
3629 /* only for Windows static library */
3630 /* reclaim array entries for root threads that are already dead */
3631 added = __kmp_reclaim_dead_roots();
3632
3633 if (nNeed) {
3634 nNeed -= added;
3635 if (nNeed < 0)
3636 nNeed = 0;
3637 }
3638#endif
3639 if (nNeed <= 0)
3640 return added;
3641
3642 // Note that __kmp_threads_capacity is not bounded by __kmp_max_nth. If
3643 // __kmp_max_nth is set to some value less than __kmp_sys_max_nth by the
3644 // user via KMP_DEVICE_THREAD_LIMIT, then __kmp_threads_capacity may become
3645 // > __kmp_max_nth in one of two ways:
3646 //
3647 // 1) The initialization thread (gtid = 0) exits. __kmp_threads[0]
3648 // may not be reused by another thread, so we may need to increase
3649 // __kmp_threads_capacity to __kmp_max_nth + 1.
3650 //
3651 // 2) New foreign root(s) are encountered. We always register new foreign
3652 // roots. This may cause a smaller # of threads to be allocated at
3653 // subsequent parallel regions, but the worker threads hang around (and
3654 // eventually go to sleep) and need slots in the __kmp_threads[] array.
3655 //
3656 // Anyway, that is the reason for moving the check to see if
3657 // __kmp_max_nth was exceeded into __kmp_reserve_threads()
3658 // instead of having it performed here. -BB
3659
3660 KMP_DEBUG_ASSERT(__kmp_sys_max_nth >= __kmp_threads_capacity);
3661
3662 /* compute expansion headroom to check if we can expand */
3663 if (__kmp_sys_max_nth - __kmp_threads_capacity < nNeed) {
3664 /* possible expansion too small -- give up */
3665 return added;
3666 }
3667 minimumRequiredCapacity = __kmp_threads_capacity + nNeed;
3668
3669 newCapacity = __kmp_threads_capacity;
3670 do {
3671 newCapacity = newCapacity <= (__kmp_sys_max_nth >> 1) ? (newCapacity << 1)
3672 : __kmp_sys_max_nth;
3673 } while (newCapacity < minimumRequiredCapacity);
3674 newThreads = (kmp_info_t **)__kmp_allocate(
3675 (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * newCapacity + CACHE_LINE);
3676 newRoot =
3677 (kmp_root_t **)((char *)newThreads + sizeof(kmp_info_t *) * newCapacity);
3678 KMP_MEMCPY(newThreads, __kmp_threads,
3679 __kmp_threads_capacity * sizeof(kmp_info_t *));
3680 KMP_MEMCPY(newRoot, __kmp_root,
3681 __kmp_threads_capacity * sizeof(kmp_root_t *));
3682 // Put old __kmp_threads array on a list. Any ongoing references to the old
3683 // list will be valid. This list is cleaned up at library shutdown.
3684 kmp_old_threads_list_t *node =
3685 (kmp_old_threads_list_t *)__kmp_allocate(sizeof(kmp_old_threads_list_t));
3686 node->threads = __kmp_threads;
3687 node->next = __kmp_old_threads_list;
3688 __kmp_old_threads_list = node;
3689
3690 *(kmp_info_t * *volatile *)&__kmp_threads = newThreads;
3691 *(kmp_root_t * *volatile *)&__kmp_root = newRoot;
3692 added += newCapacity - __kmp_threads_capacity;
3693 *(volatile int *)&__kmp_threads_capacity = newCapacity;
3694
3695 if (newCapacity > __kmp_tp_capacity) {
3696 __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
3697 if (__kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3698 __kmp_threadprivate_resize_cache(newCapacity);
3699 } else { // increase __kmp_tp_capacity to correspond with kmp_threads size
3700 *(volatile int *)&__kmp_tp_capacity = newCapacity;
3701 }
3702 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
3703 }
3704
3705 return added;
3706}
3707
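The growth step in __kmp_expand_threads is worth seeing in isolation: the capacity is doubled until it reaches __kmp_threads_capacity + nNeed, clipped at __kmp_sys_max_nth, and the whole attempt is abandoned early when even the clipped maximum cannot provide nNeed new slots. A hedged sketch of that arithmetic alone (assuming a positive starting capacity, as in the runtime):

    #include <cstdio>

    // Returns the new capacity, or -1 when the request cannot be satisfied.
    // `sys_max` stands for __kmp_sys_max_nth, `capacity` for __kmp_threads_capacity.
    int grow_capacity(int capacity, int need, int sys_max) {
      if (need <= 0)
        return capacity;
      if (sys_max - capacity < need)
        return -1; // expansion headroom too small -- give up
      int minimum_required = capacity + need;
      int new_capacity = capacity; // assumed > 0
      do {
        new_capacity =
            new_capacity <= (sys_max >> 1) ? (new_capacity << 1) : sys_max;
      } while (new_capacity < minimum_required);
      return new_capacity;
    }

    int main() {
      std::printf("%d\n", grow_capacity(64, 1, 32768));      // 128: one doubling
      std::printf("%d\n", grow_capacity(64, 100, 32768));    // 256: two doublings
      std::printf("%d\n", grow_capacity(32760, 100, 32768)); // -1: no headroom
      return 0;
    }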
3708/* Register the current thread as a root thread and obtain our gtid. We must
3709 have the __kmp_initz_lock held at this point. Argument TRUE only if are the
3710 thread that calls from __kmp_do_serial_initialize() */
3711int __kmp_register_root(int initial_thread) {
3712 kmp_info_t *root_thread;
3713 kmp_root_t *root;
3714 int gtid;
3715 int capacity;
3716 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
3717 KA_TRACE(20, ("__kmp_register_root: entered\n"));
3718 KMP_MB();
3719
3720 /* 2007-03-02:
3721 If the initial thread has not invoked the OpenMP RTL yet, and this thread is
3722 not an initial one, the "__kmp_all_nth >= __kmp_threads_capacity" condition
3723 does not work as expected -- it may return false (meaning there is at least
3724 one empty slot in the __kmp_threads array), while the only free slot may be
3725 slot #0, which is reserved for the initial thread and so cannot be used for
3726 this one. The following code works around this bug.
3727
3728 However, the right solution seems to be not reserving slot #0 for the
3729 initial thread, because:
3730 (1) there is no magic in slot #0,
3731 (2) we cannot detect the initial thread reliably (the first thread that
3732 performs serial initialization may not be a real initial thread).
3733 */
3734 capacity = __kmp_threads_capacity;
3735 if (!initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3736 --capacity;
3737 }
3738
3739 // If it is not for initializing the hidden helper team, we need to take
3740 // __kmp_hidden_helper_threads_num out of the capacity because it is included
3741 // in __kmp_threads_capacity.
3742 if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
3743 capacity -= __kmp_hidden_helper_threads_num;
3744 }
3745
3746 /* see if there are too many threads */
3747 if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1)) {
3748 if (__kmp_tp_cached) {
3749 __kmp_fatal(KMP_MSG(CantRegisterNewThread),
3750 KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
3751 KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
3752 } else {
3753 __kmp_fatal(KMP_MSG(CantRegisterNewThread), KMP_HNT(SystemLimitOnThreads),
3754 __kmp_msg_null);
3755 }
3756 }
3757
3758 // When hidden helper task is enabled, __kmp_threads is organized as follows:
3759 // 0: initial thread, also a regular OpenMP thread.
3760 // [1, __kmp_hidden_helper_threads_num]: slots for hidden helper threads.
3761 // [__kmp_hidden_helper_threads_num + 1, __kmp_threads_capacity): slots for
3762 // regular OpenMP threads.
3763 if (TCR_4(__kmp_init_hidden_helper_threads)) {
3764 // Find an available thread slot for hidden helper thread. Slots for hidden
3765 // helper threads start from 1 to __kmp_hidden_helper_threads_num.
3766 for (gtid = 1; TCR_PTR(__kmp_threads[gtid]) != NULL &&
3767 gtid <= __kmp_hidden_helper_threads_num;
3768 gtid++)
3769 ;
3770 KMP_ASSERT(gtid <= __kmp_hidden_helper_threads_num);
3771 KA_TRACE(1, ("__kmp_register_root: found slot in threads array for "
3772 "hidden helper thread: T#%d\n",
3773 gtid));
3774 } else {
3775 /* find an available thread slot */
3776 // Don't reassign the zero slot since we need that to only be used by
3777 // initial thread. Slots for hidden helper threads should also be skipped.
3778 if (initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3779 gtid = 0;
3780 } else {
3781 for (gtid = __kmp_hidden_helper_threads_num + 1;
3782 TCR_PTR(__kmp_threads[gtid]) != NULL; gtid++)
3783 ;
3784 }
3785 KA_TRACE(
3786 1, ("__kmp_register_root: found slot in threads array: T#%d\n", gtid));
3787 KMP_ASSERT(gtid < __kmp_threads_capacity);
3788 }
3789
3790 /* update global accounting */
3791 __kmp_all_nth++;
3792 TCW_4(__kmp_nth, __kmp_nth + 1);
3793
3794 // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) for low
3795 // numbers of procs, and method #2 (keyed API call) for higher numbers.
3796 if (__kmp_adjust_gtid_mode) {
3797 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
3798 if (TCR_4(__kmp_gtid_mode) != 2) {
3799 TCW_4(__kmp_gtid_mode, 2);
3800 }
3801 } else {
3802 if (TCR_4(__kmp_gtid_mode) != 1) {
3803 TCW_4(__kmp_gtid_mode, 1);
3804 }
3805 }
3806 }
3807
3808#ifdef KMP_ADJUST_BLOCKTIME1
3809 /* Adjust blocktime to zero if necessary */
3810 /* Middle initialization might not have occurred yet */
3811 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
3812 if (__kmp_nth > __kmp_avail_proc) {
3813 __kmp_zero_bt = TRUE;
3814 }
3815 }
3816#endif /* KMP_ADJUST_BLOCKTIME */
3817
3818 /* setup this new hierarchy */
3819 if (!(root = __kmp_root[gtid])) {
3820 root = __kmp_root[gtid] = (kmp_root_t *)__kmp_allocate(sizeof(kmp_root_t));
3821 KMP_DEBUG_ASSERT(!root->r.r_root_team);
3822 }
3823
3824#if KMP_STATS_ENABLED
3825 // Initialize stats as soon as possible (right after gtid assignment).
3826 __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
3827 __kmp_stats_thread_ptr->startLife();
3828 KMP_SET_THREAD_STATE(SERIAL_REGION);
3829 KMP_INIT_PARTITIONED_TIMERS(OMP_serial);
3830#endif
3831 __kmp_initialize_root(root);
3832
3833 /* setup new root thread structure */
3834 if (root->r.r_uber_thread) {
3835 root_thread = root->r.r_uber_thread;
3836 } else {
3837 root_thread = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));
3838 if (__kmp_storage_map) {
3839 __kmp_print_thread_storage_map(root_thread, gtid);
3840 }
3841 root_thread->th.th_info.ds.ds_gtid = gtid;
3842#if OMPT_SUPPORT
3843 root_thread->th.ompt_thread_info.thread_data = ompt_data_none;
3844#endif
3845 root_thread->th.th_root = root;
3846 if (__kmp_env_consistency_check) {
3847 root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid);
3848 }
3849#if USE_FAST_MEMORY3
3850 __kmp_initialize_fast_memory(root_thread);
3851#endif /* USE_FAST_MEMORY */
3852
3853#if KMP_USE_BGET
3854 KMP_DEBUG_ASSERT(root_thread->th.th_local.bget_data == NULL);
3855 __kmp_initialize_bget(root_thread);
3856#endif
3857 __kmp_init_random(root_thread); // Initialize random number generator
3858 }
3859
3860 /* setup the serial team held in reserve by the root thread */
3861 if (!root_thread->th.th_serial_team) {
3862 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3863 KF_TRACE(10, ("__kmp_register_root: before serial_team\n"));
3864 root_thread->th.th_serial_team = __kmp_allocate_team(
3865 root, 1, 1,
3866#if OMPT_SUPPORT
3867 ompt_data_none, // root parallel id
3868#endif
3869 proc_bind_default, &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
3870 }
3871 KMP_ASSERT(root_thread->th.th_serial_team);
3872 KF_TRACE(10, ("__kmp_register_root: after serial_team = %p\n",
3873 root_thread->th.th_serial_team));
3874
3875 /* drop root_thread into place */
3876 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
3877
3878 root->r.r_root_team->t.t_threads[0] = root_thread;
3879 root->r.r_hot_team->t.t_threads[0] = root_thread;
3880 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
3881 // AC: the team created in reserve, not for execution (it is unused for now).
3882 root_thread->th.th_serial_team->t.t_serialized = 0;
3883 root->r.r_uber_thread = root_thread;
3884
3885 /* initialize the thread, get it ready to go */
3886 __kmp_initialize_info(root_thread, root->r.r_root_team, 0, gtid);
3887 TCW_4(__kmp_init_gtid, TRUE);
3888
3889 /* prepare the primary thread for get_gtid() */
3890 __kmp_gtid_set_specific(gtid);
3891
3892#if USE_ITT_BUILD1
3893 __kmp_itt_thread_name(gtid);
3894#endif /* USE_ITT_BUILD */
3895
3896#ifdef KMP_TDATA_GTID1
3897 __kmp_gtid = gtid;
3898#endif
3899 __kmp_create_worker(gtid, root_thread, __kmp_stksize);
3900 KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == gtid);
3901
3902 KA_TRACE(20, ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, "
3903 "plain=%u\n",
3904 gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team),
3905 root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
3906 KMP_INIT_BARRIER_STATE));
3907 { // Initialize barrier data.
3908 int b;
3909 for (b = 0; b < bs_last_barrier; ++b) {
3910 root_thread->th.th_bar[b].bb.b_arrived = KMP_INIT_BARRIER_STATE;
3911#if USE_DEBUGGER
3912 root_thread->th.th_bar[b].bb.b_worker_arrived = 0;
3913#endif
3914 }
3915 }
3916 KMP_DEBUG_ASSERT(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived ==
3917 KMP_INIT_BARRIER_STATE);
3918
3919#if KMP_AFFINITY_SUPPORTED
3920 root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
3921 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
3922 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
3923 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
3924#endif /* KMP_AFFINITY_SUPPORTED */
3925 root_thread->th.th_def_allocator = __kmp_def_allocator;
3926 root_thread->th.th_prev_level = 0;
3927 root_thread->th.th_prev_num_threads = 1;
3928
3929 kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(sizeof(kmp_cg_root_t));
3930 tmp->cg_root = root_thread;
3931 tmp->cg_thread_limit = __kmp_cg_max_nth;
3932 tmp->cg_nthreads = 1;
3933 KA_TRACE(100, ("__kmp_register_root: Thread %p created node %p with"
3934 " cg_nthreads init to 1\n",
3935 root_thread, tmp));
3936 tmp->up = NULL__null;
3937 root_thread->th.th_cg_roots = tmp;
3938
3939 __kmp_root_counter++;
3940
3941#if OMPT_SUPPORT1
3942 if (!initial_thread && ompt_enabled.enabled) {
3943
3944 kmp_info_t *root_thread = ompt_get_thread();
3945
3946 ompt_set_thread_state(root_thread, ompt_state_overhead);
3947
3948 if (ompt_enabled.ompt_callback_thread_begin) {
3949 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
3950 ompt_thread_initial, __ompt_get_thread_data_internal());
3951 }
3952 ompt_data_t *task_data;
3953 ompt_data_t *parallel_data;
3954 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, &parallel_data,
3955 NULL);
3956 if (ompt_enabled.ompt_callback_implicit_task) {
3957 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
3958 ompt_scope_begin, parallel_data, task_data, 1, 1, ompt_task_initial);
3959 }
3960
3961 ompt_set_thread_state(root_thread, ompt_state_work_serial);
3962 }
3963#endif
3964#if OMPD_SUPPORT1
3965 if (ompd_state & OMPD_ENABLE_BP0x1)
3966 ompd_bp_thread_begin();
3967#endif
3968
3969 KMP_MB();
3970 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
3971
3972 return gtid;
3973}
3974
3975#if KMP_NESTED_HOT_TEAMS1
3976static int __kmp_free_hot_teams(kmp_root_t *root, kmp_info_t *thr, int level,
3977 const int max_level) {
3978 int i, n, nth;
3979 kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
3980 if (!hot_teams || !hot_teams[level].hot_team) {
3981 return 0;
3982 }
3983 KMP_DEBUG_ASSERT(level < max_level);
3984 kmp_team_t *team = hot_teams[level].hot_team;
3985 nth = hot_teams[level].hot_team_nth;
3986 n = nth - 1; // primary thread is not freed
3987 if (level < max_level - 1) {
3988 for (i = 0; i < nth; ++i) {
3989 kmp_info_t *th = team->t.t_threads[i];
3990 n += __kmp_free_hot_teams(root, th, level + 1, max_level);
3991 if (i > 0 && th->th.th_hot_teams) {
3992 __kmp_free(th->th.th_hot_teams);
3993 th->th.th_hot_teams = NULL;
3994 }
3995 }
3996 }
3997 __kmp_free_team(root, team, NULL__null);
3998 return n;
3999}
4000#endif
4001
4002// Resets a root thread and clears its root and hot teams.
4003// Returns the number of __kmp_threads entries directly and indirectly freed.
4004static int __kmp_reset_root(int gtid, kmp_root_t *root) {
4005 kmp_team_t *root_team = root->r.r_root_team;
4006 kmp_team_t *hot_team = root->r.r_hot_team;
4007 int n = hot_team->t.t_nproc;
4008 int i;
4009
4010 KMP_DEBUG_ASSERT(!root->r.r_active);
4011
4012 root->r.r_root_team = NULL__null;
4013 root->r.r_hot_team = NULL__null;
4014 // __kmp_free_team() does not free hot teams, so we have to clear r_hot_team
4015 // before call to __kmp_free_team().
4016 __kmp_free_team(root, root_team USE_NESTED_HOT_ARG(NULL));
4017#if KMP_NESTED_HOT_TEAMS1
4018 if (__kmp_hot_teams_max_level >
4019 0) { // need to free nested hot teams and their threads if any
4020 for (i = 0; i < hot_team->t.t_nproc; ++i) {
4021 kmp_info_t *th = hot_team->t.t_threads[i];
4022 if (__kmp_hot_teams_max_level > 1) {
4023 n += __kmp_free_hot_teams(root, th, 1, __kmp_hot_teams_max_level);
4024 }
4025 if (th->th.th_hot_teams) {
4026 __kmp_free(th->th.th_hot_teams);
4027 th->th.th_hot_teams = NULL;
4028 }
4029 }
4030 }
4031#endif
4032 __kmp_free_team(root, hot_team USE_NESTED_HOT_ARG(NULL));
4033
4034 // Before we can reap the thread, we need to make certain that all other
4035 // threads in the teams that had this root as ancestor have stopped trying to
4036 // steal tasks.
4037 if (__kmp_tasking_mode != tskm_immediate_exec) {
4038 __kmp_wait_to_unref_task_teams();
4039 }
4040
4041#if KMP_OS_WINDOWS0
4042 /* Close Handle of root duplicated in __kmp_create_worker (tr #62919) */
4043 KA_TRACE(
4044 10, ("__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC
4045 "\n",
4046 (LPVOID) & (root->r.r_uber_thread->th),
4047 root->r.r_uber_thread->th.th_info.ds.ds_thread));
4048 __kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread);
4049#endif /* KMP_OS_WINDOWS */
4050
4051#if OMPD_SUPPORT1
4052 if (ompd_state & OMPD_ENABLE_BP0x1)
4053 ompd_bp_thread_end();
4054#endif
4055
4056#if OMPT_SUPPORT1
4057 ompt_data_t *task_data;
4058 ompt_data_t *parallel_data;
4059 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, &parallel_data,
4060 NULL);
4061 if (ompt_enabled.ompt_callback_implicit_task) {
4062 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
4063 ompt_scope_end, parallel_data, task_data, 0, 1, ompt_task_initial);
4064 }
4065 if (ompt_enabled.ompt_callback_thread_end) {
4066 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(
4067 &(root->r.r_uber_thread->th.ompt_thread_info.thread_data));
4068 }
4069#endif
4070
4071 TCW_4(__kmp_nth,
4072 __kmp_nth - 1); // __kmp_reap_thread will decrement __kmp_all_nth.
4073 i = root->r.r_uber_thread->th.th_cg_roots->cg_nthreads--;
4074 KA_TRACE(100, ("__kmp_reset_root: Thread %p decrement cg_nthreads on node %p"
4075 " to %d\n",
4076 root->r.r_uber_thread, root->r.r_uber_thread->th.th_cg_roots,
4077 root->r.r_uber_thread->th.th_cg_roots->cg_nthreads));
4078 if (i == 1) {
4079 // need to free contention group structure
4080 KMP_DEBUG_ASSERT(root->r.r_uber_thread ==
4081 root->r.r_uber_thread->th.th_cg_roots->cg_root);
4082 KMP_DEBUG_ASSERT(root->r.r_uber_thread->th.th_cg_roots->up == NULL);
4083 __kmp_free(root->r.r_uber_thread->th.th_cg_roots);
4084 root->r.r_uber_thread->th.th_cg_roots = NULL;
4085 }
4086 __kmp_reap_thread(root->r.r_uber_thread, 1);
4087
4088 // We cannot put the root thread into __kmp_thread_pool, so we have to reap
4089 // it instead of freeing it.
4090 root->r.r_uber_thread = NULL;
4091 /* mark root as no longer in use */
4092 root->r.r_begin = FALSE0;
4093
4094 return n;
4095}
4096
4097void __kmp_unregister_root_current_thread(int gtid) {
4098 KA_TRACE(1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid));
4099 /* this lock should be ok, since unregister_root_current_thread is never
4100 called during an abort, only during a normal close. furthermore, if you
4101 have the forkjoin lock, you should never try to get the initz lock */
4102 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
4103 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
4104 KC_TRACE(10, ("__kmp_unregister_root_current_thread: already finished, "
4105 "exiting T#%d\n",
4106 gtid));
4107 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4108 return;
4109 }
4110 kmp_root_t *root = __kmp_root[gtid];
4111
4112 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
4113 KMP_ASSERT(KMP_UBER_GTID(gtid));
4114 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
4115 KMP_ASSERT(root->r.r_active == FALSE);
4116
4117 KMP_MB();
4118
4119 kmp_info_t *thread = __kmp_threads[gtid];
4120 kmp_team_t *team = thread->th.th_team;
4121 kmp_task_team_t *task_team = thread->th.th_task_team;
4122
4123 // we need to wait for the proxy tasks before finishing the thread
4124 if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||
4125 task_team->tt.tt_hidden_helper_task_encountered)) {
4126#if OMPT_SUPPORT
4127 // the runtime is shutting down so we won't report any events
4128 thread->th.ompt_thread_info.state = ompt_state_undefined;
4129#endif
4130 __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
4131 }
4132
4133 __kmp_reset_root(gtid, root);
4134
4135 KMP_MB();
4136 KC_TRACE(10,
4137 ("__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid));
4138
4139 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4140}
4141
4142#if KMP_OS_WINDOWS0
4143/* __kmp_forkjoin_lock must be already held
4144 Unregisters a root thread that is not the current thread. Returns the number
4145 of __kmp_threads entries freed as a result. */
4146static int __kmp_unregister_root_other_thread(int gtid) {
4147 kmp_root_t *root = __kmp_root[gtid];
4148 int r;
4149
4150 KA_TRACE(1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid));
4151 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
4152 KMP_ASSERT(KMP_UBER_GTID(gtid));
4153 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
4154 KMP_ASSERT(root->r.r_active == FALSE);
4155
4156 r = __kmp_reset_root(gtid, root);
4157 KC_TRACE(10,
4158 ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid));
4159 return r;
4160}
4161#endif
4162
4163#if KMP_DEBUG1
4164void __kmp_task_info() {
4165
4166 kmp_int32 gtid = __kmp_entry_gtid();
4167 kmp_int32 tid = __kmp_tid_from_gtid(gtid);
4168 kmp_info_t *this_thr = __kmp_threads[gtid];
4169 kmp_team_t *steam = this_thr->th.th_serial_team;
4170 kmp_team_t *team = this_thr->th.th_team;
4171
4172 __kmp_printf(
4173 "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p steam=%p curtask=%p "
4174 "ptask=%p\n",
4175 gtid, tid, this_thr, team, steam, this_thr->th.th_current_task,
4176 team->t.t_implicit_task_taskdata[tid].td_parent);
4177}
4178#endif // KMP_DEBUG
4179
4180/* TODO optimize with one big memclr, take out what isn't needed, split
4181 responsibility to workers as much as possible, and delay initialization of
4182 features as much as possible */
4183static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
4184 int tid, int gtid) {
4185 /* this_thr->th.th_info.ds.ds_gtid is setup in
4186 kmp_allocate_thread/create_worker.
4187 this_thr->th.th_serial_team is setup in __kmp_allocate_thread */
4188 KMP_DEBUG_ASSERT(this_thr != NULL);
4189 KMP_DEBUG_ASSERT(this_thr->th.th_serial_team);
4190 KMP_DEBUG_ASSERT(team);
4191 KMP_DEBUG_ASSERT(team->t.t_threads);
4192 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4193 kmp_info_t *master = team->t.t_threads[0];
4194 KMP_DEBUG_ASSERT(master);
4195 KMP_DEBUG_ASSERT(master->th.th_root);
4196
4197 KMP_MB();
4198
4199 TCW_SYNC_PTR(this_thr->th.th_team, team);
4200
4201 this_thr->th.th_info.ds.ds_tid = tid;
4202 this_thr->th.th_set_nproc = 0;
4203 if (__kmp_tasking_mode != tskm_immediate_exec)
4204 // When tasking is possible, threads are not safe to reap until they are
4205 // done tasking; this will be set when tasking code is exited in wait
4206 this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
4207 else // no tasking --> always safe to reap
4208 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
4209 this_thr->th.th_set_proc_bind = proc_bind_default;
4210#if KMP_AFFINITY_SUPPORTED
4211 this_thr->th.th_new_place = this_thr->th.th_current_place;
4212#endif
4213 this_thr->th.th_root = master->th.th_root;
4214
4215 /* setup the thread's cache of the team structure */
4216 this_thr->th.th_team_nproc = team->t.t_nproc;
4217 this_thr->th.th_team_master = master;
4218 this_thr->th.th_team_serialized = team->t.t_serialized;
4219
4220 KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata);
4221
4222 KF_TRACE(10, ("__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
4223 tid, gtid, this_thr, this_thr->th.th_current_task));
4224
4225 __kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr,
4226 team, tid, TRUE);
4227
4228 KF_TRACE(10, ("__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
4229 tid, gtid, this_thr, this_thr->th.th_current_task));
4230 // TODO: Initialize ICVs from parent; GEH - isn't that already done in
4231 // __kmp_initialize_team()?
4232
4233 /* TODO no worksharing in speculative threads */
4234 this_thr->th.th_dispatch = &team->t.t_dispatch[tid];
4235
4236 this_thr->th.th_local.this_construct = 0;
4237
4238 if (!this_thr->th.th_pri_common) {
4239 this_thr->th.th_pri_common =
4240 (struct common_table *)__kmp_allocate(sizeof(struct common_table));
4241 if (__kmp_storage_map) {
4242 __kmp_print_storage_map_gtid(
4243 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
4244 sizeof(struct common_table), "th_%d.th_pri_common\n", gtid);
4245 }
4246 this_thr->th.th_pri_head = NULL;
4247 }
4248
4249 if (this_thr != master && // Primary thread's CG root is initialized elsewhere
4250 this_thr->th.th_cg_roots != master->th.th_cg_roots) { // CG root not set
4251 // Make new thread's CG root same as primary thread's
4252 KMP_DEBUG_ASSERT(master->th.th_cg_roots);
4253 kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
4254 if (tmp) {
4255 // worker changes CG, need to check if old CG should be freed
4256 int i = tmp->cg_nthreads--;
4257 KA_TRACE(100, ("__kmp_initialize_info: Thread %p decrement cg_nthreads"
4258 " on node %p of thread %p to %d\n",
4259 this_thr, tmp, tmp->cg_root, tmp->cg_nthreads));
4260 if (i == 1) {
4261 __kmp_free(tmp); // last thread left CG --> free it
4262 }
4263 }
4264 this_thr->th.th_cg_roots = master->th.th_cg_roots;
4265 // Increment new thread's CG root's counter to add the new thread
4266 this_thr->th.th_cg_roots->cg_nthreads++;
4267 KA_TRACE(100, ("__kmp_initialize_info: Thread %p increment cg_nthreads on"
4268 " node %p of thread %p to %d\n",
4269 this_thr, this_thr->th.th_cg_roots,
4270 this_thr->th.th_cg_roots->cg_root,
4271 this_thr->th.th_cg_roots->cg_nthreads));
4272 this_thr->th.th_current_task->td_icvs.thread_limit =
4273 this_thr->th.th_cg_roots->cg_thread_limit;
4274 }
4275
4276 /* Initialize dynamic dispatch */
4277 {
4278 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
4279 // Use team max_nproc since this will never change for the team.
4280 size_t disp_size =
4281 sizeof(dispatch_private_info_t) *
4282 (team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers);
4283 KD_TRACE(10, ("__kmp_initialize_info: T#%d max_nproc: %d\n", gtid,
4284 team->t.t_max_nproc));
4285 KMP_ASSERT(dispatch);
4286 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4287 KMP_DEBUG_ASSERT(dispatch == &team->t.t_dispatch[tid]);
4288
4289 dispatch->th_disp_index = 0;
4290 dispatch->th_doacross_buf_idx = 0;
4291 if (!dispatch->th_disp_buffer) {
4292 dispatch->th_disp_buffer =
4293 (dispatch_private_info_t *)__kmp_allocate(disp_size);
4294
4295 if (__kmp_storage_map) {
4296 __kmp_print_storage_map_gtid(
4297 gtid, &dispatch->th_disp_buffer[0],
4298 &dispatch->th_disp_buffer[team->t.t_max_nproc == 1
4299 ? 1
4300 : __kmp_dispatch_num_buffers],
4301 disp_size,
4302 "th_%d.th_dispatch.th_disp_buffer "
4303 "(team_%d.t_dispatch[%d].th_disp_buffer)",
4304 gtid, team->t.t_id, gtid);
4305 }
4306 } else {
4307 memset(&dispatch->th_disp_buffer[0], '\0', disp_size);
4308 }
4309
4310 dispatch->th_dispatch_pr_current = 0;
4311 dispatch->th_dispatch_sh_current = 0;
4312
4313 dispatch->th_deo_fcn = 0; /* ORDERED */
4314 dispatch->th_dxo_fcn = 0; /* END ORDERED */
4315 }
4316
4317 this_thr->th.th_next_pool = NULL;
4318
4319 if (!this_thr->th.th_task_state_memo_stack) {
4320 size_t i;
4321 this_thr->th.th_task_state_memo_stack =
4322 (kmp_uint8 *)__kmp_allocate(4 * sizeof(kmp_uint8));
4323 this_thr->th.th_task_state_top = 0;
4324 this_thr->th.th_task_state_stack_sz = 4;
4325 for (i = 0; i < this_thr->th.th_task_state_stack_sz;
4326 ++i) // zero init the stack
4327 this_thr->th.th_task_state_memo_stack[i] = 0;
4328 }
4329
4330 KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
4331 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
4332
4333 KMP_MB();
4334}
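
The contention-group (CG) root handling inside __kmp_initialize_info follows a plain manual reference-counting pattern: a worker that changes groups decrements the old root's thread count, frees the node if it was the last user, then adopts the primary thread's root, increments its count, and inherits its thread limit. The standalone sketch below illustrates that pattern with hypothetical CgRoot/Worker types (not the runtime's kmp_cg_root_t/kmp_info_t), assuming, as in the listing, that all updates happen under the fork/join lock.

#include <cassert>
#include <cstdio>

// Hypothetical stand-ins for kmp_cg_root_t / kmp_info_t.
struct CgRoot {
  int nthreads;     // number of threads referencing this root
  int thread_limit; // limit inherited by threads in this group
};

struct Worker {
  CgRoot *cg_root;
  int thread_limit;
};

// Move 'worker' into the primary thread's group, mirroring the
// decrement / free-when-last / adopt / increment sequence above.
void adopt_primary_group(Worker &worker, CgRoot *primary_root) {
  assert(primary_root != nullptr);
  if (CgRoot *old_root = worker.cg_root) {
    if (--old_root->nthreads == 0)
      delete old_root; // last thread left the old group --> free it
  }
  worker.cg_root = primary_root;
  ++primary_root->nthreads;
  worker.thread_limit = primary_root->thread_limit;
}

int main() {
  CgRoot *primary = new CgRoot{1, 8};
  Worker w{new CgRoot{1, 4}, 4};
  adopt_primary_group(w, primary);
  std::printf("group size=%d limit=%d\n", primary->nthreads, w.thread_limit);
  delete primary;
  return 0;
}
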
4335
4336/* allocate a new thread for the requesting team. this is only called from
4337 within a forkjoin critical section. we will first try to get an available
4338 thread from the thread pool. if none is available, we will fork a new one
4339 assuming we are able to create a new one. this should be assured, as the
4340 caller should check on this first. */
4341kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
4342 int new_tid) {
4343 kmp_team_t *serial_team;
4344 kmp_info_t *new_thr;
4345 int new_gtid;
4346
4347 KA_TRACE(20, ("__kmp_allocate_thread: T#%d\n", __kmp_get_gtid()));
4348 KMP_DEBUG_ASSERT(root && team);
4349#if !KMP_NESTED_HOT_TEAMS
4350 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(__kmp_get_gtid()));
4351#endif
4352 KMP_MB();
4353
4354 /* first, try to get one from the thread pool */
4355 if (__kmp_thread_pool) {
4356 new_thr = CCAST(kmp_info_t *, __kmp_thread_pool);
4357 __kmp_thread_pool = (volatile kmp_info_t *)new_thr->th.th_next_pool;
4358 if (new_thr == __kmp_thread_pool_insert_pt) {
4359 __kmp_thread_pool_insert_pt = NULL;
4360 }
4361 TCW_4(new_thr->th.th_in_pool, FALSE);
4362 __kmp_suspend_initialize_thread(new_thr);
4363 __kmp_lock_suspend_mx(new_thr);
4364 if (new_thr->th.th_active_in_pool == TRUE) {
4365 KMP_DEBUG_ASSERT(new_thr->th.th_active == TRUE);
4366 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
4367 new_thr->th.th_active_in_pool = FALSE;
4368 }
4369 __kmp_unlock_suspend_mx(new_thr);
4370
4371 KA_TRACE(20, ("__kmp_allocate_thread: T#%d using thread T#%d\n",
4372 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid));
4373 KMP_ASSERT(!new_thr->th.th_team);
4374 KMP_DEBUG_ASSERT(__kmp_nth < __kmp_threads_capacity);
4375
4376 /* setup the thread structure */
4377 __kmp_initialize_info(new_thr, team, new_tid,
4378 new_thr->th.th_info.ds.ds_gtid);
4379 KMP_DEBUG_ASSERT(new_thr->th.th_serial_team);
4380
4381 TCW_4(__kmp_nth, __kmp_nth + 1);
4382
4383 new_thr->th.th_task_state = 0;
4384 new_thr->th.th_task_state_top = 0;
4385 new_thr->th.th_task_state_stack_sz = 4;
4386
4387 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
4388 // Make sure pool thread has transitioned to waiting on own thread struct
4389 KMP_DEBUG_ASSERT(new_thr->th.th_used_in_team.load() == 0);
4390 // Thread activated in __kmp_allocate_team when increasing team size
4391 }
4392
4393#ifdef KMP_ADJUST_BLOCKTIME
4394 /* Adjust blocktime back to zero if necessary */
4395 /* Middle initialization might not have occurred yet */
4396 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4397 if (__kmp_nth > __kmp_avail_proc) {
4398 __kmp_zero_bt = TRUE;
4399 }
4400 }
4401#endif /* KMP_ADJUST_BLOCKTIME */
4402
4403#if KMP_DEBUG
4404 // If thread entered pool via __kmp_free_thread, wait_flag should !=
4405 // KMP_BARRIER_PARENT_FLAG.
4406 int b;
4407 kmp_balign_t *balign = new_thr->th.th_bar;
4408 for (b = 0; b < bs_last_barrier; ++b)
4409 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
4410#endif
4411
4412 KF_TRACE(10, ("__kmp_allocate_thread: T#%d using thread %p T#%d\n",
4413 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid));
4414
4415 KMP_MB();
4416 return new_thr;
4417 }
4418
4419 /* no, well fork a new one */
4420 KMP_ASSERT(__kmp_nth == __kmp_all_nth);
4421 KMP_ASSERT(__kmp_all_nth < __kmp_threads_capacity);
4422
4423#if KMP_USE_MONITOR
4424 // If this is the first worker thread the RTL is creating, then also
4425 // launch the monitor thread. We try to do this as early as possible.
4426 if (!TCR_4(__kmp_init_monitor)) {
4427 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
4428 if (!TCR_4(__kmp_init_monitor)) {
4429 KF_TRACE(10, ("before __kmp_create_monitor\n"));
4430 TCW_4(__kmp_init_monitor, 1);
4431 __kmp_create_monitor(&__kmp_monitor);
4432 KF_TRACE(10, ("after __kmp_create_monitor\n"));
4433#if KMP_OS_WINDOWS
4434 // AC: wait until monitor has started. This is a fix for CQ232808.
4435 // The reason is that if the library is loaded/unloaded in a loop with
4436 // small (parallel) work in between, then there is high probability that
4437 // monitor thread started after the library shutdown. At shutdown it is
4438 // too late to cope with the problem, because when the primary thread is
4439 // in DllMain (process detach) the monitor has no chances to start (it is
4440 // blocked), and primary thread has no means to inform the monitor that
4441 // the library has gone, because all the memory which the monitor can
4442 // access is going to be released/reset.
4443 while (TCR_4(__kmp_init_monitor) < 2) {
4444 KMP_YIELD(TRUE);
4445 }
4446 KF_TRACE(10, ("after monitor thread has started\n"));
4447#endif
4448 }
4449 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
4450 }
4451#endif
4452
4453 KMP_MB();
4454
4455 {
4456 int new_start_gtid = TCR_4(__kmp_init_hidden_helper_threads)
4457 ? 1
4458 : __kmp_hidden_helper_threads_num + 1;
4459
4460 for (new_gtid = new_start_gtid; TCR_PTR(__kmp_threads[new_gtid]) != NULL;
4461 ++new_gtid) {
4462 KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity);
4463 }
4464
4465 if (TCR_4(__kmp_init_hidden_helper_threads)) {
4466 KMP_DEBUG_ASSERT(new_gtid <= __kmp_hidden_helper_threads_num);
4467 }
4468 }
4469
4470 /* allocate space for it. */
4471 new_thr = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));
4472
4473 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
4474
4475#if USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG
4476 // suppress race conditions detection on synchronization flags in debug mode
4477 // this helps to analyze library internals eliminating false positives
4478 __itt_suppress_mark_range(
4479 __itt_suppress_range, __itt_suppress_threading_errors,
4480 &new_thr->th.th_sleep_loc, sizeof(new_thr->th.th_sleep_loc));
4481 __itt_suppress_mark_range(
4482 __itt_suppress_range, __itt_suppress_threading_errors,
4483 &new_thr->th.th_reap_state, sizeof(new_thr->th.th_reap_state));
4484#if KMP_OS_WINDOWS
4485 __itt_suppress_mark_range(
4486 __itt_suppress_range, __itt_suppress_threading_errors,
4487 &new_thr->th.th_suspend_init, sizeof(new_thr->th.th_suspend_init));
4488#else
4489 __itt_suppress_mark_range(__itt_suppress_range,
4490 __itt_suppress_threading_errors,
4491 &new_thr->th.th_suspend_init_count,
4492 sizeof(new_thr->th.th_suspend_init_count));
4493#endif
4494 // TODO: check if we need to also suppress b_arrived flags
4495 __itt_suppress_mark_range(__itt_suppress_range,
4496 __itt_suppress_threading_errors,
4497 CCAST(kmp_uint64 *, &new_thr->th.th_bar[0].bb.b_go),
4498 sizeof(new_thr->th.th_bar[0].bb.b_go));
4499 __itt_suppress_mark_range(__itt_suppress_range,
4500 __itt_suppress_threading_errors,
4501 CCAST(kmp_uint64 *, &new_thr->th.th_bar[1].bb.b_go),
4502 sizeof(new_thr->th.th_bar[1].bb.b_go));
4503 __itt_suppress_mark_range(__itt_suppress_range,
4504 __itt_suppress_threading_errors,
4505 CCAST(kmp_uint64 *, &new_thr->th.th_bar[2].bb.b_go),
4506 sizeof(new_thr->th.th_bar[2].bb.b_go));
4507#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG */
4508 if (__kmp_storage_map) {
4509 __kmp_print_thread_storage_map(new_thr, new_gtid);
4510 }
4511
4512 // add the reserve serialized team, initialized from the team's primary thread
4513 {
4514 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs(team);
4515 KF_TRACE(10, ("__kmp_allocate_thread: before th_serial/serial_team\n"));
4516 new_thr->th.th_serial_team = serial_team =
4517 (kmp_team_t *)__kmp_allocate_team(root, 1, 1,
4518#if OMPT_SUPPORT
4519 ompt_data_none, // root parallel id
4520#endif
4521 proc_bind_default, &r_icvs,
4522 0 USE_NESTED_HOT_ARG(NULL));
4523 }
4524 KMP_ASSERT(serial_team);
4525 serial_team->t.t_serialized = 0; // AC: the team created in reserve, not for
4526 // execution (it is unused for now).
4527 serial_team->t.t_threads[0] = new_thr;
4528 KF_TRACE(10,
4529 ("__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
4530 new_thr));
4531
4532 /* setup the thread structures */
4533 __kmp_initialize_info(new_thr, team, new_tid, new_gtid);
4534
4535#if USE_FAST_MEMORY
4536 __kmp_initialize_fast_memory(new_thr);
4537#endif /* USE_FAST_MEMORY */
4538
4539#if KMP_USE_BGET
4540 KMP_DEBUG_ASSERT(new_thr->th.th_local.bget_data == NULL);
4541 __kmp_initialize_bget(new_thr);
4542#endif
4543
4544 __kmp_init_random(new_thr); // Initialize random number generator
4545
4546 /* Initialize these only once when thread is grabbed for a team allocation */
4547 KA_TRACE(20,
4548 ("__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
4549 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
4550
4551 int b;
4552 kmp_balign_t *balign = new_thr->th.th_bar;
4553 for (b = 0; b < bs_last_barrier; ++b) {
4554 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
4555 balign[b].bb.team = NULL;
4556 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
4557 balign[b].bb.use_oncore_barrier = 0;
4558 }
4559
4560 TCW_PTR(new_thr->th.th_sleep_loc, NULL);
4561 new_thr->th.th_sleep_loc_type = flag_unset;
4562
4563 new_thr->th.th_spin_here = FALSE;
4564 new_thr->th.th_next_waiting = 0;
4565#if KMP_OS_UNIX
4566 new_thr->th.th_blocking = false;
4567#endif
4568
4569#if KMP_AFFINITY_SUPPORTED
4570 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
4571 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
4572 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
4573 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
4574#endif
4575 new_thr->th.th_def_allocator = __kmp_def_allocator;
4576 new_thr->th.th_prev_level = 0;
4577 new_thr->th.th_prev_num_threads = 1;
4578
4579 TCW_4(new_thr->th.th_in_pool, FALSE);
4580 new_thr->th.th_active_in_pool = FALSE;
4581 TCW_4(new_thr->th.th_active, TRUE);
4582
4583 /* adjust the global counters */
4584 __kmp_all_nth++;
4585 __kmp_nth++;
4586
4587 // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) for low
4588 // numbers of procs, and method #2 (keyed API call) for higher numbers.
4589 if (__kmp_adjust_gtid_mode) {
4590 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
4591 if (TCR_4(__kmp_gtid_mode) != 2) {
4592 TCW_4(__kmp_gtid_mode, 2);
4593 }
4594 } else {
4595 if (TCR_4(__kmp_gtid_mode) != 1) {
4596 TCW_4(__kmp_gtid_mode, 1);
4597 }
4598 }
4599 }
4600
4601#ifdef KMP_ADJUST_BLOCKTIME
4602 /* Adjust blocktime back to zero if necessary */
4603 /* Middle initialization might not have occurred yet */
4604 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4605 if (__kmp_nth > __kmp_avail_proc) {
4606 __kmp_zero_bt = TRUE;
4607 }
4608 }
4609#endif /* KMP_ADJUST_BLOCKTIME */
4610
4611 /* actually fork it and create the new worker thread */
4612 KF_TRACE(
4613 10, ("__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr));
4614 __kmp_create_worker(new_gtid, new_thr, __kmp_stksize);
4615 KF_TRACE(10,
4616 ("__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr));
4617
4618 KA_TRACE(20, ("__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(),
4619 new_gtid));
4620 KMP_MB();
4621 return new_thr;
4622}
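
At a high level __kmp_allocate_thread is a classic "reuse from a free pool, otherwise create" allocator: pop a parked thread off the singly linked pool if one exists, otherwise pick an unused gtid and fork a fresh worker. A minimal sketch of that shape follows, with hypothetical Thread/ThreadPool types standing in for kmp_info_t and the __kmp_thread_pool list; the real code additionally updates global counters, barrier state, and ITT bookkeeping.

#include <cstdio>

// Hypothetical stand-in for kmp_info_t; next_pool links free entries.
struct Thread {
  int gtid;
  Thread *next_pool;
};

struct ThreadPool {
  Thread *head = nullptr; // singly linked free list (like __kmp_thread_pool)
  int next_gtid = 1;      // next unused global thread id

  // Pop a pooled thread if one exists, otherwise create a new one.
  Thread *allocate() {
    if (head) {
      Thread *t = head;
      head = t->next_pool;
      t->next_pool = nullptr;
      return t; // reused: keeps its original gtid
    }
    return new Thread{next_gtid++, nullptr};
  }

  // Return a thread to the pool for later reuse.
  void release(Thread *t) {
    t->next_pool = head;
    head = t;
  }
};

int main() {
  ThreadPool pool;
  Thread *a = pool.allocate(); // created (gtid 1)
  pool.release(a);
  Thread *b = pool.allocate(); // reused, same gtid as 'a'
  std::printf("reused=%d gtid=%d\n", a == b, b->gtid);
  delete b;
  return 0;
}
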
4623
4624/* Reinitialize team for reuse.
4625 The hot team code calls this case at every fork barrier, so EPCC barrier
4626 test are extremely sensitive to changes in it, esp. writes to the team
4627 struct, which cause a cache invalidation in all threads.
4628 IF YOU TOUCH THIS ROUTINE, RUN EPCC C SYNCBENCH ON A BIG-IRON MACHINE!!! */
4629static void __kmp_reinitialize_team(kmp_team_t *team,
4630 kmp_internal_control_t *new_icvs,
4631 ident_t *loc) {
4632 KF_TRACE(10, ("__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
4633 team->t.t_threads[0], team));
4634 KMP_DEBUG_ASSERT(team && new_icvs);
4635 KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
4636 KMP_CHECK_UPDATE(team->t.t_ident, loc);
4637
4638 KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
4639 // Copy ICVs to the primary thread's implicit taskdata
4640 __kmp_init_implicit_task(loc, team->t.t_threads[0], team, 0, FALSE);
4641 copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);
4642
4643 KF_TRACE(10, ("__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
4644 team->t.t_threads[0], team));
4645}
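
The EPCC warning above the function is about write traffic: __kmp_reinitialize_team runs at every fork of a hot team, so KMP_CHECK_UPDATE stores a field only when its value actually changes, keeping the shared team struct's cache lines clean. A tiny generic illustration of that check-before-write idea (a stand-alone helper, not the runtime's macro) is:

#include <cstdio>

// Generic analogue of KMP_CHECK_UPDATE: write only if the value changed,
// so re-running initialization on a hot, shared structure does not dirty
// its cache lines (and trigger invalidations in other threads) needlessly.
template <typename T> static void check_update(T &dst, const T &src) {
  if (dst != src)
    dst = src;
}

struct TeamLike {
  int id;
  const void *ident;
};

int main() {
  TeamLike team{7, nullptr};
  const void *loc = &team;
  check_update(team.ident, loc); // changed: store happens
  check_update(team.id, 7);      // unchanged: no store
  std::printf("id=%d ident=%p\n", team.id, team.ident);
  return 0;
}
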
4646
4647/* Initialize the team data structure.
4648 This assumes the t_threads and t_max_nproc are already set.
4649 Also, we don't touch the arguments */
4650static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
4651 kmp_internal_control_t *new_icvs,
4652 ident_t *loc) {
4653 KF_TRACE(10, ("__kmp_initialize_team: enter: team=%p\n", team));
4654
4655 /* verify */
4656 KMP_DEBUG_ASSERT(team);
4657 KMP_DEBUG_ASSERT(new_nproc <= team->t.t_max_nproc);
4658 KMP_DEBUG_ASSERT(team->t.t_threads);
4659 KMP_MB();
4660
4661 team->t.t_master_tid = 0; /* not needed */
4662 /* team->t.t_master_bar; not needed */
4663 team->t.t_serialized = new_nproc > 1 ? 0 : 1;
4664 team->t.t_nproc = new_nproc;
4665
4666 /* team->t.t_parent = NULL; TODO not needed & would mess up hot team */
4667 team->t.t_next_pool = NULL;
4668 /* memset( team->t.t_threads, 0, sizeof(kmp_info_t*)*new_nproc ); would mess
4669 * up hot team */
4670
4671 TCW_SYNC_PTR(team->t.t_pkfn, NULL); /* not needed */
4672 team->t.t_invoke = NULL; /* not needed */
4673
4674 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
4675 team->t.t_sched.sched = new_icvs->sched.sched;
4676
4677#if KMP_ARCH_X86 || KMP_ARCH_X86_64
4678 team->t.t_fp_control_saved = FALSE; /* not needed */
4679 team->t.t_x87_fpu_control_word = 0; /* not needed */
4680 team->t.t_mxcsr = 0; /* not needed */
4681#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
4682
4683 team->t.t_construct = 0;
4684
4685 team->t.t_ordered.dt.t_value = 0;
4686 team->t.t_master_active = FALSE;
4687
4688#ifdef KMP_DEBUG
4689 team->t.t_copypriv_data = NULL; /* not necessary, but nice for debugging */
4690#endif
4691#if KMP_OS_WINDOWS
4692 team->t.t_copyin_counter = 0; /* for barrier-free copyin implementation */
4693#endif
4694
4695 team->t.t_control_stack_top = NULL;
4696
4697 __kmp_reinitialize_team(team, new_icvs, loc);
4698
4699 KMP_MB();
4700 KF_TRACE(10, ("__kmp_initialize_team: exit: team=%p\n", team));
4701}
4702
4703#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
4704/* Sets full mask for thread and returns old mask, no changes to structures. */
4705static void
4706__kmp_set_thread_affinity_mask_full_tmp(kmp_affin_mask_t *old_mask) {
4707 if (KMP_AFFINITY_CAPABLE()) {
4708 int status;
4709 if (old_mask != NULL) {
4710 status = __kmp_get_system_affinity(old_mask, TRUE);
4711 int error = errno;
4712 if (status != 0) {
4713 __kmp_fatal(KMP_MSG(ChangeThreadAffMaskError), KMP_ERR(error),
4714 __kmp_msg_null);
4715 }
4716 }
4717 __kmp_set_system_affinity(__kmp_affin_fullMask, TRUE);
4718 }
4719}
4720#endif
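
On Linux the save-old-mask / set-full-mask step above boils down to a pair of sched_getaffinity/sched_setaffinity calls. A rough standalone equivalent is sketched below, using a plain cpu_set_t instead of kmp_affin_mask_t and omitting the runtime's error-message machinery; this is an illustration of the system calls involved, not the runtime's implementation.

#define _GNU_SOURCE 1
#include <sched.h>
#include <unistd.h>
#include <cstdio>

// Save the calling thread's current mask in *old_mask (if non-null),
// then bind the thread to every configured processor.
static int set_full_affinity(cpu_set_t *old_mask) {
  if (old_mask != nullptr &&
      sched_getaffinity(0, sizeof(cpu_set_t), old_mask) != 0) {
    std::perror("sched_getaffinity");
    return -1;
  }
  cpu_set_t full;
  CPU_ZERO(&full);
  long ncpus = sysconf(_SC_NPROCESSORS_CONF);
  for (long cpu = 0; cpu < ncpus && cpu < CPU_SETSIZE; ++cpu)
    CPU_SET(cpu, &full);
  return sched_setaffinity(0, sizeof(cpu_set_t), &full);
}

int main() {
  cpu_set_t saved;
  if (set_full_affinity(&saved) == 0) {
    // ... work that may run on any processor ...
    sched_setaffinity(0, sizeof(cpu_set_t), &saved); // restore old mask
  }
  return 0;
}
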
4721
4722#if KMP_AFFINITY_SUPPORTED
4723
4724// __kmp_partition_places() is the heart of the OpenMP 4.0 affinity mechanism.
4725// It calculates the worker + primary thread's partition based upon the parent
4726// thread's partition, and binds each worker to a thread in their partition.
4727// The primary thread's partition should already include its current binding.
4728static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
4729 // Do not partition places for the hidden helper team
4730 if (KMP_HIDDEN_HELPER_TEAM(team))
4731 return;
4732 // Copy the primary thread's place partition to the team struct
4733 kmp_info_t *master_th = team->t.t_threads[0];
4734 KMP_DEBUG_ASSERT(master_th != NULL);
4735 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
4736 int first_place = master_th->th.th_first_place;
4737 int last_place = master_th->th.th_last_place;
4738 int masters_place = master_th->th.th_current_place;
4739 int num_masks = __kmp_affinity.num_masks;
4740 team->t.t_first_place = first_place;
4741 team->t.t_last_place = last_place;
4742
4743 KA_TRACE(20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) "
4744 "bound to place %d partition = [%d,%d]\n",
4745 proc_bind, __kmp_gtid_from_thread(team->t.t_threads[0]),
4746 team->t.t_id, masters_place, first_place, last_place));
4747
4748 switch (proc_bind) {
4749
4750 case proc_bind_default:
4751 // Serial teams might have the proc_bind policy set to proc_bind_default.
4752 // Not an issue -- we don't rebind primary thread for any proc_bind policy.
4753 KMP_DEBUG_ASSERT(team->t.t_nproc == 1);
4754 break;
4755
4756 case proc_bind_primary: {
4757 int f;
4758 int n_th = team->t.t_nproc;
4759 for (f = 1; f < n_th; f++) {
4760 kmp_info_t *th = team->t.t_threads[f];
4761 KMP_DEBUG_ASSERT(th != NULL);
4762 th->th.th_first_place = first_place;
4763 th->th.th_last_place = last_place;
4764 th->th.th_new_place = masters_place;
4765 if (__kmp_display_affinity && masters_place != th->th.th_current_place &&
4766 team->t.t_display_affinity != 1) {
4767 team->t.t_display_affinity = 1;
4768 }
4769
4770 KA_TRACE(100, ("__kmp_partition_places: primary: T#%d(%d:%d) place %d "
4771 "partition = [%d,%d]\n",
4772 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
4773 f, masters_place, first_place, last_place));
4774 }
4775 } break;
4776
4777 case proc_bind_close: {
4778 int f;
4779 int n_th = team->t.t_nproc;
4780 int n_places;
4781 if (first_place <= last_place) {
4782 n_places = last_place - first_place + 1;
4783 } else {
4784 n_places = num_masks - first_place + last_place + 1;
4785 }
4786 if (n_th <= n_places) {
4787 int place = masters_place;
4788 for (f = 1; f < n_th; f++) {
4789 kmp_info_t *th = team->t.t_threads[f];
4790 KMP_DEBUG_ASSERT(th != NULL);
4791
4792 if (place == last_place) {
4793 place = first_place;
4794 } else if (place == (num_masks - 1)) {
4795 place = 0;
4796 } else {
4797 place++;
4798 }
4799 th->th.th_first_place = first_place;
4800 th->th.th_last_place = last_place;
4801 th->th.th_new_place = place;
4802 if (__kmp_display_affinity && place != th->th.th_current_place &&
4803 team->t.t_display_affinity != 1) {
4804 team->t.t_display_affinity = 1;
4805 }
4806
4807 KA_TRACE(100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
4808 "partition = [%d,%d]\n",
4809 __kmp_gtid_from_thread(team->t.t_threads[f]),
4810 team->t.t_id, f, place, first_place, last_place));
4811 }
4812 } else {
4813 int S, rem, gap, s_count;
4814 S = n_th / n_places;
4815 s_count = 0;
4816 rem = n_th - (S * n_places);
4817 gap = rem > 0 ? n_places / rem : n_places;
4818 int place = masters_place;
4819 int gap_ct = gap;
4820 for (f = 0; f < n_th; f++) {
4821 kmp_info_t *th = team->t.t_threads[f];
4822 KMP_DEBUG_ASSERT(th != NULL);
4823
4824 th->th.th_first_place = first_place;
4825 th->th.th_last_place = last_place;
4826 th->th.th_new_place = place;
4827 if (__kmp_display_affinity && place != th->th.th_current_place &&
4828 team->t.t_display_affinity != 1) {
4829 team->t.t_display_affinity = 1;
4830 }
4831 s_count++;
4832
4833 if ((s_count == S) && rem && (gap_ct == gap)) {
4834 // do nothing, add an extra thread to place on next iteration
4835 } else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
4836 // we added an extra thread to this place; move to next place
4837 if (place == last_place) {
4838 place = first_place;
4839 } else if (place == (num_masks - 1)) {
4840 place = 0;
4841 } else {
4842 place++;
4843 }
4844 s_count = 0;
4845 gap_ct = 1;
4846 rem--;
4847 } else if (s_count == S) { // place full; don't add extra
4848 if (place == last_place) {
4849 place = first_place;
4850 } else if (place == (num_masks - 1)) {
4851 place = 0;
4852 } else {
4853 place++;
4854 }
4855 gap_ct++;
4856 s_count = 0;
4857 }
4858
4859 KA_TRACE(100,
4860 ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
4861 "partition = [%d,%d]\n",
4862 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f,
4863 th->th.th_new_place, first_place, last_place));
4864 }
4865 KMP_DEBUG_ASSERT(place == masters_place);
4866 }
4867 } break;
4868
4869 case proc_bind_spread: {
4870 int f;
4871 int n_th = team->t.t_nproc;
4872 int n_places;
4873 int thidx;
4874 if (first_place <= last_place) {
4875 n_places = last_place - first_place + 1;
4876 } else {
4877 n_places = num_masks - first_place + last_place + 1;
4878 }
4879 if (n_th <= n_places) {
4880 int place = -1;
4881
4882 if (n_places != num_masks) {
4883 int S = n_places / n_th;
4884 int s_count, rem, gap, gap_ct;
4885
4886 place = masters_place;
4887 rem = n_places - n_th * S;
4888 gap = rem ? n_th / rem : 1;
4889 gap_ct = gap;
4890 thidx = n_th;
4891 if (update_master_only == 1)
4892 thidx = 1;
4893 for (f = 0; f < thidx; f++) {
4894 kmp_info_t *th = team->t.t_threads[f];
4895 KMP_DEBUG_ASSERT(th != NULL);
4896
4897 th->th.th_first_place = place;
4898 th->th.th_new_place = place;
4899 if (__kmp_display_affinity && place != th->th.th_current_place &&
4900 team->t.t_display_affinity != 1) {
4901 team->t.t_display_affinity = 1;
4902 }
4903 s_count = 1;
4904 while (s_count < S) {
4905 if (place == last_place) {
4906 place = first_place;
4907 } else if (place == (num_masks - 1)) {
4908 place = 0;
4909 } else {
4910 place++;
4911 }
4912 s_count++;
4913 }
4914 if (rem && (gap_ct == gap)) {
4915 if (place == last_place) {
4916 place = first_place;
4917 } else if (place == (num_masks - 1)) {
4918 place = 0;
4919 } else {
4920 place++;
4921 }
4922 rem--;
4923 gap_ct = 0;
4924 }
4925 th->th.th_last_place = place;
4926 gap_ct++;
4927
4928 if (place == last_place) {
4929 place = first_place;
4930 } else if (place == (num_masks - 1)) {
4931 place = 0;
4932 } else {
4933 place++;
4934 }
4935
4936 KA_TRACE(100,
4937 ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
4938 "partition = [%d,%d], num_masks: %u\n",
4939 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
4940 f, th->th.th_new_place, th->th.th_first_place,
4941 th->th.th_last_place, num_masks));
4942 }
4943 } else {
4944 /* Having uniform space of available computation places I can create
4945 T partitions of round(P/T) size and put threads into the first
4946 place of each partition. */
4947 double current = static_cast<double>(masters_place);
4948 double spacing =
4949 (static_cast<double>(n_places + 1) / static_cast<double>(n_th));
4950 int first, last;
4951 kmp_info_t *th;
4952
4953 thidx = n_th + 1;
4954 if (update_master_only == 1)
4955 thidx = 1;
4956 for (f = 0; f < thidx; f++) {
4957 first = static_cast<int>(current);
4958 last = static_cast<int>(current + spacing) - 1;
4959 KMP_DEBUG_ASSERT(last >= first);
4960 if (first >= n_places) {
4961 if (masters_place) {
4962 first -= n_places;
4963 last -= n_places;
4964 if (first == (masters_place + 1)) {
4965 KMP_DEBUG_ASSERT(f == n_th);
4966 first--;
4967 }
4968 if (last == masters_place) {
4969 KMP_DEBUG_ASSERT(f == (n_th - 1));
4970 last--;
4971 }
4972 } else {
4973 KMP_DEBUG_ASSERT(f == n_th);
4974 first = 0;
4975 last = 0;
4976 }
4977 }
4978 if (last >= n_places) {
4979 last = (n_places - 1);
4980 }
4981 place = first;
4982 current += spacing;
4983 if (f < n_th) {
4984 KMP_DEBUG_ASSERT(0 <= first);
4985 KMP_DEBUG_ASSERT(n_places > first);
4986 KMP_DEBUG_ASSERT(0 <= last);
4987 KMP_DEBUG_ASSERT(n_places > last);
4988 KMP_DEBUG_ASSERT(last_place >= first_place);
4989 th = team->t.t_threads[f];
4990 KMP_DEBUG_ASSERT(th);
4991 th->th.th_first_place = first;
4992 th->th.th_new_place = place;
4993 th->th.th_last_place = last;
4994 if (__kmp_display_affinity && place != th->th.th_current_place &&
4995 team->t.t_display_affinity != 1) {
4996 team->t.t_display_affinity = 1;
4997 }
4998 KA_TRACE(100,
4999 ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
5000 "partition = [%d,%d], spacing = %.4f\n",
5001 __kmp_gtid_from_thread(team->t.t_threads[f]),
5002 team->t.t_id, f, th->th.th_new_place,
5003 th->th.th_first_place, th->th.th_last_place, spacing));
5004 }
5005 }
5006 }
5007 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
5008 } else {
5009 int S, rem, gap, s_count;
5010 S = n_th / n_places;
5011 s_count = 0;
5012 rem = n_th - (S * n_places);
5013 gap = rem > 0 ? n_places / rem : n_places;
5014 int place = masters_place;
5015 int gap_ct = gap;
5016 thidx = n_th;
5017 if (update_master_only == 1)
5018 thidx = 1;
5019 for (f = 0; f < thidx; f++) {
5020 kmp_info_t *th = team->t.t_threads[f];
5021 KMP_DEBUG_ASSERT(th != NULL);
5022
5023 th->th.th_first_place = place;
5024 th->th.th_last_place = place;
5025 th->th.th_new_place = place;
5026 if (__kmp_display_affinity && place != th->th.th_current_place &&
5027 team->t.t_display_affinity != 1) {
5028 team->t.t_display_affinity = 1;
5029 }
5030 s_count++;
5031
5032 if ((s_count == S) && rem && (gap_ct == gap)) {
5033 // do nothing, add an extra thread to place on next iteration
5034 } else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
5035 // we added an extra thread to this place; move on to next place
5036 if (place == last_place) {
5037 place = first_place;
5038 } else if (place == (num_masks - 1)) {
5039 place = 0;
5040 } else {
5041 place++;
5042 }
5043 s_count = 0;
5044 gap_ct = 1;
5045 rem--;
5046 } else if (s_count == S) { // place is full; don't add extra thread
5047 if (place == last_place) {
5048 place = first_place;
5049 } else if (place == (num_masks - 1)) {
5050 place = 0;
5051 } else {
5052 place++;
5053 }
5054 gap_ct++;
5055 s_count = 0;
5056 }
5057
5058 KA_TRACE(100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
5059 "partition = [%d,%d]\n",
5060 __kmp_gtid_from_thread(team->t.t_threads[f]),
5061 team->t.t_id, f, th->th.th_new_place,
5062 th->th.th_first_place, th->th.th_last_place));
5063 }
5064 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
5065 }
5066 } break;
5067
5068 default:
5069 break;
5070 }
5071
5072 KA_TRACE(20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id));
5073}
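
Every branch of the switch in __kmp_partition_places advances through places with the same wrap-around rule: after the primary's last_place it jumps back to first_place, and after the final mask it wraps to place 0, so a partition may straddle the end of the place list. The small standalone helper below shows just that stepping rule, with hypothetical parameter names independent of the team structures.

#include <cstdio>

// Advance 'place' cyclically through a partition [first_place, last_place]
// that may wrap around the end of the place list (num_masks places total).
static int next_place(int place, int first_place, int last_place,
                      int num_masks) {
  if (place == last_place)
    return first_place; // reached the end of the partition
  if (place == num_masks - 1)
    return 0;           // partition wraps: continue from place 0
  return place + 1;
}

int main() {
  // Partition [6, 1] on 8 places wraps around: prints "6 7 0 1 6 7".
  int place = 6;
  for (int i = 0; i < 6; ++i) {
    std::printf("%d ", place);
    place = next_place(place, /*first_place=*/6, /*last_place=*/1,
                       /*num_masks=*/8);
  }
  std::printf("\n");
  return 0;
}
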
5074
5075#endif // KMP_AFFINITY_SUPPORTED
5076
5077/* allocate a new team data structure to use. take one off of the free pool if
5078 available */
5079kmp_team_t *
5080__kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
5081#if OMPT_SUPPORT
5082 ompt_data_t ompt_parallel_data,
5083#endif
5084 kmp_proc_bind_t new_proc_bind,
5085 kmp_internal_control_t *new_icvs,
5086 int argc USE_NESTED_HOT_ARG(kmp_info_t *master)) {
5087 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);
5088 int f;
5089 kmp_team_t *team;
5090 int use_hot_team = !root->r.r_active;
5091 int level = 0;
5092 int do_place_partition = 1;
5093
5094 KA_TRACE(20, ("__kmp_allocate_team: called\n"));
5095 KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0);
5096 KMP_DEBUG_ASSERT(max_nproc >= new_nproc);
5097 KMP_MB();
5098
5099#if KMP_NESTED_HOT_TEAMS1
5100 kmp_hot_team_ptr_t *hot_teams;
5101 if (master) {
5102 team = master->th.th_team;
5103 level = team->t.t_active_level;
5104 if (master->th.th_teams_microtask) { // in teams construct?
5105 if (master->th.th_teams_size.nteams > 1 &&
5106 ( // #teams > 1
5107 team->t.t_pkfn ==
5108 (microtask_t)__kmp_teams_master || // inner fork of the teams
5109 master->th.th_teams_level <
5110 team->t.t_level)) { // or nested parallel inside the teams
5111 ++level; // not increment if #teams==1, or for outer fork of the teams;
5112 // increment otherwise
5113 }
5114 // Do not perform the place partition if inner fork of the teams
5115 // Wait until nested parallel region encountered inside teams construct
5116 if ((master->th.th_teams_size.nteams == 1 &&
5117 master->th.th_teams_level >= team->t.t_level) ||
5118 (team->t.t_pkfn == (microtask_t)__kmp_teams_master))
5119 do_place_partition = 0;
5120 }
5121 hot_teams = master->th.th_hot_teams;
5122 if (level < __kmp_hot_teams_max_level && hot_teams &&
5123 hot_teams[level].hot_team) {
5124 // hot team has already been allocated for given level
5125 use_hot_team = 1;
5126 } else {
5127 use_hot_team = 0;
5128 }
5129 } else {
5130 // check we won't access uninitialized hot_teams, just in case
5131 KMP_DEBUG_ASSERT(new_nproc == 1);
5132 }
5133#endif
5134 // Optimization to use a "hot" team
5135 if (use_hot_team && new_nproc > 1) {
5136 KMP_DEBUG_ASSERT(new_nproc <= max_nproc);
5137#if KMP_NESTED_HOT_TEAMS
5138 team = hot_teams[level].hot_team;
5139#else
5140 team = root->r.r_hot_team;
5141#endif
5142#if KMP_DEBUG
5143 if (__kmp_tasking_mode != tskm_immediate_exec) {
5144 KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "
5145 "task_team[1] = %p before reinit\n",
5146 team->t.t_task_team[0], team->t.t_task_team[1]));
5147 }
5148#endif
5149
5150 if (team->t.t_nproc != new_nproc &&
5151 __kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5152 // Distributed barrier may need a resize
5153 int old_nthr = team->t.t_nproc;
5154 __kmp_resize_dist_barrier(team, old_nthr, new_nproc);
5155 }
5156
5157 // If not doing the place partition, then reset the team's proc bind
5158 // to indicate that partitioning of all threads still needs to take place
5159 if (do_place_partition == 0)
5160 team->t.t_proc_bind = proc_bind_default;
5161 // Has the number of threads changed?
5162 /* Let's assume the most common case is that the number of threads is
5163 unchanged, and put that case first. */
5164 if (team->t.t_nproc == new_nproc) { // Check changes in number of threads
5165 KA_TRACE(20, ("__kmp_allocate_team: reusing hot team\n"));
5166 // This case can mean that omp_set_num_threads() was called and the hot
5167 // team size was already reduced, so we check the special flag
5168 if (team->t.t_size_changed == -1) {
5169 team->t.t_size_changed = 1;
5170 } else {
5171 KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
5172 }
5173
5174 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
5175 kmp_r_sched_t new_sched = new_icvs->sched;
5176 // set primary thread's schedule as new run-time schedule
5177 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);
5178
5179 __kmp_reinitialize_team(team, new_icvs,
5180 root->r.r_uber_thread->th.th_ident);
5181
5182 KF_TRACE(10, ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", 0,
5183 team->t.t_threads[0], team));
5184 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
5185
5186#if KMP_AFFINITY_SUPPORTED
5187 if ((team->t.t_size_changed == 0) &&
5188 (team->t.t_proc_bind == new_proc_bind)) {
5189 if (new_proc_bind == proc_bind_spread) {
5190 if (do_place_partition) {
5191 // add flag to update only master for spread
5192 __kmp_partition_places(team, 1);
5193 }
5194 }
5195 KA_TRACE(200, ("__kmp_allocate_team: reusing hot team #%d bindings: "
5196 "proc_bind = %d, partition = [%d,%d]\n",
5197 team->t.t_id, new_proc_bind, team->t.t_first_place,
5198 team->t.t_last_place));
5199 } else {
5200 if (do_place_partition) {
5201 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5202 __kmp_partition_places(team);
5203 }
5204 }
5205#else
5206 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5207#endif /* KMP_AFFINITY_SUPPORTED */
5208 } else if (team->t.t_nproc > new_nproc) {
5209 KA_TRACE(20,
5210 ("__kmp_allocate_team: decreasing hot team thread count to %d\n",
5211 new_nproc));
5212
5213 team->t.t_size_changed = 1;
5214 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5215 // Barrier size already reduced earlier in this function
5216 // Activate team threads via th_used_in_team
5217 __kmp_add_threads_to_team(team, new_nproc);
5218 }
5219#if KMP_NESTED_HOT_TEAMS
5220 if (__kmp_hot_teams_mode == 0) {
5221 // AC: saved number of threads should correspond to team's value in this
5222 // mode, can be bigger in mode 1, when hot team has threads in reserve
5223 KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
5224 hot_teams[level].hot_team_nth = new_nproc;
5225#endif // KMP_NESTED_HOT_TEAMS
5226 /* release the extra threads we don't need any more */
5227 for (f = new_nproc; f < team->t.t_nproc; f++) {
5228 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5229 if (__kmp_tasking_mode != tskm_immediate_exec) {
5230 // When decreasing team size, threads no longer in the team should
5231 // unref task team.
5232 team->t.t_threads[f]->th.th_task_team = NULL;
5233 }
5234 __kmp_free_thread(team->t.t_threads[f]);
5235 team->t.t_threads[f] = NULL;
5236 }
5237#if KMP_NESTED_HOT_TEAMS
5238 } // (__kmp_hot_teams_mode == 0)
5239 else {
5240 // When keeping extra threads in team, switch threads to wait on own
5241 // b_go flag
5242 for (f = new_nproc; f < team->t.t_nproc; ++f) {
5243 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5244 kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar;
5245 for (int b = 0; b < bs_last_barrier; ++b) {
5246 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) {
5247 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5248 }
5249 KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0);
5250 }
5251 }
5252 }
5253#endif // KMP_NESTED_HOT_TEAMS
5254 team->t.t_nproc = new_nproc;
5255 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
5256 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_icvs->sched.sched);
5257 __kmp_reinitialize_team(team, new_icvs,
5258 root->r.r_uber_thread->th.th_ident);
5259
5260 // Update remaining threads
5261 for (f = 0; f < new_nproc; ++f) {
5262 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
5263 }
5264
5265 // restore the current task state of the primary thread: should be the
5266 // implicit task
5267 KF_TRACE(10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 0,
5268 team->t.t_threads[0], team));
5269
5270 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
5271
5272#ifdef KMP_DEBUG
5273 for (f = 0; f < team->t.t_nproc; f++) {
5274 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5275 team->t.t_threads[f]->th.th_team_nproc ==
5276 team->t.t_nproc);
5277 }
5278#endif
5279
5280 if (do_place_partition) {
5281 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5282#if KMP_AFFINITY_SUPPORTED
5283 __kmp_partition_places(team);
5284#endif
5285 }
5286 } else { // team->t.t_nproc < new_nproc
5287#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
5288 kmp_affin_mask_t *old_mask;
5289 if (KMP_AFFINITY_CAPABLE()) {
5290 KMP_CPU_ALLOC(old_mask);
5291 }
5292#endif
5293
5294 KA_TRACE(20,
5295 ("__kmp_allocate_team: increasing hot team thread count to %d\n",
5296 new_nproc));
5297 int old_nproc = team->t.t_nproc; // save old value and use to update only
5298 team->t.t_size_changed = 1;
5299
5300#if KMP_NESTED_HOT_TEAMS
5301 int avail_threads = hot_teams[level].hot_team_nth;
5302 if (new_nproc < avail_threads)
5303 avail_threads = new_nproc;
5304 kmp_info_t **other_threads = team->t.t_threads;
5305 for (f = team->t.t_nproc; f < avail_threads; ++f) {
5306 // Adjust barrier data of reserved threads (if any) of the team
5307 // Other data will be set in __kmp_initialize_info() below.
5308 int b;
5309 kmp_balign_t *balign = other_threads[f]->th.th_bar;
5310 for (b = 0; b < bs_last_barrier; ++b) {
5311 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5312 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5313#if USE_DEBUGGER
5314 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5315#endif
5316 }
5317 }
5318 if (hot_teams[level].hot_team_nth >= new_nproc) {
5319 // we have all needed threads in reserve, no need to allocate any
5320 // this only possible in mode 1, cannot have reserved threads in mode 0
5321 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
5322 team->t.t_nproc = new_nproc; // just get reserved threads involved
5323 } else {
5324 // We may have some threads in reserve, but not enough;
5325 // get reserved threads involved if any.
5326 team->t.t_nproc = hot_teams[level].hot_team_nth;
5327 hot_teams[level].hot_team_nth = new_nproc; // adjust hot team max size
5328#endif // KMP_NESTED_HOT_TEAMS
5329 if (team->t.t_max_nproc < new_nproc) {
5330 /* reallocate larger arrays */
5331 __kmp_reallocate_team_arrays(team, new_nproc);
5332 __kmp_reinitialize_team(team, new_icvs, NULL);
5333 }
5334
5335#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
5336 /* Temporarily set full mask for primary thread before creation of
5337 workers. The reason is that workers inherit the affinity from the
5338 primary thread, so if a lot of workers are created on the single
5339 core quickly, they don't get a chance to set their own affinity for
5340 a long time. */
5341 __kmp_set_thread_affinity_mask_full_tmp(old_mask);
5342#endif
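// [Editor's note] A hedged, illustrative sketch of the save/widen/restore
// pattern the comment just above (lines 5336-5340) describes, written with
// plain Linux affinity calls instead of the runtime's KMP_* wrappers; the
// function name with_wide_affinity and the create_workers callback are
// hypothetical, not part of kmp_runtime.cpp.
#include <sched.h>

void with_wide_affinity(void (*create_workers)(void)) {
  cpu_set_t saved, full;
  sched_getaffinity(0, sizeof(saved), &saved); // remember the creator's mask
  CPU_ZERO(&full);
  for (int cpu = 0; cpu < CPU_SETSIZE; ++cpu)
    CPU_SET(cpu, &full);                       // request every possible CPU
  sched_setaffinity(0, sizeof(full), &full);   // widen before spawning workers
  create_workers();                            // new threads inherit 'full'
  sched_setaffinity(0, sizeof(saved), &saved); // restore the original mask
}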
5343
5344 /* allocate new threads for the hot team */
5345 for (f = team->t.t_nproc; f < new_nproc; f++) {
5346 kmp_info_t *new_worker = __kmp_allocate_thread(root, team, f);
5347 KMP_DEBUG_ASSERT(new_worker);
5348 team->t.t_threads[f] = new_worker;
5349
5350 KA_TRACE(20,
5351 ("__kmp_allocate_team: team %d init T#%d arrived: "
5352 "join=%llu, plain=%llu\n",
5353 team->t.t_id, __kmp_gtid_from_tid(f, team), team->t.t_id, f,
5354 team->t.t_bar[bs_forkjoin_barrier].b_arrived,
5355 team->t.t_bar[bs_plain_barrier].b_arrived));
5356
5357 { // Initialize barrier data for new threads.
5358 int b;
5359 kmp_balign_t *balign = new_worker->th.th_bar;
5360 for (b = 0; b < bs_last_barrier; ++b) {
5361 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5362 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag !=
5363 KMP_BARRIER_PARENT_FLAG);
5364#if USE_DEBUGGER
5365 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5366#endif
5367 }
5368 }
5369 }
5370
5371#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
5372 if (KMP_AFFINITY_CAPABLE()) {
5373 /* Restore initial primary thread's affinity mask */
5374 __kmp_set_system_affinity(old_mask, TRUE);
5375 KMP_CPU_FREE(old_mask);
5376 }
5377#endif
5378#if KMP_NESTED_HOT_TEAMS
5379 } // end of check of t_nproc vs. new_nproc vs. hot_team_nth
5380#endif // KMP_NESTED_HOT_TEAMS
5381 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5382 // Barrier size already increased earlier in this function
5383 // Activate team threads via th_used_in_team
5384 __kmp_add_threads_to_team(team, new_nproc);
5385 }
5386 /* make sure everyone is synchronized */
5387 // new threads below
5388 __kmp_initialize_team(team, new_nproc, new_icvs,
5389 root->r.r_uber_thread->th.th_ident);
5390
5391 /* reinitialize the threads */
5392 KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
5393 for (f = 0; f < team->t.t_nproc; ++f)
5394 __kmp_initialize_info(team->t.t_threads[f], team, f,
5395 __kmp_gtid_from_tid(f, team));
5396
5397 if (level) { // set th_task_state for new threads in nested hot team
5398 // __kmp_initialize_info() no longer zeroes th_task_state, so we should
5399 // only need to set the th_task_state for the new threads. th_task_state
5400 // for primary thread will not be accurate until after this in
5401 // __kmp_fork_call(), so we look to the primary thread's memo_stack to
5402 // get the correct value.
5403 for (f = old_nproc; f < team->t.t_nproc; ++f)
5404 team->t.t_threads[f]->th.th_task_state =
5405 team->t.t_threads[0]->th.th_task_state_memo_stack[level];
5406 } else { // set th_task_state for new threads in non-nested hot team
5407 // copy primary thread's state
5408 kmp_uint8 old_state = team->t.t_threads[0]->th.th_task_state;
5409 for (f = old_nproc; f < team->t.t_nproc; ++f)
5410 team->t.t_threads[f]->th.th_task_state = old_state;
5411 }
5412
5413#ifdef KMP_DEBUG
5414 for (f = 0; f < team->t.t_nproc; ++f) {
5415 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5416 team->t.t_threads[f]->th.th_team_nproc ==
5417 team->t.t_nproc);
5418 }
5419#endif
5420
5421 if (do_place_partition) {
5422 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5423#if KMP_AFFINITY_SUPPORTED
5424 __kmp_partition_places(team);
5425#endif
5426 }
5427 } // Check changes in number of threads
5428
5429 kmp_info_t *master = team->t.t_threads[0];
5430 if (master->th.th_teams_microtask) {
5431 for (f = 1; f < new_nproc; ++f) {
5432 // propagate teams construct specific info to workers
5433 kmp_info_t *thr = team->t.t_threads[f];
5434 thr->th.th_teams_microtask = master->th.th_teams_microtask;
5435 thr->th.th_teams_level = master->th.th_teams_level;
5436 thr->th.th_teams_size = master->th.th_teams_size;
5437 }
5438 }
5439#if KMP_NESTED_HOT_TEAMS
5440 if (level) {
5441 // Sync barrier state for nested hot teams, not needed for outermost hot
5442 // team.
5443 for (f = 1; f < new_nproc; ++f) {
5444 kmp_info_t *thr = team->t.t_threads[f];
5445 int b;
5446 kmp_balign_t *balign = thr->th.th_bar;
5447 for (b = 0; b < bs_last_barrier; ++b) {
5448 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5449 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5450#if USE_DEBUGGER
5451 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5452#endif
5453 }
5454 }
5455 }
5456#endif // KMP_NESTED_HOT_TEAMS
5457
5458 /* reallocate space for arguments if necessary */
5459 __kmp_alloc_argv_entries(argc, team, TRUE);
5460 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5461 // The hot team re-uses the previous task team,
5462 // if untouched during the previous release->gather phase.
5463
5464 KF_TRACE(10, (" hot_team = %p\n", team));
5465
5466#if KMP_DEBUG
5467 if (__kmp_tasking_mode != tskm_immediate_exec) {
5468 KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "
5469 "task_team[1] = %p after reinit\n",
5470 team->t.t_task_team[0], team->t.t_task_team[1]));
5471 }
5472#endif
5473
5474#if OMPT_SUPPORT
5475 __ompt_team_assign_id(team, ompt_parallel_data);
5476#endif
5477
5478 KMP_MB();
5479
5480 return team;
5481 }
5482
5483 /* next, let's try to take one from the team pool */
5484 KMP_MB();
5485 for (team = CCAST(kmp_team_t *, __kmp_team_pool); (team);) {
5486 /* TODO: consider resizing undersized teams instead of reaping them, now
5487 that we have a resizing mechanism */
5488 if (team->t.t_max_nproc >= max_nproc) {
5489 /* take this team from the team pool */
5490 __kmp_team_pool = team->t.t_next_pool;
5491
5492 if (max_nproc > 1 &&
5493 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5494 if (!team->t.b) { // Allocate barrier structure
5495 team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
5496 }
5497 }
5498
5499 /* setup the team for fresh use */
5500 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5501
5502 KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and "
5503 "task_team[1] %p to NULL\n",
5504 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5505 team->t.t_task_team[0] = NULL;
5506 team->t.t_task_team[1] = NULL;
5507
5508 /* reallocate space for arguments if necessary */
5509 __kmp_alloc_argv_entries(argc, team, TRUE);
5510 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5511
5512 KA_TRACE(
5513 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5514 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5515 { // Initialize barrier data.
5516 int b;
5517 for (b = 0; b < bs_last_barrier; ++b) {
5518 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5519#if USE_DEBUGGER
5520 team->t.t_bar[b].b_master_arrived = 0;
5521 team->t.t_bar[b].b_team_arrived = 0;
5522#endif
5523 }
5524 }
5525
5526 team->t.t_proc_bind = new_proc_bind;
5527
5528 KA_TRACE(20, ("__kmp_allocate_team: using team from pool %d.\n",
5529 team->t.t_id));
5530
5531#if OMPT_SUPPORT
5532 __ompt_team_assign_id(team, ompt_parallel_data);
5533#endif
5534
5535 KMP_MB();
5536
5537 return team;
5538 }
5539
5540 /* reap team if it is too small, then loop back and check the next one */
5541 // not sure if this is wise, but, will be redone during the hot-teams
5542 // rewrite.
5543 /* TODO: Use technique to find the right size hot-team, don't reap them */
5544 team = __kmp_reap_team(team);
5545 __kmp_team_pool = team;
5546 }
5547
5548 /* nothing available in the pool, no matter, make a new team! */
5549 KMP_MB();
5550 team = (kmp_team_t *)__kmp_allocate(sizeof(kmp_team_t));
5551
5552 /* and set it up */
5553 team->t.t_max_nproc = max_nproc;
5554 if (max_nproc > 1 &&
5555 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5556 // Allocate barrier structure
5557 team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
5558 }
5559
5560 /* NOTE well, for some reason allocating one big buffer and dividing it up
5561 seems to really hurt performance a lot on the P4, so, let's not use this */
5562 __kmp_allocate_team_arrays(team, max_nproc);
5563
5564 KA_TRACE(20, ("__kmp_allocate_team: making a new team\n"));
5565 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5566
5567 KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and task_team[1] "
5568 "%p to NULL\n",
5569 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5570 team->t.t_task_team[0] = NULL; // to be removed, as __kmp_allocate zeroes
5571 // memory, no need to duplicate
5572 team->t.t_task_team[1] = NULL; // to be removed, as __kmp_allocate zeroes
5573 // memory, no need to duplicate
5574
5575 if (__kmp_storage_map) {
5576 __kmp_print_team_storage_map("team", team, team->t.t_id, new_nproc);
5577 }
5578
5579 /* allocate space for arguments */
5580 __kmp_alloc_argv_entries(argc, team, FALSE);
5581 team->t.t_argc = argc;
5582
5583 KA_TRACE(20,
5584 ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5585 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5586 { // Initialize barrier data.
5587 int b;
5588 for (b = 0; b < bs_last_barrier; ++b) {
5589 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5590#if USE_DEBUGGER
5591 team->t.t_bar[b].b_master_arrived = 0;
5592 team->t.t_bar[b].b_team_arrived = 0;
5593#endif
5594 }
5595 }
5596
5597 team->t.t_proc_bind = new_proc_bind;
5598
5599#if OMPT_SUPPORT
5600 __ompt_team_assign_id(team, ompt_parallel_data);
5601 team->t.ompt_serialized_team_info = NULL;
5602#endif
5603
5604 KMP_MB();
5605
5606 KA_TRACE(20, ("__kmp_allocate_team: done creating a new team %d.\n",
5607 team->t.t_id));
5608
5609 return team;
5610}
5611
5612/* TODO implement hot-teams at all levels */
5613/* TODO implement lazy thread release on demand (disband request) */
5614
5615/* free the team. return it to the team pool. release all the threads
5616 * associated with it */
5617void __kmp_free_team(kmp_root_t *root,
5618 kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master)) {
5619 int f;
5620 KA_TRACE(20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(),
5621 team->t.t_id));
5622
5623 /* verify state */
5624 KMP_DEBUG_ASSERT(root);
5625 KMP_DEBUG_ASSERT(team);
5626 KMP_DEBUG_ASSERT(team->t.t_nproc <= team->t.t_max_nproc);
5627 KMP_DEBUG_ASSERT(team->t.t_threads);
5628
5629 int use_hot_team = team == root->r.r_hot_team;
5630#if KMP_NESTED_HOT_TEAMS
5631 int level;
5632 if (master) {
5633 level = team->t.t_active_level - 1;
5634 if (master->th.th_teams_microtask) { // in teams construct?
5635 if (master->th.th_teams_size.nteams > 1) {
5636 ++level; // level was not increased in teams construct for
5637 // team_of_masters
5638 }
5639 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
5640 master->th.th_teams_level == team->t.t_level) {
5641 ++level; // level was not increased in teams construct for
5642 // team_of_workers before the parallel
5643 } // team->t.t_level will be increased inside parallel
5644 }
5645#if KMP_DEBUG
5646 kmp_hot_team_ptr_t *hot_teams = master->th.th_hot_teams;
5647#endif
5648 if (level < __kmp_hot_teams_max_level) {
5649 KMP_DEBUG_ASSERT(team == hot_teams[level].hot_team);
5650 use_hot_team = 1;
5651 }
5652 }
5653#endif // KMP_NESTED_HOT_TEAMS
5654
5655 /* team is done working */
5656 TCW_SYNC_PTR(team->t.t_pkfn,
5657 NULL); // Important for Debugging Support Library.
5658#if KMP_OS_WINDOWS
5659 team->t.t_copyin_counter = 0; // init counter for possible reuse
5660#endif
5661 // Do not reset pointer to parent team to NULL for hot teams.
5662
5663 /* if we are non-hot team, release our threads */
5664 if (!use_hot_team) {
5665 if (__kmp_tasking_mode != tskm_immediate_exec) {
5666 // Wait for threads to reach reapable state
5667 for (f = 1; f < team->t.t_nproc; ++f) {
5668 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5669 kmp_info_t *th = team->t.t_threads[f];
5670 volatile kmp_uint32 *state = &th->th.th_reap_state;
5671 while (*state != KMP_SAFE_TO_REAP) {
5672#if KMP_OS_WINDOWS
5673 // On Windows a thread can be killed at any time, check this
5674 DWORD ecode;
5675 if (!__kmp_is_thread_alive(th, &ecode)) {
5676 *state = KMP_SAFE_TO_REAP; // reset the flag for dead thread
5677 break;
5678 }
5679#endif
5680 // first check if thread is sleeping
5681 kmp_flag_64<> fl(&th->th.th_bar[bs_forkjoin_barrier].bb.b_go, th);
5682 if (fl.is_sleeping())
5683 fl.resume(__kmp_gtid_from_thread(th));
5684 KMP_CPU_PAUSE();
5685 }
5686 }
5687
5688 // Delete task teams
5689 int tt_idx;
5690 for (tt_idx = 0; tt_idx < 2; ++tt_idx) {
5691 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
5692 if (task_team != NULL) {
5693 for (f = 0; f < team->t.t_nproc; ++f) { // threads unref task teams
5694 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5695 team->t.t_threads[f]->th.th_task_team = NULL;
5696 }
5697 KA_TRACE(
5698 20,
5699 ("__kmp_free_team: T#%d deactivating task_team %p on team %d\n",
5700 __kmp_get_gtid(), task_team, team->t.t_id));
5701#if KMP_NESTED_HOT_TEAMS
5702 __kmp_free_task_team(master, task_team);
5703#endif
5704 team->t.t_task_team[tt_idx] = NULL;
5705 }
5706 }
5707 }
5708
5709 // Reset pointer to parent team only for non-hot teams.
5710 team->t.t_parent = NULL;
5711 team->t.t_level = 0;
5712 team->t.t_active_level = 0;
5713
5714 /* free the worker threads */
5715 for (f = 1; f < team->t.t_nproc; ++f) {
5716 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5717 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5718 KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team),
5719 1, 2);
5720 }
5721 __kmp_free_thread(team->t.t_threads[f]);
5722 }
5723
5724 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5725 if (team->t.b) {
5726 // wake up thread at old location
5727 team->t.b->go_release();
5728 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
5729 for (f = 1; f < team->t.t_nproc; ++f) {
5730 if (team->t.b->sleep[f].sleep) {
5731 __kmp_atomic_resume_64(
5732 team->t.t_threads[f]->th.th_info.ds.ds_gtid,
5733 (kmp_atomic_flag_64<> *)NULL);
5734 }
5735 }
5736 }
5737 // Wait for threads to be removed from team
5738 for (int f = 1; f < team->t.t_nproc; ++f) {
5739 while (team->t.t_threads[f]->th.th_used_in_team.load() != 0)
5740 KMP_CPU_PAUSE();
5741 }
5742 }
5743 }
5744
5745 for (f = 1; f < team->t.t_nproc; ++f) {
5746 team->t.t_threads[f] = NULL;
5747 }
5748
5749 if (team->t.t_max_nproc > 1 &&
5750 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5751 distributedBarrier::deallocate(team->t.b);
5752 team->t.b = NULL;
5753 }
5754 /* put the team back in the team pool */
5755 /* TODO limit size of team pool, call reap_team if pool too large */
5756 team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool);
5757 __kmp_team_pool = (volatile kmp_team_t *)team;
5758 } else { // Check if team was created for primary threads in teams construct
5759 // See if first worker is a CG root
5760 KMP_DEBUG_ASSERT(team->t.t_threads[1] &&
5761 team->t.t_threads[1]->th.th_cg_roots);
5762 if (team->t.t_threads[1]->th.th_cg_roots->cg_root == team->t.t_threads[1]) {
5763 // Clean up the CG root nodes on workers so that this team can be re-used
5764 for (f = 1; f < team->t.t_nproc; ++f) {
5765 kmp_info_t *thr = team->t.t_threads[f];
5766 KMP_DEBUG_ASSERT(thr && thr->th.th_cg_roots &&
5767 thr->th.th_cg_roots->cg_root == thr);
5768 // Pop current CG root off list
5769 kmp_cg_root_t *tmp = thr->th.th_cg_roots;
5770 thr->th.th_cg_roots = tmp->up;
5771 KA_TRACE(100, ("__kmp_free_team: Thread %p popping node %p and moving"
5772 " up to node %p. cg_nthreads was %d\n",
5773 thr, tmp, thr->th.th_cg_roots, tmp->cg_nthreads));
5774 int i = tmp->cg_nthreads--;
5775 if (i == 1) {
5776 __kmp_free(tmp); // free CG if we are the last thread in it
5777 }
5778 // Restore current task's thread_limit from CG root
5779 if (thr->th.th_cg_roots)
5780 thr->th.th_current_task->td_icvs.thread_limit =
5781 thr->th.th_cg_roots->cg_thread_limit;
5782 }
5783 }
5784 }
5785
5786 KMP_MB();
5787}
5788
5789/* reap the team. destroy it, reclaim all its resources and free its memory */
5790kmp_team_t *__kmp_reap_team(kmp_team_t *team) {
5791 kmp_team_t *next_pool = team->t.t_next_pool;
5792
5793 KMP_DEBUG_ASSERT(team);
5794 KMP_DEBUG_ASSERT(team->t.t_dispatch);
5795 KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
5796 KMP_DEBUG_ASSERT(team->t.t_threads);
5797 KMP_DEBUG_ASSERT(team->t.t_argv);
5798
5799 /* TODO clean the threads that are a part of this? */
5800
5801 /* free stuff */
5802 __kmp_free_team_arrays(team);
5803 if (team->t.t_argv != &team->t.t_inline_argv[0])
5804 __kmp_free((void *)team->t.t_argv);
5805 __kmp_free(team);
5806
5807 KMP_MB();
5808 return next_pool;
5809}
5810
5811// Free the thread. Don't reap it, just place it on the pool of available
5812// threads.
5813//
5814// Changes for Quad issue 527845: We need a predictable OMP tid <-> gtid
5815// binding for the affinity mechanism to be useful.
5816//
5817// Now, we always keep the free list (__kmp_thread_pool) sorted by gtid.
5818// However, we want to avoid a potential performance problem by always
5819// scanning through the list to find the correct point at which to insert
5820// the thread (potential N**2 behavior). To do this we keep track of the
5821// last place a thread struct was inserted (__kmp_thread_pool_insert_pt).
5822// With single-level parallelism, threads will always be added to the tail
5823// of the list, kept track of by __kmp_thread_pool_insert_pt. With nested
5824// parallelism, all bets are off and we may need to scan through the entire
5825// free list.
5826//
5827// This change also has a potentially large performance benefit, for some
5828// applications. Previously, as threads were freed from the hot team, they
5829// would be placed back on the free list in inverse order. If the hot team
5830 // grew back to its original size, then the freed thread would be placed
5831// back on the hot team in reverse order. This could cause bad cache
5832// locality problems on programs where the size of the hot team regularly
5833// grew and shrunk.
5834//
5835// Now, for single-level parallelism, the OMP tid is always == gtid.
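// [Editor's note] A minimal standalone sketch of the insertion scheme the
// comment block above describes: a singly linked free pool kept sorted by
// ascending gtid, plus a cached insertion hint so the common single-level
// case appends at the tail without scanning. The names Thread, free_pool,
// insert_hint and pool_insert are illustrative only, not the runtime's own
// kmp_info_t / __kmp_thread_pool / __kmp_thread_pool_insert_pt.

struct Thread {
  int gtid;
  Thread *next;
};

static Thread *free_pool = nullptr;   // list kept sorted by ascending gtid
static Thread *insert_hint = nullptr; // where the previous insert landed

void pool_insert(Thread *t) {
  // A hint that already sits past the new gtid is useless; restart from head.
  if (insert_hint && insert_hint->gtid > t->gtid)
    insert_hint = nullptr;
  Thread **scan = insert_hint ? &insert_hint->next : &free_pool;
  // With single-level parallelism this loop runs zero iterations: threads
  // come back in ascending gtid order and the hint is the current tail.
  while (*scan && (*scan)->gtid < t->gtid)
    scan = &(*scan)->next;
  t->next = *scan;
  *scan = t;
  insert_hint = t; // remember the insertion point for the next call
}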
5836void __kmp_free_thread(kmp_info_t *this_th) {
5837 int gtid;
5838 kmp_info_t **scan;
5839
5840 KA_TRACE(20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
5841 __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid));
5842
5843 KMP_DEBUG_ASSERT(this_th);
5844
5845 // When moving thread to pool, switch thread to wait on own b_go flag, and
5846 // uninitialized (NULL team).
5847 int b;
5848 kmp_balign_t *balign = this_th->th.th_bar;
5849 for (b = 0; b < bs_last_barrier; ++b) {
5850 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
5851 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5852 balign[b].bb.team = NULL;
5853 balign[b].bb.leaf_kids = 0;
5854 }
5855 this_th->th.th_task_state = 0;
5856 this_th->th.th_reap_state = KMP_SAFE_TO_REAP;
5857
5858 /* put thread back on the free pool */
5859 TCW_PTR(this_th->th.th_team, NULL);
5860 TCW_PTR(this_th->th.th_root, NULL);
5861 TCW_PTR(this_th->th.th_dispatch, NULL); /* NOT NEEDED */
5862
5863 while (this_th->th.th_cg_roots) {
5864 this_th->th.th_cg_roots->cg_nthreads--;
5865 KA_TRACE(100, ("__kmp_free_thread: Thread %p decrement cg_nthreads on node"
5866 " %p of thread %p to %d\n",
5867 this_th, this_th->th.th_cg_roots,
5868 this_th->th.th_cg_roots->cg_root,
5869 this_th->th.th_cg_roots->cg_nthreads));
5870 kmp_cg_root_t *tmp = this_th->th.th_cg_roots;
5871 if (tmp->cg_root == this_th) { // Thread is a cg_root
5872 KMP_DEBUG_ASSERT(tmp->cg_nthreads == 0);
5873 KA_TRACE(
5874 5, ("__kmp_free_thread: Thread %p freeing node %p\n", this_th, tmp));
5875 this_th->th.th_cg_roots = tmp->up;
5876 __kmp_free(tmp);
5877 } else { // Worker thread
5878 if (tmp->cg_nthreads == 0) { // last thread leaves contention group
5879 __kmp_free(tmp);
5880 }
5881 this_th->th.th_cg_roots = NULL;
5882 break;
5883 }
5884 }
5885
5886 /* If the implicit task assigned to this thread can be used by other threads
5887 * -> multiple threads can share the data and try to free the task at
5888 * __kmp_reap_thread at exit. This duplicate use of the task data can happen
5889 * with higher probability when hot team is disabled but can occur even when
5890 * the hot team is enabled */
5891 __kmp_free_implicit_task(this_th);
5892 this_th->th.th_current_task = NULL;
5893
5894 // If the __kmp_thread_pool_insert_pt is already past the new insert
5895 // point, then we need to re-scan the entire list.
5896 gtid = this_th->th.th_info.ds.ds_gtid;
5897 if (__kmp_thread_pool_insert_pt != NULL) {
5898 KMP_DEBUG_ASSERT(__kmp_thread_pool != NULL);
5899 if (__kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid) {
5900 __kmp_thread_pool_insert_pt = NULL;
5901 }
5902 }
5903
5904 // Scan down the list to find the place to insert the thread.
5905 // scan is the address of a link in the list, possibly the address of
5906 // __kmp_thread_pool itself.
5907 //
5908 // In the absence of nested parallelism, the for loop will have 0 iterations.
5909 if (__kmp_thread_pool_insert_pt != NULL) {
5910 scan = &(__kmp_thread_pool_insert_pt->th.th_next_pool);
5911 } else {
5912 scan = CCAST(kmp_info_t **, &__kmp_thread_pool);
5913 }
5914 for (; (*scan != NULL) && ((*scan)->th.th_info.ds.ds_gtid < gtid);
5915 scan = &((*scan)->th.th_next_pool))
5916 ;
5917
5918 // Insert the new element on the list, and set __kmp_thread_pool_insert_pt
5919 // to its address.
5920 TCW_PTR(this_th->th.th_next_pool, *scan);
5921 __kmp_thread_pool_insert_pt = *scan = this_th;
5922 KMP_DEBUG_ASSERT((this_th->th.th_next_pool == NULL) ||
5923 (this_th->th.th_info.ds.ds_gtid <
5924 this_th->th.th_next_pool->th.th_info.ds.ds_gtid));
5925 TCW_4(this_th->th.th_in_pool, TRUE);
5926 __kmp_suspend_initialize_thread(this_th);
5927 __kmp_lock_suspend_mx(this_th);
5928 if (this_th->th.th_active == TRUE) {
5929 KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
5930 this_th->th.th_active_in_pool = TRUE;
5931 }
5932#if KMP_DEBUG
5933 else {
5934 KMP_DEBUG_ASSERT(this_th->th.th_active_in_pool == FALSE);
5935 }
5936#endif
5937 __kmp_unlock_suspend_mx(this_th);
5938
5939 TCW_4(__kmp_nth, __kmp_nth - 1);
5940
5941#ifdef KMP_ADJUST_BLOCKTIME
5942 /* Adjust blocktime back to user setting or default if necessary */
5943 /* Middle initialization might never have occurred */
5944 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
5945 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
5946 if (__kmp_nth <= __kmp_avail_proc) {
5947 __kmp_zero_bt = FALSE;
5948 }
5949 }
5950#endif /* KMP_ADJUST_BLOCKTIME */
5951
5952 KMP_MB();
5953}
5954
5955/* ------------------------------------------------------------------------ */
5956
5957void *__kmp_launch_thread(kmp_info_t *this_thr) {
5958#if OMP_PROFILING_SUPPORT
5959 ProfileTraceFile = getenv("LIBOMPTARGET_PROFILE");
5960 // TODO: add a configuration option for time granularity
5961 if (ProfileTraceFile)
5962 llvm::timeTraceProfilerInitialize(500 /* us */, "libomptarget");
5963#endif
5964
5965 int gtid = this_thr->th.th_info.ds.ds_gtid;
5966 /* void *stack_data;*/
5967 kmp_team_t **volatile pteam;
5968
5969 KMP_MB();
5970 KA_TRACE(10, ("__kmp_launch_thread: T#%d start\n", gtid));
5971
5972 if (__kmp_env_consistency_check) {
5973 this_thr->th.th_cons = __kmp_allocate_cons_stack(gtid); // ATT: Memory leak?
5974 }
5975
5976#if OMPD_SUPPORT
5977 if (ompd_state & OMPD_ENABLE_BP)
5978 ompd_bp_thread_begin();
5979#endif
5980
5981#if OMPT_SUPPORT
5982 ompt_data_t *thread_data = nullptr;
5983 if (ompt_enabled.enabled) {
5984 thread_data = &(this_thr->th.ompt_thread_info.thread_data);
5985 *thread_data = ompt_data_none;
5986
5987 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5988 this_thr->th.ompt_thread_info.wait_id = 0;
5989 this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0);
5990 this_thr->th.ompt_thread_info.parallel_flags = 0;
5991 if (ompt_enabled.ompt_callback_thread_begin) {
5992 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
5993 ompt_thread_worker, thread_data);
5994 }
5995 this_thr->th.ompt_thread_info.state = ompt_state_idle;
5996 }
5997#endif
5998
5999 /* This is the place where threads wait for work */
6000 while (!TCR_4(__kmp_global.g.g_done)) {
6001 KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]);
6002 KMP_MB();
6003
6004 /* wait for work to do */
6005 KA_TRACE(20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid));
6006
6007 /* No tid yet since not part of a team */
6008 __kmp_fork_barrier(gtid, KMP_GTID_DNE);
6009
6010#if OMPT_SUPPORT
6011 if (ompt_enabled.enabled) {
6012 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6013 }
6014#endif
6015
6016 pteam = &this_thr->th.th_team;
6017
6018 /* have we been allocated? */
6019 if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) {
6020 /* we were just woken up, so run our new task */
6021 if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) {
6022 int rc;
6023 KA_TRACE(20,
6024 ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
6025 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
6026 (*pteam)->t.t_pkfn));
6027
6028 updateHWFPControl(*pteam);
6029
6030#if OMPT_SUPPORT
6031 if (ompt_enabled.enabled) {
6032 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
6033 }
6034#endif
6035
6036 rc = (*pteam)->t.t_invoke(gtid);
6037 KMP_ASSERT(rc);
6038
6039 KMP_MB();
6040 KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
6041 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
6042 (*pteam)->t.t_pkfn));
6043 }
6044#if OMPT_SUPPORT
6045 if (ompt_enabled.enabled) {
6046 /* no frame set while outside task */
6047 __ompt_get_task_info_object(0)->frame.exit_frame = ompt_data_none;
6048
6049 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6050 }
6051#endif
6052 /* join barrier after parallel region */
6053 __kmp_join_barrier(gtid);
6054 }
6055 }
6056 TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);
6057
6058#if OMPD_SUPPORT1
6059 if (ompd_state & OMPD_ENABLE_BP0x1)
6060 ompd_bp_thread_end();
6061#endif
6062
6063#if OMPT_SUPPORT1
6064 if (ompt_enabled.ompt_callback_thread_end) {
6065 ompt_callbacks.ompt_callback(ompt_callback_thread_end)ompt_callback_thread_end_callback(thread_data);
6066 }
6067#endif
6068
6069 this_thr->th.th_task_team = NULL__null;
6070 /* run the destructors for the threadprivate data for this thread */
6071 __kmp_common_destroy_gtid(gtid);
6072
6073 KA_TRACE(10, ("__kmp_launch_thread: T#%d done\n", gtid))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_launch_thread: T#%d done\n"
, gtid); }
;
6074 KMP_MB();
6075
6076#if OMP_PROFILING_SUPPORT0
6077 llvm::timeTraceProfilerFinishThread();
6078#endif
6079 return this_thr;
6080}
6081
6082/* ------------------------------------------------------------------------ */
6083
6084void __kmp_internal_end_dest(void *specific_gtid) {
6085 // Make sure no significant bits are lost
6086 int gtid;
6087 __kmp_type_convert((kmp_intptr_t)specific_gtid - 1, &gtid);
6088
6089 KA_TRACE(30, ("__kmp_internal_end_dest: T#%d\n", gtid))if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_internal_end_dest: T#%d\n"
, gtid); }
;
6090 /* NOTE: the gtid is stored as gtid+1 in the thread-local-storage
6091 * this is because 0 is reserved for the nothing-stored case */
6092
6093 __kmp_internal_end_thread(gtid);
6094}
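// The gtid+1 convention above keeps 0 free to mean "no gtid stored" in TLS.
// A minimal standalone sketch of that encoding, using hypothetical helper
// names (illustrative only; these are not kmp_runtime.cpp functions):
#include <cassert>
#include <cstdint>

// Store gtid+1 so that a raw TLS value of 0 still means "nothing stored".
static inline void *encode_gtid_for_tls(int gtid) {
  return reinterpret_cast<void *>(static_cast<intptr_t>(gtid) + 1);
}

// Invert the encoding, as __kmp_internal_end_dest does before ending the thread.
static inline int decode_gtid_from_tls(void *specific) {
  intptr_t raw = reinterpret_cast<intptr_t>(specific);
  assert(raw != 0 && "0 is reserved for the nothing-stored case");
  return static_cast<int>(raw - 1);
}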
6095
6096#if KMP_OS_UNIX1 && KMP_DYNAMIC_LIB1
6097
6098__attribute__((destructor)) void __kmp_internal_end_dtor(void) {
6099 __kmp_internal_end_atexit();
6100}
6101
6102#endif
6103
6104/* [Windows] josh: when the atexit handler is called, there may still be more
6105 than one thread alive */
6106void __kmp_internal_end_atexit(void) {
6107 KA_TRACE(30, ("__kmp_internal_end_atexit\n"))if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_internal_end_atexit\n"
); }
;
6108 /* [Windows]
6109 josh: ideally, we want to completely shut down the library in this atexit
6110 handler, but stat code that depends on thread specific data for gtid fails
6111 because that data becomes unavailable at some point during the shutdown, so
6112 we call __kmp_internal_end_thread instead. We should eventually remove the
6113 dependency on __kmp_get_specific_gtid in the stat code and use
6114 __kmp_internal_end_library to cleanly shut down the library.
6115
6116 // TODO: Can some of this comment about GVS be removed?
6117 I suspect that the offending stat code is executed when the calling thread
6118 tries to clean up a dead root thread's data structures, resulting in GVS
6119 code trying to close the GVS structures for that thread, but since the stat
6120 code uses __kmp_get_specific_gtid to get the gtid with the assumption that
6121 the calling thread is cleaning up itself instead of another thread, it gets
6122 confused. This happens because allowing a thread to unregister and cleanup
6123 another thread is a recent modification for addressing an issue.
6124 Based on the current design (20050722), a thread may end up
6125 trying to unregister another thread only if thread death does not trigger
6126 the calling of __kmp_internal_end_thread. For Linux* OS, there is the
6127 thread specific data destructor function to detect thread death. For
6128 Windows dynamic, there is DllMain(THREAD_DETACH). For Windows static, there
6129 is nothing. Thus, the workaround is applicable only for Windows static
6130 stat library. */
6131 __kmp_internal_end_library(-1);
6132#if KMP_OS_WINDOWS0
6133 __kmp_close_console();
6134#endif
6135}
6136
6137static void __kmp_reap_thread(kmp_info_t *thread, int is_root) {
6138 // It is assumed __kmp_forkjoin_lock is acquired.
6139
6140 int gtid;
6141
6142 KMP_DEBUG_ASSERT(thread != NULL)if (!(thread != __null)) { __kmp_debug_assert("thread != __null"
, "openmp/runtime/src/kmp_runtime.cpp", 6142); }
;
6143
6144 gtid = thread->th.th_info.ds.ds_gtid;
6145
6146 if (!is_root) {
6147 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME(2147483647)) {
6148 /* Assume the threads are at the fork barrier here */
6149 KA_TRACE(if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n"
, gtid); }
6150 20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n"
, gtid); }
6151 gtid))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n"
, gtid); }
;
6152 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
6153 while (
6154 !KMP_COMPARE_AND_STORE_ACQ32(&(thread->th.th_used_in_team), 0, 3)__sync_bool_compare_and_swap((volatile kmp_uint32 *)(&(thread
->th.th_used_in_team)), (kmp_uint32)(0), (kmp_uint32)(3))
)
6155 KMP_CPU_PAUSE()__kmp_x86_pause();
6156 __kmp_resume_32(gtid, (kmp_flag_32<false, false> *)NULL__null);
6157 } else {
6158 /* Need release fence here to prevent seg faults for tree forkjoin
6159 barrier (GEH) */
6160 kmp_flag_64<> flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
6161 thread);
6162 __kmp_release_64(&flag);
6163 }
6164 }
6165
6166 // Terminate OS thread.
6167 __kmp_reap_worker(thread);
6168
6169 // The thread was killed asynchronously. If it was actively
6170 // spinning in the thread pool, decrement the global count.
6171 //
6172 // There is a small timing hole here - if the worker thread was just waking
6173 // up after sleeping in the pool, had reset its th_active_in_pool flag but
6174 // not decremented the global counter __kmp_thread_pool_active_nth yet, then
6175 // the global counter might not get updated.
6176 //
6177 // Currently, this can only happen as the library is unloaded,
6178 // so there are no harmful side effects.
6179 if (thread->th.th_active_in_pool) {
6180 thread->th.th_active_in_pool = FALSE0;
6181 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth)(&__kmp_thread_pool_active_nth)->fetch_sub(1, std::memory_order_acq_rel
)
;
6182 KMP_DEBUG_ASSERT(__kmp_thread_pool_active_nth >= 0)if (!(__kmp_thread_pool_active_nth >= 0)) { __kmp_debug_assert
("__kmp_thread_pool_active_nth >= 0", "openmp/runtime/src/kmp_runtime.cpp"
, 6182); }
;
6183 }
6184 }
6185
6186 __kmp_free_implicit_task(thread);
6187
6188// Free the fast memory for tasking
6189#if USE_FAST_MEMORY3
6190 __kmp_free_fast_memory(thread);
6191#endif /* USE_FAST_MEMORY */
6192
6193 __kmp_suspend_uninitialize_thread(thread);
6194
6195 KMP_DEBUG_ASSERT(__kmp_threads[gtid] == thread)if (!(__kmp_threads[gtid] == thread)) { __kmp_debug_assert("__kmp_threads[gtid] == thread"
, "openmp/runtime/src/kmp_runtime.cpp", 6195); }
;
6196 TCW_SYNC_PTR(__kmp_threads[gtid], NULL)((__kmp_threads[gtid])) = ((__null));
6197
6198 --__kmp_all_nth;
6199 // __kmp_nth was decremented when thread is added to the pool.
6200
6201#ifdef KMP_ADJUST_BLOCKTIME1
6202 /* Adjust blocktime back to user setting or default if necessary */
6203 /* Middle initialization might never have occurred */
6204 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
6205 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0)if (!(__kmp_avail_proc > 0)) { __kmp_debug_assert("__kmp_avail_proc > 0"
, "openmp/runtime/src/kmp_runtime.cpp", 6205); }
;
6206 if (__kmp_nth <= __kmp_avail_proc) {
6207 __kmp_zero_bt = FALSE0;
6208 }
6209 }
6210#endif /* KMP_ADJUST_BLOCKTIME */
6211
6212 /* free the memory being used */
6213 if (__kmp_env_consistency_check) {
6214 if (thread->th.th_cons) {
6215 __kmp_free_cons_stack(thread->th.th_cons);
6216 thread->th.th_cons = NULL__null;
6217 }
6218 }
6219
6220 if (thread->th.th_pri_common != NULL__null) {
6221 __kmp_free(thread->th.th_pri_common)___kmp_free((thread->th.th_pri_common), "openmp/runtime/src/kmp_runtime.cpp"
, 6221)
;
6222 thread->th.th_pri_common = NULL__null;
6223 }
6224
6225 if (thread->th.th_task_state_memo_stack != NULL__null) {
6226 __kmp_free(thread->th.th_task_state_memo_stack)___kmp_free((thread->th.th_task_state_memo_stack), "openmp/runtime/src/kmp_runtime.cpp"
, 6226)
;
6227 thread->th.th_task_state_memo_stack = NULL__null;
6228 }
6229
6230#if KMP_USE_BGET1
6231 if (thread->th.th_local.bget_data != NULL__null) {
6232 __kmp_finalize_bget(thread);
6233 }
6234#endif
6235
6236#if KMP_AFFINITY_SUPPORTED1
6237 if (thread->th.th_affin_mask != NULL__null) {
6238 KMP_CPU_FREE(thread->th.th_affin_mask)__kmp_affinity_dispatch->deallocate_mask(thread->th.th_affin_mask
)
;
6239 thread->th.th_affin_mask = NULL__null;
6240 }
6241#endif /* KMP_AFFINITY_SUPPORTED */
6242
6243#if KMP_USE_HIER_SCHED0
6244 if (thread->th.th_hier_bar_data != NULL__null) {
6245 __kmp_free(thread->th.th_hier_bar_data)___kmp_free((thread->th.th_hier_bar_data), "openmp/runtime/src/kmp_runtime.cpp"
, 6245)
;
6246 thread->th.th_hier_bar_data = NULL__null;
6247 }
6248#endif
6249
6250 __kmp_reap_team(thread->th.th_serial_team);
6251 thread->th.th_serial_team = NULL__null;
6252 __kmp_free(thread)___kmp_free((thread), "openmp/runtime/src/kmp_runtime.cpp", 6252
)
;
6253
6254 KMP_MB();
6255
6256} // __kmp_reap_thread
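// In short, __kmp_reap_thread tears a worker down in this order: wake it from
// the fork barrier (dist-bar or release-fence path), join the OS thread via
// __kmp_reap_worker, fix up the pool and thread counters, free the implicit
// task and fast-memory pool, uninitialize the suspend machinery, clear the
// __kmp_threads[gtid] slot, release the remaining per-thread allocations
// (consistency stack, threadprivate cache, task-state stack, bget data,
// affinity mask), reap the serial team, and only then free the kmp_info_t
// itself.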
6257
6258static void __kmp_itthash_clean(kmp_info_t *th) {
6259#if USE_ITT_NOTIFY1
6260 if (__kmp_itt_region_domains.count > 0) {
6261 for (int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
6262 kmp_itthash_entry_t *bucket = __kmp_itt_region_domains.buckets[i];
6263 while (bucket) {
6264 kmp_itthash_entry_t *next = bucket->next_in_bucket;
6265 __kmp_thread_free(th, bucket)___kmp_thread_free((th), (bucket), "openmp/runtime/src/kmp_runtime.cpp"
, 6265)
;
6266 bucket = next;
6267 }
6268 }
6269 }
6270 if (__kmp_itt_barrier_domains.count > 0) {
6271 for (int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
6272 kmp_itthash_entry_t *bucket = __kmp_itt_barrier_domains.buckets[i];
6273 while (bucket) {
6274 kmp_itthash_entry_t *next = bucket->next_in_bucket;
6275 __kmp_thread_free(th, bucket)___kmp_thread_free((th), (bucket), "openmp/runtime/src/kmp_runtime.cpp"
, 6275)
;
6276 bucket = next;
6277 }
6278 }
6279 }
6280#endif
6281}
6282
6283static void __kmp_internal_end(void) {
6284 int i;
6285
6286 /* First, unregister the library */
6287 __kmp_unregister_library();
6288
6289#if KMP_OS_WINDOWS0
6290 /* In Win static library, we can't tell when a root actually dies, so we
6291 reclaim the data structures for any root threads that have died but not
6292 unregistered themselves, in order to shut down cleanly.
6293 In Win dynamic library we also can't tell when a thread dies. */
6294 __kmp_reclaim_dead_roots(); // AC: moved here to always clean resources of
6295// dead roots
6296#endif
6297
6298 for (i = 0; i < __kmp_threads_capacity; i++)
6299 if (__kmp_root[i])
6300 if (__kmp_root[i]->r.r_active)
6301 break;
6302 KMP_MB(); /* Flush all pending memory write invalidates. */
6303 TCW_SYNC_4(__kmp_global.g.g_done, TRUE)(__kmp_global.g.g_done) = ((!0));
6304
6305 if (i < __kmp_threads_capacity) {
6306#if KMP_USE_MONITOR
6307 // 2009-09-08 (lev): Other alive roots found. Why do we kill the monitor??
6308 KMP_MB(); /* Flush all pending memory write invalidates. */
6309
6310 // Need to check that monitor was initialized before reaping it. If we are
6311 // called from __kmp_atfork_child (which sets __kmp_init_parallel = 0), then
6312 // __kmp_monitor will appear to contain valid data, but it is only valid in
6313 // the parent process, not the child.
6314 // New behavior (201008): instead of keying off of the flag
6315 // __kmp_init_parallel, the monitor thread creation is keyed off
6316 // of the new flag __kmp_init_monitor.
6317 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6318 if (TCR_4(__kmp_init_monitor)(__kmp_init_monitor)) {
6319 __kmp_reap_monitor(&__kmp_monitor);
6320 TCW_4(__kmp_init_monitor, 0)(__kmp_init_monitor) = (0);
6321 }
6322 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
6323 KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end: monitor reaped\n"
); }
;
6324#endif // KMP_USE_MONITOR
6325 } else {
6326/* TODO move this to cleanup code */
6327#ifdef KMP_DEBUG1
6328 /* make sure that everything has properly ended */
6329 for (i = 0; i < __kmp_threads_capacity; i++) {
6330 if (__kmp_root[i]) {
6331 // KMP_ASSERT( ! KMP_UBER_GTID( i ) ); // AC:
6332 // there can be uber threads alive here
6333 KMP_ASSERT(!__kmp_root[i]->r.r_active)if (!(!__kmp_root[i]->r.r_active)) { __kmp_debug_assert("!__kmp_root[i]->r.r_active"
, "openmp/runtime/src/kmp_runtime.cpp", 6333); }
; // TODO: can they be active?
6334 }
6335 }
6336#endif
6337
6338 KMP_MB();
6339
6340 // Reap the worker threads.
6341 // This is valid for now, but be careful if threads are reaped sooner.
6342 while (__kmp_thread_pool != NULL__null) { // Loop through all the threads in the pool.
6343 // Get the next thread from the pool.
6344 kmp_info_t *thread = CCAST(kmp_info_t *, __kmp_thread_pool)const_cast<kmp_info_t *>(__kmp_thread_pool);
6345 __kmp_thread_pool = thread->th.th_next_pool;
6346 // Reap it.
6347 KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP)if (!(thread->th.th_reap_state == 1)) { __kmp_debug_assert
("thread->th.th_reap_state == 1", "openmp/runtime/src/kmp_runtime.cpp"
, 6347); }
;
6348 thread->th.th_next_pool = NULL__null;
6349 thread->th.th_in_pool = FALSE0;
6350 __kmp_reap_thread(thread, 0);
6351 }
6352 __kmp_thread_pool_insert_pt = NULL__null;
6353
6354 // Reap teams.
6355 while (__kmp_team_pool != NULL__null) { // Loop through all the teams in the pool.
6356 // Get the next team from the pool.
6357 kmp_team_t *team = CCAST(kmp_team_t *, __kmp_team_pool)const_cast<kmp_team_t *>(__kmp_team_pool);
6358 __kmp_team_pool = team->t.t_next_pool;
6359 // Reap it.
6360 team->t.t_next_pool = NULL__null;
6361 __kmp_reap_team(team);
6362 }
6363
6364 __kmp_reap_task_teams();
6365
6366#if KMP_OS_UNIX1
6367 // Threads that are not reaped should not access any resources since they
6368 // are going to be deallocated soon, so the shutdown sequence should wait
6369 // until all threads either exit the final spin-waiting loop or begin
6370 // sleeping after the given blocktime.
6371 for (i = 0; i < __kmp_threads_capacity; i++) {
6372 kmp_info_t *thr = __kmp_threads[i];
6373 while (thr && KMP_ATOMIC_LD_ACQ(&thr->th.th_blocking)(&thr->th.th_blocking)->load(std::memory_order_acquire
)
)
6374 KMP_CPU_PAUSE()__kmp_x86_pause();
6375 }
6376#endif
6377
6378 for (i = 0; i < __kmp_threads_capacity; ++i) {
6379 // TBD: Add some checking...
6380 // Something like KMP_DEBUG_ASSERT( __kmp_thread[ i ] == NULL );
6381 }
6382
6383 /* Make sure all threadprivate destructors get run by joining with all
6384 worker threads before resetting this flag */
6385 TCW_SYNC_4(__kmp_init_common, FALSE)(__kmp_init_common) = (0);
6386
6387 KA_TRACE(10, ("__kmp_internal_end: all workers reaped\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end: all workers reaped\n"
); }
;
6388 KMP_MB();
6389
6390#if KMP_USE_MONITOR
6391 // See note above: One of the possible fixes for CQ138434 / CQ140126
6392 //
6393 // FIXME: push both code fragments down and CSE them?
6394 // push them into __kmp_cleanup() ?
6395 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6396 if (TCR_4(__kmp_init_monitor)(__kmp_init_monitor)) {
6397 __kmp_reap_monitor(&__kmp_monitor);
6398 TCW_4(__kmp_init_monitor, 0)(__kmp_init_monitor) = (0);
6399 }
6400 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
6401 KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end: monitor reaped\n"
); }
;
6402#endif
6403 } /* else !__kmp_global.t_active */
6404 TCW_4(__kmp_init_gtid, FALSE)(__kmp_init_gtid) = (0);
6405 KMP_MB(); /* Flush all pending memory write invalidates. */
6406
6407 __kmp_cleanup();
6408#if OMPT_SUPPORT1
6409 ompt_fini();
6410#endif
6411}
6412
6413void __kmp_internal_end_library(int gtid_req) {
6414 /* if we have already cleaned up, don't try again, it wouldn't be pretty */
6415 /* this shouldn't be a race condition because __kmp_internal_end() is the
6416 only place to clear __kmp_serial_init */
6417 /* we'll check this later too, after we get the lock */
6418 // 2009-09-06: We do not set g_abort without setting g_done. This check looks
6419 // redundant, because the next check will work in any case.
6420 if (__kmp_global.g.g_abort) {
6421 KA_TRACE(11, ("__kmp_internal_end_library: abort, exiting\n"))if (kmp_a_debug >= 11) { __kmp_debug_printf ("__kmp_internal_end_library: abort, exiting\n"
); }
;
6422 /* TODO abort? */
6423 return;
6424 }
6425 if (TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done) || !__kmp_init_serial) {
6426 KA_TRACE(10, ("__kmp_internal_end_library: already finished\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: already finished\n"
); }
;
6427 return;
6428 }
6429
6430 // If hidden helper team has been initialized, we need to deinit it
6431 if (TCR_4(__kmp_init_hidden_helper)(__kmp_init_hidden_helper) &&
6432 !TCR_4(__kmp_hidden_helper_team_done)(__kmp_hidden_helper_team_done)) {
6433 TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE)(__kmp_hidden_helper_team_done) = ((!0));
6434 // First release the main thread to let it continue its work
6435 __kmp_hidden_helper_main_thread_release();
6436 // Wait until the hidden helper team has been destroyed
6437 __kmp_hidden_helper_threads_deinitz_wait();
6438 }
6439
6440 KMP_MB(); /* Flush all pending memory write invalidates. */
6441 /* find out who we are and what we should do */
6442 {
6443 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6444 KA_TRACE(if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: enter T#%d (%d)\n"
, gtid, gtid_req); }
6445 10, ("__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: enter T#%d (%d)\n"
, gtid, gtid_req); }
;
6446 if (gtid == KMP_GTID_SHUTDOWN(-3)) {
6447 KA_TRACE(10, ("__kmp_internal_end_library: !__kmp_init_runtime, system "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: !__kmp_init_runtime, system "
"already shutdown\n"); }
6448 "already shutdown\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: !__kmp_init_runtime, system "
"already shutdown\n"); }
;
6449 return;
6450 } else if (gtid == KMP_GTID_MONITOR(-4)) {
6451 KA_TRACE(10, ("__kmp_internal_end_library: monitor thread, gtid not "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: monitor thread, gtid not "
"registered, or system shutdown\n"); }
6452 "registered, or system shutdown\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: monitor thread, gtid not "
"registered, or system shutdown\n"); }
;
6453 return;
6454 } else if (gtid == KMP_GTID_DNE(-2)) {
6455 KA_TRACE(10, ("__kmp_internal_end_library: gtid not registered or system "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: gtid not registered or system "
"shutdown\n"); }
6456 "shutdown\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: gtid not registered or system "
"shutdown\n"); }
;
6457 /* we don't know who we are, but we may still shut down the library */
6458 } else if (KMP_UBER_GTID(gtid)) {
6459 /* unregister ourselves as an uber thread. gtid is no longer valid */
6460 if (__kmp_root[gtid]->r.r_active) {
6461 __kmp_global.g.g_abort = -1;
6462 TCW_SYNC_4(__kmp_global.g.g_done, TRUE)(__kmp_global.g.g_done) = ((!0));
6463 __kmp_unregister_library();
6464 KA_TRACE(10,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: root still active, abort T#%d\n"
, gtid); }
6465 ("__kmp_internal_end_library: root still active, abort T#%d\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: root still active, abort T#%d\n"
, gtid); }
6466 gtid))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: root still active, abort T#%d\n"
, gtid); }
;
6467 return;
6468 } else {
6469 __kmp_itthash_clean(__kmp_threads[gtid]);
6470 KA_TRACE(if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: unregistering sibling T#%d\n"
, gtid); }
6471 10,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: unregistering sibling T#%d\n"
, gtid); }
6472 ("__kmp_internal_end_library: unregistering sibling T#%d\n", gtid))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: unregistering sibling T#%d\n"
, gtid); }
;
6473 __kmp_unregister_root_current_thread(gtid);
6474 }
6475 } else {
6476/* worker threads may call this function through the atexit handler, if they
6477 * call exit() */
6478/* For now, skip the usual subsequent processing and just dump the debug buffer.
6479 TODO: do a thorough shutdown instead */
6480#ifdef DUMP_DEBUG_ON_EXIT
6481 if (__kmp_debug_buf)
6482 __kmp_dump_debug_buffer();
6483#endif
6480 // Added an unregister-library call here when we switched to shm on Linux;
6485 // if we don't, it will leave lots of files in /dev/shm.
6486 // Clean up the shared memory file before exiting.
6487 __kmp_unregister_library();
6488 return;
6489 }
6490 }
6491 /* synchronize the termination process */
6492 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6493
6494 /* have we already finished */
6495 if (__kmp_global.g.g_abort) {
6496 KA_TRACE(10, ("__kmp_internal_end_library: abort, exiting\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: abort, exiting\n"
); }
;
6497 /* TODO abort? */
6498 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6499 return;
6500 }
6501 if (TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done) || !__kmp_init_serial) {
6502 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6503 return;
6504 }
6505
6506 /* We need this lock to enforce mutex between this reading of
6507 __kmp_threads_capacity and the writing by __kmp_register_root.
6508 Alternatively, we can use a counter of roots that is atomically updated by
6509 __kmp_get_global_thread_id_reg, __kmp_do_serial_initialize and
6510 __kmp_internal_end_*. */
6511 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
6512
6513 /* now we can safely conduct the actual termination */
6514 __kmp_internal_end();
6515
6516 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6517 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6518
6519 KA_TRACE(10, ("__kmp_internal_end_library: exit\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: exit\n"
); }
;
6520
6521#ifdef DUMP_DEBUG_ON_EXIT
6522 if (__kmp_debug_buf)
6523 __kmp_dump_debug_buffer();
6524#endif
6525
6526#if KMP_OS_WINDOWS0
6527 __kmp_close_console();
6528#endif
6529
6530 __kmp_fini_allocator();
6531
6532} // __kmp_internal_end_library
6533
6534void __kmp_internal_end_thread(int gtid_req) {
6535 int i;
6536
6537 /* if we have already cleaned up, don't try again, it wouldn't be pretty */
6538 /* this shouldn't be a race condition because __kmp_internal_end() is the
6539 * only place to clear __kmp_serial_init */
6540 /* we'll check this later too, after we get the lock */
6541 // 2009-09-06: We do not set g_abort without setting g_done. This check looks
6542 // redundant, because the next check will work in any case.
6543 if (__kmp_global.g.g_abort) {
6544 KA_TRACE(11, ("__kmp_internal_end_thread: abort, exiting\n"))if (kmp_a_debug >= 11) { __kmp_debug_printf ("__kmp_internal_end_thread: abort, exiting\n"
); }
;
6545 /* TODO abort? */
6546 return;
6547 }
6548 if (TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done) || !__kmp_init_serial) {
6549 KA_TRACE(10, ("__kmp_internal_end_thread: already finished\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: already finished\n"
); }
;
6550 return;
6551 }
6552
6553 // If hidden helper team has been initialized, we need to deinit it
6554 if (TCR_4(__kmp_init_hidden_helper)(__kmp_init_hidden_helper) &&
6555 !TCR_4(__kmp_hidden_helper_team_done)(__kmp_hidden_helper_team_done)) {
6556 TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE)(__kmp_hidden_helper_team_done) = ((!0));
6557 // First release the main thread to let it continue its work
6558 __kmp_hidden_helper_main_thread_release();
6559 // Wait until the hidden helper team has been destroyed
6560 __kmp_hidden_helper_threads_deinitz_wait();
6561 }
6562
6563 KMP_MB(); /* Flush all pending memory write invalidates. */
6564
6565 /* find out who we are and what we should do */
6566 {
6567 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6568 KA_TRACE(10,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: enter T#%d (%d)\n"
, gtid, gtid_req); }
6569 ("__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: enter T#%d (%d)\n"
, gtid, gtid_req); }
;
6570 if (gtid == KMP_GTID_SHUTDOWN(-3)) {
6571 KA_TRACE(10, ("__kmp_internal_end_thread: !__kmp_init_runtime, system "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: !__kmp_init_runtime, system "
"already shutdown\n"); }
6572 "already shutdown\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: !__kmp_init_runtime, system "
"already shutdown\n"); }
;
6573 return;
6574 } else if (gtid == KMP_GTID_MONITOR(-4)) {
6575 KA_TRACE(10, ("__kmp_internal_end_thread: monitor thread, gtid not "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: monitor thread, gtid not "
"registered, or system shutdown\n"); }
6576 "registered, or system shutdown\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: monitor thread, gtid not "
"registered, or system shutdown\n"); }
;
6577 return;
6578 } else if (gtid == KMP_GTID_DNE(-2)) {
6579 KA_TRACE(10, ("__kmp_internal_end_thread: gtid not registered or system "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: gtid not registered or system "
"shutdown\n"); }
6580 "shutdown\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: gtid not registered or system "
"shutdown\n"); }
;
6581 return;
6582 /* we don't know who we are */
6583 } else if (KMP_UBER_GTID(gtid)) {
6584 /* unregister ourselves as an uber thread. gtid is no longer valid */
6585 if (__kmp_root[gtid]->r.r_active) {
6586 __kmp_global.g.g_abort = -1;
6587 TCW_SYNC_4(__kmp_global.g.g_done, TRUE)(__kmp_global.g.g_done) = ((!0));
6588 KA_TRACE(10,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: root still active, abort T#%d\n"
, gtid); }
6589 ("__kmp_internal_end_thread: root still active, abort T#%d\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: root still active, abort T#%d\n"
, gtid); }
6590 gtid))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: root still active, abort T#%d\n"
, gtid); }
;
6591 return;
6592 } else {
6593 KA_TRACE(10, ("__kmp_internal_end_thread: unregistering sibling T#%d\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: unregistering sibling T#%d\n"
, gtid); }
6594 gtid))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: unregistering sibling T#%d\n"
, gtid); }
;
6595 __kmp_unregister_root_current_thread(gtid);
6596 }
6597 } else {
6598 /* just a worker thread, let's leave */
6599 KA_TRACE(10, ("__kmp_internal_end_thread: worker thread T#%d\n", gtid))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: worker thread T#%d\n"
, gtid); }
;
6600
6601 if (gtid >= 0) {
6602 __kmp_threads[gtid]->th.th_task_team = NULL__null;
6603 }
6604
6605 KA_TRACE(10,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n"
, gtid); }
6606 ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n"
, gtid); }
6607 gtid))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n"
, gtid); }
;
6608 return;
6609 }
6610 }
6611#if KMP_DYNAMIC_LIB1
6612 if (__kmp_pause_status != kmp_hard_paused)
6613 // AC: let's not shut down the dynamic library at the exit of the uber thread,
6614 // because it is better to shut down later in the library destructor.
6615 {
6616 KA_TRACE(10, ("__kmp_internal_end_thread: exiting T#%d\n", gtid_req))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: exiting T#%d\n"
, gtid_req); }
;
6617 return;
6618 }
6619#endif
6620 /* synchronize the termination process */
6621 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6622
6623 /* have we already finished */
6624 if (__kmp_global.g.g_abort) {
6625 KA_TRACE(10, ("__kmp_internal_end_thread: abort, exiting\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: abort, exiting\n"
); }
;
6626 /* TODO abort? */
6627 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6628 return;
6629 }
6630 if (TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done) || !__kmp_init_serial) {
6631 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6632 return;
6633 }
6634
6635 /* We need this lock to enforce mutex between this reading of
6636 __kmp_threads_capacity and the writing by __kmp_register_root.
6637 Alternatively, we can use a counter of roots that is atomically updated by
6638 __kmp_get_global_thread_id_reg, __kmp_do_serial_initialize and
6639 __kmp_internal_end_*. */
6640
6641 /* should we finish the run-time? are all siblings done? */
6642 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
6643
6644 for (i = 0; i < __kmp_threads_capacity; ++i) {
6645 if (KMP_UBER_GTID(i)) {
6646 KA_TRACE(if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n"
, i); }
6647 10,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n"
, i); }
6648 ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n"
, i); }
;
6649 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6650 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6651 return;
6652 }
6653 }
6654
6655 /* now we can safely conduct the actual termination */
6656
6657 __kmp_internal_end();
6658
6659 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6660 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6661
6662 KA_TRACE(10, ("__kmp_internal_end_thread: exit T#%d\n", gtid_req))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: exit T#%d\n"
, gtid_req); }
;
6663
6664#ifdef DUMP_DEBUG_ON_EXIT
6665 if (__kmp_debug_buf)
6666 __kmp_dump_debug_buffer();
6667#endif
6668} // __kmp_internal_end_thread
6669
6670// -----------------------------------------------------------------------------
6671// Library registration stuff.
6672
6673static long __kmp_registration_flag = 0;
6674// Random value used to indicate library initialization.
6675static char *__kmp_registration_str = NULL__null;
6676// Value to be saved in env var __KMP_REGISTERED_LIB_<pid>.
6677
6678static inline char *__kmp_reg_status_name() {
6679/* On RHEL 3u5 if linked statically, getpid() returns different values in
6680 each thread. If registration and unregistration go in different threads
6681 (omp_misc_other_root_exit.cpp test case), the name of registered_lib_env
6682 env var cannot be found, because the name will contain a different pid. */
6683// macOS* complains about name being too long with additional getuid()
6684#if KMP_OS_UNIX1 && !KMP_OS_DARWIN0 && KMP_DYNAMIC_LIB1
6685 return __kmp_str_format("__KMP_REGISTERED_LIB_%d_%d", (int)getpid(),
6686 (int)getuid());
6687#else
6688 return __kmp_str_format("__KMP_REGISTERED_LIB_%d", (int)getpid());
6689#endif
6690} // __kmp_reg_status_name
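// For illustration, on Linux with the dynamic library the name built above is
// "__KMP_REGISTERED_LIB_<pid>_<uid>". A standalone equivalent of that
// formatting (hypothetical helper; the runtime itself uses __kmp_str_format):
#include <cstddef>
#include <cstdio>
#include <unistd.h>

static void sketch_reg_status_name(char *buf, size_t len) {
  // Mirrors "__KMP_REGISTERED_LIB_%d_%d" filled with getpid()/getuid().
  snprintf(buf, len, "__KMP_REGISTERED_LIB_%d_%d", (int)getpid(), (int)getuid());
}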
6691
6692void __kmp_register_library_startup(void) {
6693
6694 char *name = __kmp_reg_status_name(); // Name of the environment variable.
6695 int done = 0;
6696 union {
6697 double dtime;
6698 long ltime;
6699 } time;
6700#if KMP_ARCH_X860 || KMP_ARCH_X86_641
6701 __kmp_initialize_system_tick();
6702#endif
6703 __kmp_read_system_time(&time.dtime);
6704 __kmp_registration_flag = 0xCAFE0000L | (time.ltime & 0x0000FFFFL);
6705 __kmp_registration_str =
6706 __kmp_str_format("%p-%lx-%s", &__kmp_registration_flag,
6707 __kmp_registration_flag, KMP_LIBRARY_FILE"libomp.so.5");
6708
6709 KA_TRACE(50, ("__kmp_register_library_startup: %s=\"%s\"\n", name,if (kmp_a_debug >= 50) { __kmp_debug_printf ("__kmp_register_library_startup: %s=\"%s\"\n"
, name, __kmp_registration_str); }
6710 __kmp_registration_str))if (kmp_a_debug >= 50) { __kmp_debug_printf ("__kmp_register_library_startup: %s=\"%s\"\n"
, name, __kmp_registration_str); }
;
6711
6712 while (!done) {
6713
6714 char *value = NULL__null; // Actual value of the environment variable.
6715
6716#if defined(KMP_USE_SHM)
6717 char *shm_name = __kmp_str_format("/%s", name);
6718 int shm_preexist = 0;
6719 char *data1;
6720 int fd1 = shm_open(shm_name, O_CREAT0100 | O_EXCL0200 | O_RDWR02, 0666);
6721 if ((fd1 == -1) && (errno(*__errno_location ()) == EEXIST17)) {
6722 // file didn't open because it already exists.
6723 // try opening existing file
6724 fd1 = shm_open(shm_name, O_RDWR02, 0666);
6725 if (fd1 == -1) { // file didn't open
6726 // error out here
6727 __kmp_fatal(KMP_MSG(FunctionError, "Can't open SHM")__kmp_msg_format(kmp_i18n_msg_FunctionError, "Can't open SHM"
)
, KMP_ERR(0)__kmp_msg_error_code(0),
6728 __kmp_msg_null);
6729 } else {
6730 // able to open existing file
6731 shm_preexist = 1;
6732 }
6733 } else if (fd1 == -1) { // SHM didn't open; it was due to error other than
6734 // already exists.
6735 // error out here.
6736 __kmp_fatal(KMP_MSG(FunctionError, "Can't open SHM2")__kmp_msg_format(kmp_i18n_msg_FunctionError, "Can't open SHM2"
)
, KMP_ERR(errno)__kmp_msg_error_code((*__errno_location ())),
6737 __kmp_msg_null);
6738 }
6739 if (shm_preexist == 0) {
6740 // we created the SHM; now set its size
6741 if (ftruncate(fd1, SHM_SIZE1024) == -1) {
6742 // error occurred setting the size;
6743 __kmp_fatal(KMP_MSG(FunctionError, "Can't set size of SHM")__kmp_msg_format(kmp_i18n_msg_FunctionError, "Can't set size of SHM"
)
,
6744 KMP_ERR(errno)__kmp_msg_error_code((*__errno_location ())), __kmp_msg_null);
6745 }
6746 }
6747 data1 =
6748 (char *)mmap(0, SHM_SIZE1024, PROT_READ0x1 | PROT_WRITE0x2, MAP_SHARED0x01, fd1, 0);
6749 if (data1 == MAP_FAILED((void *) -1)) {
6750 // failed to map shared memory
6751 __kmp_fatal(KMP_MSG(FunctionError, "Can't map SHM")__kmp_msg_format(kmp_i18n_msg_FunctionError, "Can't map SHM"), KMP_ERR(errno)__kmp_msg_error_code((*__errno_location ())),
6752 __kmp_msg_null);
6753 }
6754 if (shm_preexist == 0) { // set data to SHM, set value
6755 KMP_STRCPY_S(data1, SHM_SIZE, __kmp_registration_str)strcpy(data1, __kmp_registration_str);
6756 }
6757 // Read value from either what we just wrote or existing file.
6758 value = __kmp_str_format("%s", data1); // read value from SHM
6759 munmap(data1, SHM_SIZE1024);
6760 close(fd1);
6761#else // Windows and unix with static library
6762 // Set the environment variable, but do not overwrite it if it already exists.
6763 __kmp_env_set(name, __kmp_registration_str, 0);
6764 // read value to see if it got set
6765 value = __kmp_env_get(name);
6766#endif
6767
6768 if (value != NULL__null && strcmp(value, __kmp_registration_str) == 0) {
6769 done = 1; // Ok, environment variable set successfully, exit the loop.
6770 } else {
6771 // Oops. Write failed. Another copy of OpenMP RTL is in memory.
6772 // Check whether it is alive or dead.
6773 int neighbor = 0; // 0 -- unknown status, 1 -- alive, 2 -- dead.
6774 char *tail = value;
6775 char *flag_addr_str = NULL__null;
6776 char *flag_val_str = NULL__null;
6777 char const *file_name = NULL__null;
6778 __kmp_str_split(tail, '-', &flag_addr_str, &tail);
6779 __kmp_str_split(tail, '-', &flag_val_str, &tail);
6780 file_name = tail;
6781 if (tail != NULL__null) {
6782 unsigned long *flag_addr = 0;
6783 unsigned long flag_val = 0;
6784 KMP_SSCANFsscanf(flag_addr_str, "%p", RCAST(void **, &flag_addr)reinterpret_cast<void **>(&flag_addr));
6785 KMP_SSCANFsscanf(flag_val_str, "%lx", &flag_val);
6786 if (flag_addr != 0 && flag_val != 0 && strcmp(file_name, "") != 0) {
6787 // First, check whether environment-encoded address is mapped into
6788 // addr space.
6789 // If so, dereference it to see if it still has the right value.
6790 if (__kmp_is_address_mapped(flag_addr) && *flag_addr == flag_val) {
6791 neighbor = 1;
6792 } else {
6793 // If not, then we know the other copy of the library is no longer
6794 // running.
6795 neighbor = 2;
6796 }
6797 }
6798 }
6799 switch (neighbor) {
6800 case 0: // Cannot parse environment variable -- neighbor status unknown.
6801 // Assume it is the incompatible format of a future version of the
6802 // library. Assume the other library is alive.
6803 // WARN( ... ); // TODO: Issue a warning.
6804 file_name = "unknown library";
6805 KMP_FALLTHROUGH()[[fallthrough]];
6806 // Attention! Falling through to the next case. That's intentional.
6807 case 1: { // Neighbor is alive.
6808 // Check it is allowed.
6809 char *duplicate_ok = __kmp_env_get("KMP_DUPLICATE_LIB_OK");
6810 if (!__kmp_str_match_true(duplicate_ok)) {
6811 // That's not allowed. Issue fatal error.
6812 __kmp_fatal(KMP_MSG(DuplicateLibrary, KMP_LIBRARY_FILE, file_name)__kmp_msg_format(kmp_i18n_msg_DuplicateLibrary, "libomp.so.5"
, file_name)
,
6813 KMP_HNT(DuplicateLibrary)__kmp_msg_format(kmp_i18n_hnt_DuplicateLibrary), __kmp_msg_null);
6814 }
6815 KMP_INTERNAL_FREE(duplicate_ok)free(duplicate_ok);
6816 __kmp_duplicate_library_ok = 1;
6817 done = 1; // Exit the loop.
6818 } break;
6819 case 2: { // Neighbor is dead.
6820
6821#if defined(KMP_USE_SHM)
6822 // close shared memory.
6823 shm_unlink(shm_name); // this removes file in /dev/shm
6824#else
6825 // Clear the variable and try to register library again.
6826 __kmp_env_unset(name);
6827#endif
6828 } break;
6829 default: {
6830 KMP_DEBUG_ASSERT(0)if (!(0)) { __kmp_debug_assert("0", "openmp/runtime/src/kmp_runtime.cpp"
, 6830); }
;
6831 } break;
6832 }
6833 }
6834 KMP_INTERNAL_FREE((void *)value)free((void *)value);
6835#if defined(KMP_USE_SHM)
6836 KMP_INTERNAL_FREE((void *)shm_name)free((void *)shm_name);
6837#endif
6838 } // while
6839 KMP_INTERNAL_FREE((void *)name)free((void *)name);
6840
6841} // func __kmp_register_library_startup
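// The value registered above is formatted as "<flag address>-<flag value in
// hex>-<library file>", e.g. "0x7f...-cafe1234-libomp.so.5", and the liveness
// check re-parses it, then dereferences the address if it is still mapped.
// A minimal sketch of that parse step with a hypothetical helper (the runtime
// itself uses __kmp_str_split and KMP_SSCANF):
#include <cstdio>

struct reg_value_sketch {
  void *flag_addr;
  unsigned long flag_val;
  char lib[256];
};

// Returns true only when all three "-"-separated fields were recovered.
static bool parse_registration_sketch(const char *value, reg_value_sketch *out) {
  return sscanf(value, "%p-%lx-%255[^\n]", &out->flag_addr, &out->flag_val,
                out->lib) == 3;
}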
6842
6843void __kmp_unregister_library(void) {
6844
6845 char *name = __kmp_reg_status_name();
6846 char *value = NULL__null;
6847
6848#if defined(KMP_USE_SHM)
6849 char *shm_name = __kmp_str_format("/%s", name);
6850 int fd1 = shm_open(shm_name, O_RDONLY00, 0666);
6851 if (fd1 == -1) {
6852 // file did not open. return.
6853 return;
6854 }
6855 char *data1 = (char *)mmap(0, SHM_SIZE1024, PROT_READ0x1, MAP_SHARED0x01, fd1, 0);
6856 if (data1 != MAP_FAILED((void *) -1)) {
6857 value = __kmp_str_format("%s", data1); // read value from SHM
6858 munmap(data1, SHM_SIZE1024);
6859 }
6860 close(fd1);
6861#else
6862 value = __kmp_env_get(name);
6863#endif
6864
6865 KMP_DEBUG_ASSERT(__kmp_registration_flag != 0)if (!(__kmp_registration_flag != 0)) { __kmp_debug_assert("__kmp_registration_flag != 0"
, "openmp/runtime/src/kmp_runtime.cpp", 6865); }
;
6866 KMP_DEBUG_ASSERT(__kmp_registration_str != NULL)if (!(__kmp_registration_str != __null)) { __kmp_debug_assert
("__kmp_registration_str != __null", "openmp/runtime/src/kmp_runtime.cpp"
, 6866); }
;
6867 if (value != NULL__null && strcmp(value, __kmp_registration_str) == 0) {
6868// Ok, this is our variable. Delete it.
6869#if defined(KMP_USE_SHM)
6870 shm_unlink(shm_name); // this removes file in /dev/shm
6871#else
6872 __kmp_env_unset(name);
6873#endif
6874 }
6875
6876#if defined(KMP_USE_SHM)
6877 KMP_INTERNAL_FREE(shm_name)free(shm_name);
6878#endif
6879
6880 KMP_INTERNAL_FREE(__kmp_registration_str)free(__kmp_registration_str);
6881 KMP_INTERNAL_FREE(value)free(value);
6882 KMP_INTERNAL_FREE(name)free(name);
6883
6884 __kmp_registration_flag = 0;
6885 __kmp_registration_str = NULL__null;
6886
6887} // __kmp_unregister_library
6888
6889// End of Library registration stuff.
6890// -----------------------------------------------------------------------------
6891
6892#if KMP_MIC_SUPPORTED((0 || 1) && (1 || 0))
6893
6894static void __kmp_check_mic_type() {
6895 kmp_cpuid_t cpuid_state = {0};
6896 kmp_cpuid_t *cs_p = &cpuid_state;
6897 __kmp_x86_cpuid(1, 0, cs_p);
6898 // We don't support mic1 at the moment
6899 if ((cs_p->eax & 0xff0) == 0xB10) {
6900 __kmp_mic_type = mic2;
6901 } else if ((cs_p->eax & 0xf0ff0) == 0x50670) {
6902 __kmp_mic_type = mic3;
6903 } else {
6904 __kmp_mic_type = non_mic;
6905 }
6906}
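// The masks above select the family/model fields of CPUID leaf 1 EAX
// (family in bits 11:8, model in bits 7:4, extended model in bits 19:16):
// 0xB10 matches family 0x0B, model 1 (Knights Corner), and 0x50670 matches
// family 6 with display model 0x57 (Knights Landing). A standalone sketch of
// the usual signature decoding (illustrative; not a kmp_runtime.cpp helper):
static void decode_cpu_signature(unsigned eax, unsigned *family, unsigned *model) {
  unsigned fam = (eax >> 8) & 0xf;
  unsigned mod = (eax >> 4) & 0xf;
  if (fam == 0x6 || fam == 0xf)
    mod += ((eax >> 16) & 0xf) << 4; // fold in the extended model
  if (fam == 0xf)
    fam += (eax >> 20) & 0xff; // fold in the extended family
  *family = fam;
  *model = mod;
}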
6907
6908#endif /* KMP_MIC_SUPPORTED */
6909
6910#if KMP_HAVE_UMWAIT((0 || 1) && (1 || 0) && !0)
6911static void __kmp_user_level_mwait_init() {
6912 struct kmp_cpuid buf;
6913 __kmp_x86_cpuid(7, 0, &buf);
6914 __kmp_waitpkg_enabled = ((buf.ecx >> 5) & 1);
6915 __kmp_umwait_enabled = __kmp_waitpkg_enabled && __kmp_user_level_mwait;
6916 __kmp_tpause_enabled = __kmp_waitpkg_enabled && (__kmp_tpause_state > 0);
6917 KF_TRACE(30, ("__kmp_user_level_mwait_init: __kmp_umwait_enabled = %d\n",if (kmp_f_debug >= 30) { __kmp_debug_printf ("__kmp_user_level_mwait_init: __kmp_umwait_enabled = %d\n"
, __kmp_umwait_enabled); }
6918 __kmp_umwait_enabled))if (kmp_f_debug >= 30) { __kmp_debug_printf ("__kmp_user_level_mwait_init: __kmp_umwait_enabled = %d\n"
, __kmp_umwait_enabled); }
;
6919}
6920#elif KMP_HAVE_MWAIT((0 || 1) && (1 || 0) && !0)
6921#ifndef AT_INTELPHIUSERMWAIT
6922// Spurious, non-existent value that should always fail to return anything.
6923// Will be replaced with the correct value when we know that.
6924#define AT_INTELPHIUSERMWAIT 10000
6925#endif
6926// The getauxval() function is available in RHEL7 and SLES12. If a system with an
6927// earlier OS is used to build the RTL, we'll use the following internal
6928// function when the entry is not found.
6929unsigned long getauxval(unsigned long) KMP_WEAK_ATTRIBUTE_EXTERNAL;
6930unsigned long getauxval(unsigned long) { return 0; }
6931
6932static void __kmp_user_level_mwait_init() {
6933 // When getauxval() and correct value of AT_INTELPHIUSERMWAIT are available
6934 // use them to find if the user-level mwait is enabled. Otherwise, forcibly
6935 // set __kmp_mwait_enabled=TRUE on Intel MIC if the environment variable
6936 // KMP_USER_LEVEL_MWAIT was set to TRUE.
6937 if (__kmp_mic_type == mic3) {
6938 unsigned long res = getauxval(AT_INTELPHIUSERMWAIT);
6939 if ((res & 0x1) || __kmp_user_level_mwait) {
6940 __kmp_mwait_enabled = TRUE(!0);
6941 if (__kmp_user_level_mwait) {
6942 KMP_INFORM(EnvMwaitWarn)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_EnvMwaitWarn
), __kmp_msg_null)
;
6943 }
6944 } else {
6945 __kmp_mwait_enabled = FALSE0;
6946 }
6947 }
6948 KF_TRACE(30, ("__kmp_user_level_mwait_init: __kmp_mic_type = %d, "if (kmp_f_debug >= 30) { __kmp_debug_printf ("__kmp_user_level_mwait_init: __kmp_mic_type = %d, "
"__kmp_mwait_enabled = %d\n", __kmp_mic_type, __kmp_mwait_enabled
); }
6949 "__kmp_mwait_enabled = %d\n",if (kmp_f_debug >= 30) { __kmp_debug_printf ("__kmp_user_level_mwait_init: __kmp_mic_type = %d, "
"__kmp_mwait_enabled = %d\n", __kmp_mic_type, __kmp_mwait_enabled
); }
6950 __kmp_mic_type, __kmp_mwait_enabled))if (kmp_f_debug >= 30) { __kmp_debug_printf ("__kmp_user_level_mwait_init: __kmp_mic_type = %d, "
"__kmp_mwait_enabled = %d\n", __kmp_mic_type, __kmp_mwait_enabled
); }
;
6951}
6952#endif /* KMP_HAVE_UMWAIT */
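// Both variants above probe hardware wait support: the UMWAIT path reads
// CPUID leaf 7, subleaf 0, where ECX bit 5 reports the WAITPKG feature
// (umwait/tpause). A minimal standalone probe using the GCC/Clang <cpuid.h>
// helper (illustrative; the runtime uses its own __kmp_x86_cpuid wrapper):
#include <cpuid.h>

static bool sketch_has_waitpkg(void) {
  unsigned eax, ebx, ecx, edx;
  if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx))
    return false;
  return ((ecx >> 5) & 1) != 0; // CPUID.(EAX=7,ECX=0):ECX[5] == WAITPKG
}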
6953
6954static void __kmp_do_serial_initialize(void) {
6955 int i, gtid;
6956 size_t size;
6957
6958 KA_TRACE(10, ("__kmp_do_serial_initialize: enter\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_do_serial_initialize: enter\n"
); }
;
6959
6960 KMP_DEBUG_ASSERT(sizeof(kmp_int32) == 4)if (!(sizeof(kmp_int32) == 4)) { __kmp_debug_assert("sizeof(kmp_int32) == 4"
, "openmp/runtime/src/kmp_runtime.cpp", 6960); }
;
6961 KMP_DEBUG_ASSERT(sizeof(kmp_uint32) == 4)if (!(sizeof(kmp_uint32) == 4)) { __kmp_debug_assert("sizeof(kmp_uint32) == 4"
, "openmp/runtime/src/kmp_runtime.cpp", 6961); }
;
6962 KMP_DEBUG_ASSERT(sizeof(kmp_int64) == 8)if (!(sizeof(kmp_int64) == 8)) { __kmp_debug_assert("sizeof(kmp_int64) == 8"
, "openmp/runtime/src/kmp_runtime.cpp", 6962); }
;
6963 KMP_DEBUG_ASSERT(sizeof(kmp_uint64) == 8)if (!(sizeof(kmp_uint64) == 8)) { __kmp_debug_assert("sizeof(kmp_uint64) == 8"
, "openmp/runtime/src/kmp_runtime.cpp", 6963); }
;
6964 KMP_DEBUG_ASSERT(sizeof(kmp_intptr_t) == sizeof(void *))if (!(sizeof(kmp_intptr_t) == sizeof(void *))) { __kmp_debug_assert
("sizeof(kmp_intptr_t) == sizeof(void *)", "openmp/runtime/src/kmp_runtime.cpp"
, 6964); }
;
6965
6966#if OMPT_SUPPORT1
6967 ompt_pre_init();
6968#endif
6969#if OMPD_SUPPORT1
6970 __kmp_env_dump();
6971 ompd_init();
6972#endif
6973
6974 __kmp_validate_locks();
6975
6976 /* Initialize internal memory allocator */
6977 __kmp_init_allocator();
6978
6979 /* Register the library startup via an environment variable or via mapped
6980 shared memory file and check to see whether another copy of the library is
6981 already registered. Since a forked child process is often terminated, we
6982 postpone the registration until middle initialization in the child. */
6983 if (__kmp_need_register_serial)
6984 __kmp_register_library_startup();
6985
6986 /* TODO reinitialization of library */
6987 if (TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done)) {
6988 KA_TRACE(10, ("__kmp_do_serial_initialize: reinitialization of library\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_do_serial_initialize: reinitialization of library\n"
); }
;
6989 }
6990
6991 __kmp_global.g.g_abort = 0;
6992 TCW_SYNC_4(__kmp_global.g.g_done, FALSE)(__kmp_global.g.g_done) = (0);
6993
6994/* initialize the locks */
6995#if KMP_USE_ADAPTIVE_LOCKS(0 || 1) && !0
6996#if KMP_DEBUG_ADAPTIVE_LOCKS0
6997 __kmp_init_speculative_stats();
6998#endif
6999#endif
7000#if KMP_STATS_ENABLED0
7001 __kmp_stats_init();
7002#endif
7003 __kmp_init_lock(&__kmp_global_lock);
7004 __kmp_init_queuing_lock(&__kmp_dispatch_lock);
7005 __kmp_init_lock(&__kmp_debug_lock);
7006 __kmp_init_atomic_lock(&__kmp_atomic_lock);
7007 __kmp_init_atomic_lock(&__kmp_atomic_lock_1i);
7008 __kmp_init_atomic_lock(&__kmp_atomic_lock_2i);
7009 __kmp_init_atomic_lock(&__kmp_atomic_lock_4i);
7010 __kmp_init_atomic_lock(&__kmp_atomic_lock_4r);
7011 __kmp_init_atomic_lock(&__kmp_atomic_lock_8i);
7012 __kmp_init_atomic_lock(&__kmp_atomic_lock_8r);
7013 __kmp_init_atomic_lock(&__kmp_atomic_lock_8c);
7014 __kmp_init_atomic_lock(&__kmp_atomic_lock_10r);
7015 __kmp_init_atomic_lock(&__kmp_atomic_lock_16r);
7016 __kmp_init_atomic_lock(&__kmp_atomic_lock_16c);
7017 __kmp_init_atomic_lock(&__kmp_atomic_lock_20c);
7018 __kmp_init_atomic_lock(&__kmp_atomic_lock_32c);
7019 __kmp_init_bootstrap_lock(&__kmp_forkjoin_lock);
7020 __kmp_init_bootstrap_lock(&__kmp_exit_lock);
7021#if KMP_USE_MONITOR
7022 __kmp_init_bootstrap_lock(&__kmp_monitor_lock);
7023#endif
7024 __kmp_init_bootstrap_lock(&__kmp_tp_cached_lock);
7025
7026 /* conduct initialization and initial setup of configuration */
7027
7028 __kmp_runtime_initialize();
7029
7030#if KMP_MIC_SUPPORTED((0 || 1) && (1 || 0))
7031 __kmp_check_mic_type();
7032#endif
7033
7034// Some global variable initialization moved here from kmp_env_initialize()
7035#ifdef KMP_DEBUG1
7036 kmp_diag = 0;
7037#endif
7038 __kmp_abort_delay = 0;
7039
7040 // From __kmp_init_dflt_team_nth()
7041 /* assume the entire machine will be used */
7042 __kmp_dflt_team_nth_ub = __kmp_xproc;
7043 if (__kmp_dflt_team_nth_ub < KMP_MIN_NTH1) {
7044 __kmp_dflt_team_nth_ub = KMP_MIN_NTH1;
7045 }
7046 if (__kmp_dflt_team_nth_ub > __kmp_sys_max_nth) {
7047 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
7048 }
7049 __kmp_max_nth = __kmp_sys_max_nth;
7050 __kmp_cg_max_nth = __kmp_sys_max_nth;
7051 __kmp_teams_max_nth = __kmp_xproc; // set a "reasonable" default
7052 if (__kmp_teams_max_nth > __kmp_sys_max_nth) {
7053 __kmp_teams_max_nth = __kmp_sys_max_nth;
7054 }
7055
7056 // Three vars below moved here from __kmp_env_initialize() "KMP_BLOCKTIME"
7057 // part
7058 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME(__kmp_is_hybrid_cpu() ? (0) : (200));
7059#if KMP_USE_MONITOR
7060 __kmp_monitor_wakeups =
7061 KMP_WAKEUPS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
7062 __kmp_bt_intervals =
7063 KMP_INTERVALS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
7064#endif
7065 // From "KMP_LIBRARY" part of __kmp_env_initialize()
7066 __kmp_library = library_throughput;
7067 // From KMP_SCHEDULE initialization
7068 __kmp_static = kmp_sch_static_balanced;
7069// AC: do not use analytical here, because it is non-monotonous
7070//__kmp_guided = kmp_sch_guided_iterative_chunked;
7071//__kmp_auto = kmp_sch_guided_analytical_chunked; // AC: it is the default, no
7072// need to repeat assignment
7073// Barrier initialization. Moved here from __kmp_env_initialize() Barrier branch
7074// bit control and barrier method control parts
7075#if KMP_FAST_REDUCTION_BARRIER1
7076#define kmp_reduction_barrier_gather_bb ((int)1)
7077#define kmp_reduction_barrier_release_bb ((int)1)
7078#define kmp_reduction_barrier_gather_pat __kmp_barrier_gather_pat_dflt
7079#define kmp_reduction_barrier_release_pat __kmp_barrier_release_pat_dflt
7080#endif // KMP_FAST_REDUCTION_BARRIER
7081 for (i = bs_plain_barrier; i < bs_last_barrier; i++) {
7082 __kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt;
7083 __kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt;
7084 __kmp_barrier_gather_pattern[i] = __kmp_barrier_gather_pat_dflt;
7085 __kmp_barrier_release_pattern[i] = __kmp_barrier_release_pat_dflt;
7086#if KMP_FAST_REDUCTION_BARRIER1
7087 if (i == bs_reduction_barrier) { // tested and confirmed on ALTIX only (
7088 // lin_64 ): hyper,1
7089 __kmp_barrier_gather_branch_bits[i] = kmp_reduction_barrier_gather_bb;
7090 __kmp_barrier_release_branch_bits[i] = kmp_reduction_barrier_release_bb;
7091 __kmp_barrier_gather_pattern[i] = kmp_reduction_barrier_gather_pat;
7092 __kmp_barrier_release_pattern[i] = kmp_reduction_barrier_release_pat;
7093 }
7094#endif // KMP_FAST_REDUCTION_BARRIER
7095 }
7096#if KMP_FAST_REDUCTION_BARRIER1
7097#undef kmp_reduction_barrier_release_pat
7098#undef kmp_reduction_barrier_gather_pat
7099#undef kmp_reduction_barrier_release_bb
7100#undef kmp_reduction_barrier_gather_bb
7101#endif // KMP_FAST_REDUCTION_BARRIER
7102#if KMP_MIC_SUPPORTED((0 || 1) && (1 || 0))
7103 if (__kmp_mic_type == mic2) { // KNC
7104 // AC: plane=3,2, forkjoin=2,1 are optimal for 240 threads on KNC
7105 __kmp_barrier_gather_branch_bits[bs_plain_barrier] = 3; // plain gather
7106 __kmp_barrier_release_branch_bits[bs_forkjoin_barrier] =
7107 1; // forkjoin release
7108 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
7109 __kmp_barrier_release_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
7110 }
7111#if KMP_FAST_REDUCTION_BARRIER1
7112 if (__kmp_mic_type == mic2) { // KNC
7113 __kmp_barrier_gather_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
7114 __kmp_barrier_release_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
7115 }
7116#endif // KMP_FAST_REDUCTION_BARRIER
7117#endif // KMP_MIC_SUPPORTED
7118
7119// From KMP_CHECKS initialization
7120#ifdef KMP_DEBUG1
7121 __kmp_env_checks = TRUE(!0); /* development versions have the extra checks */
7122#else
7123 __kmp_env_checks = FALSE0; /* port versions do not have the extra checks */
7124#endif
7125
7126 // From "KMP_FOREIGN_THREADS_THREADPRIVATE" initialization
7127 __kmp_foreign_tp = TRUE(!0);
7128
7129 __kmp_global.g.g_dynamic = FALSE0;
7130 __kmp_global.g.g_dynamic_mode = dynamic_default;
7131
7132 __kmp_init_nesting_mode();
7133
7134 __kmp_env_initialize(NULL__null);
7135
7136#if KMP_HAVE_MWAIT((0 || 1) && (1 || 0) && !0) || KMP_HAVE_UMWAIT((0 || 1) && (1 || 0) && !0)
7137 __kmp_user_level_mwait_init();
7138#endif
7139// Print all messages in message catalog for testing purposes.
7140#ifdef KMP_DEBUG1
7141 char const *val = __kmp_env_get("KMP_DUMP_CATALOG");
7142 if (__kmp_str_match_true(val)) {
7143 kmp_str_buf_t buffer;
7144 __kmp_str_buf_init(&buffer){ (&buffer)->str = (&buffer)->bulk; (&buffer
)->size = sizeof((&buffer)->bulk); (&buffer)->
used = 0; (&buffer)->bulk[0] = 0; }
;
7145 __kmp_i18n_dump_catalog(&buffer);
7146 __kmp_printf("%s", buffer.str);
7147 __kmp_str_buf_free(&buffer);
7148 }
7149 __kmp_env_free(&val);
7150#endif
7151
7152 __kmp_threads_capacity =
7153 __kmp_initial_threads_capacity(__kmp_dflt_team_nth_ub);
7154 // Moved here from __kmp_env_initialize() "KMP_ALL_THREADPRIVATE" part
7155 __kmp_tp_capacity = __kmp_default_tp_capacity(
7156 __kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
7157
7158 // If the library is shut down properly, both pools must be NULL. Just in
7159 // case, set them to NULL -- some memory may leak, but subsequent code will
7160 // work even if pools are not freed.
7161 KMP_DEBUG_ASSERT(__kmp_thread_pool == NULL)if (!(__kmp_thread_pool == __null)) { __kmp_debug_assert("__kmp_thread_pool == __null"
, "openmp/runtime/src/kmp_runtime.cpp", 7161); }
;
7162 KMP_DEBUG_ASSERT(__kmp_thread_pool_insert_pt == NULL)if (!(__kmp_thread_pool_insert_pt == __null)) { __kmp_debug_assert
("__kmp_thread_pool_insert_pt == __null", "openmp/runtime/src/kmp_runtime.cpp"
, 7162); }
;
7163 KMP_DEBUG_ASSERT(__kmp_team_pool == NULL)if (!(__kmp_team_pool == __null)) { __kmp_debug_assert("__kmp_team_pool == __null"
, "openmp/runtime/src/kmp_runtime.cpp", 7163); }
;
7164 __kmp_thread_pool = NULL__null;
7165 __kmp_thread_pool_insert_pt = NULL__null;
7166 __kmp_team_pool = NULL__null;
7167
7168 /* Allocate all of the variable sized records */
7169 /* NOTE: __kmp_threads_capacity entries are allocated, but the arrays are
7170 * expandable */
7171 /* Since allocation is cache-aligned, just add extra padding at the end */
7172 size =
7173 (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * __kmp_threads_capacity +
7174 CACHE_LINE64;
7175 __kmp_threads = (kmp_info_t **)__kmp_allocate(size)___kmp_allocate((size), "openmp/runtime/src/kmp_runtime.cpp",
7175)
;
7176 __kmp_root = (kmp_root_t **)((char *)__kmp_threads +
7177 sizeof(kmp_info_t *) * __kmp_threads_capacity);
7178
7179 /* init thread counts */
7180 KMP_DEBUG_ASSERT(__kmp_all_nth ==if (!(__kmp_all_nth == 0)) { __kmp_debug_assert("__kmp_all_nth == 0"
, "openmp/runtime/src/kmp_runtime.cpp", 7181); }
7181 0)if (!(__kmp_all_nth == 0)) { __kmp_debug_assert("__kmp_all_nth == 0"
, "openmp/runtime/src/kmp_runtime.cpp", 7181); }
; // Asserts fail if the library is reinitializing and
7182 KMP_DEBUG_ASSERT(__kmp_nth == 0)if (!(__kmp_nth == 0)) { __kmp_debug_assert("__kmp_nth == 0",
"openmp/runtime/src/kmp_runtime.cpp", 7182); }
; // something was wrong in termination.
7183 __kmp_all_nth = 0;
7184 __kmp_nth = 0;
7185
7186 /* setup the uber master thread and hierarchy */
7187 gtid = __kmp_register_root(TRUE(!0));
7188 KA_TRACE(10, ("__kmp_do_serial_initialize T#%d\n", gtid))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_do_serial_initialize T#%d\n"
, gtid); }
;
7189 KMP_ASSERT(KMP_UBER_GTID(gtid))if (!(KMP_UBER_GTID(gtid))) { __kmp_debug_assert("KMP_UBER_GTID(gtid)"
, "openmp/runtime/src/kmp_runtime.cpp", 7189); }
;
7190 KMP_ASSERT(KMP_INITIAL_GTID(gtid))if (!((0 == (gtid)))) { __kmp_debug_assert("KMP_INITIAL_GTID(gtid)"
, "openmp/runtime/src/kmp_runtime.cpp", 7190); }
;
7191
7192 KMP_MB(); /* Flush all pending memory write invalidates. */
7193
7194 __kmp_common_initialize();
7195
7196#if KMP_OS_UNIX1
7197 /* invoke the child fork handler */
7198 __kmp_register_atfork();
7199#endif
7200
7201#if !KMP_DYNAMIC_LIB1
7202 {
7203 /* Invoke the exit handler when the program finishes, only for static
7204 library. For dynamic library, we already have _fini and DllMain. */
7205 int rc = atexit(__kmp_internal_end_atexit);
7206 if (rc != 0) {
7207 __kmp_fatal(KMP_MSG(FunctionError, "atexit()")__kmp_msg_format(kmp_i18n_msg_FunctionError, "atexit()"), KMP_ERR(rc)__kmp_msg_error_code(rc),
7208 __kmp_msg_null);
7209 }
7210 }
7211#endif
7212
7213#if KMP_HANDLE_SIGNALS(1 || 0)
7214#if KMP_OS_UNIX1
7215 /* NOTE: make sure that this is called before the user installs their own
7216 signal handlers so that the user handlers are called first. this way they
7217 can return false, not call our handler, avoid terminating the library, and
7218 continue execution where they left off. */
7219 __kmp_install_signals(FALSE0);
7220#endif /* KMP_OS_UNIX */
7221#if KMP_OS_WINDOWS0
7222 __kmp_install_signals(TRUE(!0));
7223#endif /* KMP_OS_WINDOWS */
7224#endif
7225
7226 /* we have finished the serial initialization */
7227 __kmp_init_counter++;
7228
7229 __kmp_init_serial = TRUE(!0);
7230
7231 if (__kmp_settings) {
7232 __kmp_env_print();
7233 }
7234
7235 if (__kmp_display_env || __kmp_display_env_verbose) {
7236 __kmp_env_print_2();
7237 }
7238
7239#if OMPT_SUPPORT1
7240 ompt_post_init();
7241#endif
7242
7243 KMP_MB();
7244
7245 KA_TRACE(10, ("__kmp_do_serial_initialize: exit\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_do_serial_initialize: exit\n"
); }
;
7246}
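The allocation just above carves __kmp_threads and __kmp_root out of one cache-aligned block; only the first pointer is ever freed (see the matching comment in __kmp_cleanup further down). A minimal standalone sketch of that layout, with hypothetical names and plain calloc standing in for __kmp_allocate:

#include <cstddef>
#include <cstdlib>

struct info_t; // opaque stand-ins for kmp_info_t / kmp_root_t
struct root_t;

// One allocation, two logical pointer arrays laid out back to back, plus
// padding at the end (64 is an assumed cache-line size; error handling omitted).
static void allocate_registries(int capacity, info_t ***threads, root_t ***roots) {
  std::size_t size = (sizeof(info_t *) + sizeof(root_t *)) * capacity + 64;
  char *block = static_cast<char *>(std::calloc(1, size));
  *threads = reinterpret_cast<info_t **>(block);
  *roots = reinterpret_cast<root_t **>(block + sizeof(info_t *) * capacity);
}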
7247
7248void __kmp_serial_initialize(void) {
7249 if (__kmp_init_serial) {
7250 return;
7251 }
7252 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7253 if (__kmp_init_serial) {
7254 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7255 return;
7256 }
7257 __kmp_do_serial_initialize();
7258 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7259}
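__kmp_serial_initialize (and the __kmp_middle_initialize / __kmp_parallel_initialize wrappers below) all follow the same shape: an unlocked fast-path check, then the bootstrap lock, then a re-check before doing the work. A minimal standalone analogue of that pattern, written with standard C++ primitives and hypothetical names rather than the runtime's own globals:

#include <atomic>
#include <mutex>

static std::atomic<int> g_init_done{0}; // stands in for __kmp_init_serial
static std::mutex g_init_lock;          // stands in for __kmp_initz_lock

static void do_initialize() { /* heavy one-time setup would go here */ }

void ensure_initialized() {
  if (g_init_done.load(std::memory_order_acquire))
    return;                                   // fast path: already done
  std::lock_guard<std::mutex> guard(g_init_lock);
  if (g_init_done.load(std::memory_order_relaxed))
    return;                                   // another thread won the race
  do_initialize();
  g_init_done.store(1, std::memory_order_release);
}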
7260
7261static void __kmp_do_middle_initialize(void) {
7262 int i, j;
7263 int prev_dflt_team_nth;
7264
7265 if (!__kmp_init_serial) {
7266 __kmp_do_serial_initialize();
7267 }
7268
7269 KA_TRACE(10, ("__kmp_middle_initialize: enter\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_middle_initialize: enter\n"
); }
;
7270
7271 if (UNLIKELY(!__kmp_need_register_serial)__builtin_expect(!!(!__kmp_need_register_serial), 0)) {
7272 // We are in a forked child process. The registration was skipped during
7273 // serial initialization in __kmp_atfork_child handler. Do it here.
7274 __kmp_register_library_startup();
7275 }
7276
7277 // Save the previous value for the __kmp_dflt_team_nth so that
7278 // we can avoid some reinitialization if it hasn't changed.
7279 prev_dflt_team_nth = __kmp_dflt_team_nth;
7280
7281#if KMP_AFFINITY_SUPPORTED1
7282 // __kmp_affinity_initialize() will try to set __kmp_ncores to the
7283 // number of cores on the machine.
7284 __kmp_affinity_initialize(__kmp_affinity);
7285
7286#endif /* KMP_AFFINITY_SUPPORTED */
7287
7288 KMP_ASSERT(__kmp_xproc > 0)if (!(__kmp_xproc > 0)) { __kmp_debug_assert("__kmp_xproc > 0"
, "openmp/runtime/src/kmp_runtime.cpp", 7288); }
;
7289 if (__kmp_avail_proc == 0) {
7290 __kmp_avail_proc = __kmp_xproc;
7291 }
7292
7293 // If there were empty places in num_threads list (OMP_NUM_THREADS=,,2,3),
7294 // correct them now
7295 j = 0;
7296 while ((j < __kmp_nested_nth.used) && !__kmp_nested_nth.nth[j]) {
7297 __kmp_nested_nth.nth[j] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub =
7298 __kmp_avail_proc;
7299 j++;
7300 }
7301
7302 if (__kmp_dflt_team_nth == 0) {
7303#ifdef KMP_DFLT_NTH_CORES
7304 // Default #threads = #cores
7305 __kmp_dflt_team_nth = __kmp_ncores;
7306 KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
"__kmp_ncores (%d)\n", __kmp_dflt_team_nth); }
7307 "__kmp_ncores (%d)\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
"__kmp_ncores (%d)\n", __kmp_dflt_team_nth); }
7308 __kmp_dflt_team_nth))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
"__kmp_ncores (%d)\n", __kmp_dflt_team_nth); }
;
7309#else
7310 // Default #threads = #available OS procs
7311 __kmp_dflt_team_nth = __kmp_avail_proc;
7312 KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
"__kmp_avail_proc(%d)\n", __kmp_dflt_team_nth); }
7313 "__kmp_avail_proc(%d)\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
"__kmp_avail_proc(%d)\n", __kmp_dflt_team_nth); }
7314 __kmp_dflt_team_nth))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
"__kmp_avail_proc(%d)\n", __kmp_dflt_team_nth); }
;
7315#endif /* KMP_DFLT_NTH_CORES */
7316 }
7317
7318 if (__kmp_dflt_team_nth < KMP_MIN_NTH1) {
7319 __kmp_dflt_team_nth = KMP_MIN_NTH1;
7320 }
7321 if (__kmp_dflt_team_nth > __kmp_sys_max_nth) {
7322 __kmp_dflt_team_nth = __kmp_sys_max_nth;
7323 }
7324
7325 if (__kmp_nesting_mode > 0)
7326 __kmp_set_nesting_mode_threads();
7327
7328 // There's no harm in continuing if the following check fails,
7329 // but it indicates an error in the previous logic.
7330 KMP_DEBUG_ASSERT(__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub)if (!(__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub)) { __kmp_debug_assert
("__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub", "openmp/runtime/src/kmp_runtime.cpp"
, 7330); }
;
7331
7332 if (__kmp_dflt_team_nth != prev_dflt_team_nth) {
7333 // Run through the __kmp_threads array and set the num threads icv for each
7334 // root thread that is currently registered with the RTL (which has not
7335 // already explicitly set its nthreads-var with a call to
7336 // omp_set_num_threads()).
7337 for (i = 0; i < __kmp_threads_capacity; i++) {
7338 kmp_info_t *thread = __kmp_threads[i];
7339 if (thread == NULL__null)
7340 continue;
7341 if (thread->th.th_current_task->td_icvs.nproc != 0)
7342 continue;
7343
7344 set__nproc(__kmp_threads[i], __kmp_dflt_team_nth)(((__kmp_threads[i])->th.th_current_task->td_icvs.nproc
) = (__kmp_dflt_team_nth))
;
7345 }
7346 }
7347 KA_TRACE(if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n"
, __kmp_dflt_team_nth); }
7348 20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n"
, __kmp_dflt_team_nth); }
7349 ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n"
, __kmp_dflt_team_nth); }
7350 __kmp_dflt_team_nth))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n"
, __kmp_dflt_team_nth); }
;
7351
7352#ifdef KMP_ADJUST_BLOCKTIME1
7353 /* Adjust blocktime to zero if necessary now that __kmp_avail_proc is set */
7354 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
7355 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0)if (!(__kmp_avail_proc > 0)) { __kmp_debug_assert("__kmp_avail_proc > 0"
, "openmp/runtime/src/kmp_runtime.cpp", 7355); }
;
7356 if (__kmp_nth > __kmp_avail_proc) {
7357 __kmp_zero_bt = TRUE(!0);
7358 }
7359 }
7360#endif /* KMP_ADJUST_BLOCKTIME */
7361
7362 /* we have finished middle initialization */
7363 TCW_SYNC_4(__kmp_init_middle, TRUE)(__kmp_init_middle) = ((!0));
7364
7365 KA_TRACE(10, ("__kmp_do_middle_initialize: exit\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_do_middle_initialize: exit\n"
); }
;
7366}
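For the __kmp_dflt_team_nth == 0 path above, the default team size comes from the core count (when KMP_DFLT_NTH_CORES is defined) or from the available processors, and is then clamped between KMP_MIN_NTH and __kmp_sys_max_nth. A small sketch of just that clamping, with stand-in parameters instead of the runtime globals:

#include <algorithm>

// Mirrors the dflt_team_nth == 0 branch above; names are illustrative only.
int default_team_size(int ncores, int avail_proc, int sys_max_nth, int min_nth,
                      bool use_core_count /* KMP_DFLT_NTH_CORES defined? */) {
  int nth = use_core_count ? ncores : avail_proc;
  nth = std::max(nth, min_nth);     // lower bound: KMP_MIN_NTH
  nth = std::min(nth, sys_max_nth); // upper bound: __kmp_sys_max_nth
  return nth;
}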
7367
7368void __kmp_middle_initialize(void) {
7369 if (__kmp_init_middle) {
7370 return;
7371 }
7372 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7373 if (__kmp_init_middle) {
7374 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7375 return;
7376 }
7377 __kmp_do_middle_initialize();
7378 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7379}
7380
7381void __kmp_parallel_initialize(void) {
7382 int gtid = __kmp_entry_gtid()__kmp_get_global_thread_id_reg(); // this might be a new root
7383
7384 /* synchronize parallel initialization (for sibling) */
7385 if (TCR_4(__kmp_init_parallel)(__kmp_init_parallel))
7386 return;
7387 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7388 if (TCR_4(__kmp_init_parallel)(__kmp_init_parallel)) {
7389 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7390 return;
7391 }
7392
7393 /* TODO reinitialization after we have already shut down */
7394 if (TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done)) {
7395 KA_TRACE(if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_parallel_initialize: attempt to init while shutting down\n"
); }
7396 10,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_parallel_initialize: attempt to init while shutting down\n"
); }
7397 ("__kmp_parallel_initialize: attempt to init while shutting down\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_parallel_initialize: attempt to init while shutting down\n"
); }
;
7398 __kmp_infinite_loop();
7399 }
7400
7401 /* jc: The lock __kmp_initz_lock is already held, so calling
7402 __kmp_serial_initialize would cause a deadlock. So we call
7403 __kmp_do_serial_initialize directly. */
7404 if (!__kmp_init_middle) {
7405 __kmp_do_middle_initialize();
7406 }
7407 __kmp_assign_root_init_mask();
7408 __kmp_resume_if_hard_paused();
7409
7410 /* begin initialization */
7411 KA_TRACE(10, ("__kmp_parallel_initialize: enter\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_parallel_initialize: enter\n"
); }
;
7412 KMP_ASSERT(KMP_UBER_GTID(gtid))if (!(KMP_UBER_GTID(gtid))) { __kmp_debug_assert("KMP_UBER_GTID(gtid)"
, "openmp/runtime/src/kmp_runtime.cpp", 7412); }
;
7413
7414#if KMP_ARCH_X860 || KMP_ARCH_X86_641
7415 // Save the FP control regs.
7416 // Worker threads will set theirs to these values at thread startup.
7417 __kmp_store_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word);
7418 __kmp_store_mxcsr(&__kmp_init_mxcsr);
7419 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK0xffffffc0;
7420#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
7421
7422#if KMP_OS_UNIX1
7423#if KMP_HANDLE_SIGNALS(1 || 0)
7424 /* must be after __kmp_serial_initialize */
7425 __kmp_install_signals(TRUE(!0));
7426#endif
7427#endif
7428
7429 __kmp_suspend_initialize();
7430
7431#if defined(USE_LOAD_BALANCE1)
7432 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
7433 __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
7434 }
7435#else
7436 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
7437 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
7438 }
7439#endif
7440
7441 if (__kmp_version) {
7442 __kmp_print_version_2();
7443 }
7444
7445 /* we have finished parallel initialization */
7446 TCW_SYNC_4(__kmp_init_parallel, TRUE)(__kmp_init_parallel) = ((!0));
7447
7448 KMP_MB();
7449 KA_TRACE(10, ("__kmp_parallel_initialize: exit\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_parallel_initialize: exit\n"
); }
;
7450
7451 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7452}
7453
7454void __kmp_hidden_helper_initialize() {
7455 if (TCR_4(__kmp_init_hidden_helper)(__kmp_init_hidden_helper))
7456 return;
7457
7458 // __kmp_parallel_initialize is required before we initialize hidden helper
7459 if (!TCR_4(__kmp_init_parallel)(__kmp_init_parallel))
7460 __kmp_parallel_initialize();
7461
7462 // Double check. Note that this double check should not be placed before
7463 // __kmp_parallel_initialize as it will cause a deadlock.
7464 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7465 if (TCR_4(__kmp_init_hidden_helper)(__kmp_init_hidden_helper)) {
7466 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7467 return;
7468 }
7469
7470#if KMP_AFFINITY_SUPPORTED1
7471 // Initialize hidden helper affinity settings.
7472 // The above __kmp_parallel_initialize() will initialize
7473 // regular affinity (and topology) if not already done.
7474 if (!__kmp_hh_affinity.flags.initialized)
7475 __kmp_affinity_initialize(__kmp_hh_affinity);
7476#endif
7477
7478 // Set the count of hidden helper tasks to be executed to zero
7479 KMP_ATOMIC_ST_REL(&__kmp_unexecuted_hidden_helper_tasks, 0)(&__kmp_unexecuted_hidden_helper_tasks)->store(0, std::
memory_order_release)
;
7480
7481 // Set the global variable indicating that we're initializing hidden helper
7482 // team/threads
7483 TCW_SYNC_4(__kmp_init_hidden_helper_threads, TRUE)(__kmp_init_hidden_helper_threads) = ((!0));
7484
7485 // Platform independent initialization
7486 __kmp_do_initialize_hidden_helper_threads();
7487
7488 // Wait here for the finish of initialization of hidden helper teams
7489 __kmp_hidden_helper_threads_initz_wait();
7490
7491 // We have finished hidden helper initialization
7492 TCW_SYNC_4(__kmp_init_hidden_helper, TRUE)(__kmp_init_hidden_helper) = ((!0));
7493
7494 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7495}
7496
7497/* ------------------------------------------------------------------------ */
7498
7499void __kmp_run_before_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
7500 kmp_team_t *team) {
7501 kmp_disp_t *dispatch;
7502
7503 KMP_MB();
7504
7505 /* none of the threads have encountered any constructs, yet. */
7506 this_thr->th.th_local.this_construct = 0;
7507#if KMP_CACHE_MANAGE
7508 KMP_CACHE_PREFETCH(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_arrived);
7509#endif /* KMP_CACHE_MANAGE */
7510 dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch)((void *)(this_thr->th.th_dispatch));
7511 KMP_DEBUG_ASSERT(dispatch)if (!(dispatch)) { __kmp_debug_assert("dispatch", "openmp/runtime/src/kmp_runtime.cpp"
, 7511); }
;
7512 KMP_DEBUG_ASSERT(team->t.t_dispatch)if (!(team->t.t_dispatch)) { __kmp_debug_assert("team->t.t_dispatch"
, "openmp/runtime/src/kmp_runtime.cpp", 7512); }
;
7513 // KMP_DEBUG_ASSERT( this_thr->th.th_dispatch == &team->t.t_dispatch[
7514 // this_thr->th.th_info.ds.ds_tid ] );
7515
7516 dispatch->th_disp_index = 0; /* reset the dispatch buffer counter */
7517 dispatch->th_doacross_buf_idx = 0; // reset doacross dispatch buffer counter
7518 if (__kmp_env_consistency_check)
7519 __kmp_push_parallel(gtid, team->t.t_ident);
7520
7521 KMP_MB(); /* Flush all pending memory write invalidates. */
7522}
7523
7524void __kmp_run_after_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
7525 kmp_team_t *team) {
7526 if (__kmp_env_consistency_check)
7527 __kmp_pop_parallel(gtid, team->t.t_ident);
7528
7529 __kmp_finish_implicit_task(this_thr);
7530}
7531
7532int __kmp_invoke_task_func(int gtid) {
7533 int rc;
7534 int tid = __kmp_tid_from_gtid(gtid);
7535 kmp_info_t *this_thr = __kmp_threads[gtid];
7536 kmp_team_t *team = this_thr->th.th_team;
7537
7538 __kmp_run_before_invoked_task(gtid, tid, this_thr, team);
7539#if USE_ITT_BUILD1
7540 if (__itt_stack_caller_create_ptr__kmp_itt_stack_caller_create_ptr__3_0) {
7541 // inform ittnotify about entering user's code
7542 if (team->t.t_stack_id != NULL__null) {
7543 __kmp_itt_stack_callee_enter((__itt_caller)team->t.t_stack_id);
7544 } else {
7545 KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL)if (!(team->t.t_parent->t.t_stack_id != __null)) { __kmp_debug_assert
("team->t.t_parent->t.t_stack_id != __null", "openmp/runtime/src/kmp_runtime.cpp"
, 7545); }
;
7546 __kmp_itt_stack_callee_enter(
7547 (__itt_caller)team->t.t_parent->t.t_stack_id);
7548 }
7549 }
7550#endif /* USE_ITT_BUILD */
7551#if INCLUDE_SSC_MARKS(1 && 1)
7552 SSC_MARK_INVOKING()__asm__ __volatile__("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 "
::"i"(0xd695) : "%ebx")
;
7553#endif
7554
7555#if OMPT_SUPPORT1
7556 void *dummy;
7557 void **exit_frame_p;
7558 ompt_data_t *my_task_data;
7559 ompt_data_t *my_parallel_data;
7560 int ompt_team_size;
7561
7562 if (ompt_enabled.enabled) {
7563 exit_frame_p = &(team->t.t_implicit_task_taskdata[tid]
7564 .ompt_task_info.frame.exit_frame.ptr);
7565 } else {
7566 exit_frame_p = &dummy;
7567 }
7568
7569 my_task_data =
7570 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data);
7571 my_parallel_data = &(team->t.ompt_team_info.parallel_data);
7572 if (ompt_enabled.ompt_callback_implicit_task) {
7573 ompt_team_size = team->t.t_nproc;
7574 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)ompt_callback_implicit_task_callback(
7575 ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size,
7576 __kmp_tid_from_gtid(gtid), ompt_task_implicit);
7577 OMPT_CUR_TASK_INFO(this_thr)(&(this_thr->th.th_current_task->ompt_task_info))->thread_num = __kmp_tid_from_gtid(gtid);
7578 }
7579#endif
7580
7581#if KMP_STATS_ENABLED0
7582 stats_state_e previous_state = KMP_GET_THREAD_STATE()((void)0);
7583 if (previous_state == stats_state_e::TEAMS_REGION) {
7584 KMP_PUSH_PARTITIONED_TIMER(OMP_teams)((void)0);
7585 } else {
7586 KMP_PUSH_PARTITIONED_TIMER(OMP_parallel)((void)0);
7587 }
7588 KMP_SET_THREAD_STATE(IMPLICIT_TASK)((void)0);
7589#endif
7590
7591 rc = __kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn)((void *)(team->t.t_pkfn)), gtid,
7592 tid, (int)team->t.t_argc, (void **)team->t.t_argv
7593#if OMPT_SUPPORT1
7594 ,
7595 exit_frame_p
7596#endif
7597 );
7598#if OMPT_SUPPORT1
7599 *exit_frame_p = NULL__null;
7600 this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_team;
7601#endif
7602
7603#if KMP_STATS_ENABLED0
7604 if (previous_state == stats_state_e::TEAMS_REGION) {
7605 KMP_SET_THREAD_STATE(previous_state)((void)0);
7606 }
7607 KMP_POP_PARTITIONED_TIMER()((void)0);
7608#endif
7609
7610#if USE_ITT_BUILD1
7611 if (__itt_stack_caller_create_ptr__kmp_itt_stack_caller_create_ptr__3_0) {
7612 // inform ittnotify about leaving user's code
7613 if (team->t.t_stack_id != NULL__null) {
7614 __kmp_itt_stack_callee_leave((__itt_caller)team->t.t_stack_id);
7615 } else {
7616 KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL)if (!(team->t.t_parent->t.t_stack_id != __null)) { __kmp_debug_assert
("team->t.t_parent->t.t_stack_id != __null", "openmp/runtime/src/kmp_runtime.cpp"
, 7616); }
;
7617 __kmp_itt_stack_callee_leave(
7618 (__itt_caller)team->t.t_parent->t.t_stack_id);
7619 }
7620 }
7621#endif /* USE_ITT_BUILD */
7622 __kmp_run_after_invoked_task(gtid, tid, this_thr, team);
7623
7624 return rc;
7625}
7626
7627void __kmp_teams_master(int gtid) {
7628 // This routine is called by all primary threads in the teams construct.
7629 kmp_info_t *thr = __kmp_threads[gtid];
7630 kmp_team_t *team = thr->th.th_team;
7631 ident_t *loc = team->t.t_ident;
7632 thr->th.th_set_nproc = thr->th.th_teams_size.nth;
7633 KMP_DEBUG_ASSERT(thr->th.th_teams_microtask)if (!(thr->th.th_teams_microtask)) { __kmp_debug_assert("thr->th.th_teams_microtask"
, "openmp/runtime/src/kmp_runtime.cpp", 7633); }
;
7634 KMP_DEBUG_ASSERT(thr->th.th_set_nproc)if (!(thr->th.th_set_nproc)) { __kmp_debug_assert("thr->th.th_set_nproc"
, "openmp/runtime/src/kmp_runtime.cpp", 7634); }
;
7635 KA_TRACE(20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n", gtid,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n"
, gtid, __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask
); }
7636 __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n"
, gtid, __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask
); }
;
7637
7638 // This thread is a new CG root. Set up the proper variables.
7639 kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(sizeof(kmp_cg_root_t))___kmp_allocate((sizeof(kmp_cg_root_t)), "openmp/runtime/src/kmp_runtime.cpp"
, 7639)
;
7640 tmp->cg_root = thr; // Make thr the CG root
7641 // Init to thread limit stored when league primary threads were forked
7642 tmp->cg_thread_limit = thr->th.th_current_task->td_icvs.thread_limit;
7643 tmp->cg_nthreads = 1; // Init counter to one active thread, this one
7644 KA_TRACE(100, ("__kmp_teams_master: Thread %p created node %p and init"if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_teams_master: Thread %p created node %p and init"
" cg_nthreads to 1\n", thr, tmp); }
7645 " cg_nthreads to 1\n",if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_teams_master: Thread %p created node %p and init"
" cg_nthreads to 1\n", thr, tmp); }
7646 thr, tmp))if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_teams_master: Thread %p created node %p and init"
" cg_nthreads to 1\n", thr, tmp); }
;
7647 tmp->up = thr->th.th_cg_roots;
7648 thr->th.th_cg_roots = tmp;
7649
7650// Launch the league of teams now, but do not let workers execute
7651// (they hang on the fork barrier until the next parallel region)
7652#if INCLUDE_SSC_MARKS(1 && 1)
7653 SSC_MARK_FORKING()__asm__ __volatile__("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 "
::"i"(0xd693) : "%ebx")
;
7654#endif
7655 __kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc,
7656 (microtask_t)thr->th.th_teams_microtask, // "wrapped" task
7657 VOLATILE_CAST(launch_t)(launch_t) __kmp_invoke_task_func, NULL__null);
7658#if INCLUDE_SSC_MARKS(1 && 1)
7659 SSC_MARK_JOINING()__asm__ __volatile__("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 "
::"i"(0xd694) : "%ebx")
;
7660#endif
7661 // If the team size was reduced from the limit, set it to the new size
7662 if (thr->th.th_team_nproc < thr->th.th_teams_size.nth)
7663 thr->th.th_teams_size.nth = thr->th.th_team_nproc;
7664 // AC: last parameter "1" eliminates the join barrier, which won't work because
7665 // worker threads are in a fork barrier waiting for more parallel regions
7666 __kmp_join_call(loc, gtid
7667#if OMPT_SUPPORT1
7668 ,
7669 fork_context_intel
7670#endif
7671 ,
7672 1);
7673}
7674
7675int __kmp_invoke_teams_master(int gtid) {
7676 kmp_info_t *this_thr = __kmp_threads[gtid];
7677 kmp_team_t *team = this_thr->th.th_team;
7678#if KMP_DEBUG1
7679 if (!__kmp_threads[gtid]->th.th_team->t.t_serialized)
7680 KMP_DEBUG_ASSERT((void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn ==if (!((void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn
== (void *)__kmp_teams_master)) { __kmp_debug_assert("(void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn == (void *)__kmp_teams_master"
, "openmp/runtime/src/kmp_runtime.cpp", 7681); }
7681 (void *)__kmp_teams_master)if (!((void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn
== (void *)__kmp_teams_master)) { __kmp_debug_assert("(void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn == (void *)__kmp_teams_master"
, "openmp/runtime/src/kmp_runtime.cpp", 7681); }
;
7682#endif
7683 __kmp_run_before_invoked_task(gtid, 0, this_thr, team);
7684#if OMPT_SUPPORT1
7685 int tid = __kmp_tid_from_gtid(gtid);
7686 ompt_data_t *task_data =
7687 &team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data;
7688 ompt_data_t *parallel_data = &team->t.ompt_team_info.parallel_data;
7689 if (ompt_enabled.ompt_callback_implicit_task) {
7690 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)ompt_callback_implicit_task_callback(
7691 ompt_scope_begin, parallel_data, task_data, team->t.t_nproc, tid,
7692 ompt_task_initial);
7693 OMPT_CUR_TASK_INFO(this_thr)(&(this_thr->th.th_current_task->ompt_task_info))->thread_num = tid;
7694 }
7695#endif
7696 __kmp_teams_master(gtid);
7697#if OMPT_SUPPORT1
7698 this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_league;
7699#endif
7700 __kmp_run_after_invoked_task(gtid, 0, this_thr, team);
7701 return 1;
7702}
7703
7704/* this sets the requested number of threads for the next parallel region
7705 encountered by this team. since this should be enclosed in the forkjoin
7706 critical section it should avoid race conditions with asymmetrical nested
7707 parallelism */
7708
7709void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
7710 kmp_info_t *thr = __kmp_threads[gtid];
7711
7712 if (num_threads > 0)
7713 thr->th.th_set_nproc = num_threads;
7714}
7715
7716static void __kmp_push_thread_limit(kmp_info_t *thr, int num_teams,
7717 int num_threads) {
7718 KMP_DEBUG_ASSERT(thr)if (!(thr)) { __kmp_debug_assert("thr", "openmp/runtime/src/kmp_runtime.cpp"
, 7718); }
;
7719 // Remember the number of threads for inner parallel regions
7720 if (!TCR_4(__kmp_init_middle)(__kmp_init_middle))
7721 __kmp_middle_initialize(); // get internal globals calculated
7722 __kmp_assign_root_init_mask();
7723 KMP_DEBUG_ASSERT(__kmp_avail_proc)if (!(__kmp_avail_proc)) { __kmp_debug_assert("__kmp_avail_proc"
, "openmp/runtime/src/kmp_runtime.cpp", 7723); }
;
7724 KMP_DEBUG_ASSERT(__kmp_dflt_team_nth)if (!(__kmp_dflt_team_nth)) { __kmp_debug_assert("__kmp_dflt_team_nth"
, "openmp/runtime/src/kmp_runtime.cpp", 7724); }
;
7725
7726 if (num_threads == 0) {
7727 if (__kmp_teams_thread_limit > 0) {
7728 num_threads = __kmp_teams_thread_limit;
7729 } else {
7730 num_threads = __kmp_avail_proc / num_teams;
7731 }
7732 // adjust num_threads w/o warning as it is not a user setting
7733 // num_threads = min(num_threads, nthreads-var, thread-limit-var)
7734 // no thread_limit clause specified - do not change thread-limit-var ICV
7735 if (num_threads > __kmp_dflt_team_nth) {
7736 num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
7737 }
7738 if (num_threads > thr->th.th_current_task->td_icvs.thread_limit) {
7739 num_threads = thr->th.th_current_task->td_icvs.thread_limit;
7740 } // prevent team size from exceeding thread-limit-var
7741 if (num_teams * num_threads > __kmp_teams_max_nth) {
7742 num_threads = __kmp_teams_max_nth / num_teams;
7743 }
7744 if (num_threads == 0) {
7745 num_threads = 1;
7746 }
7747 } else {
7748 if (num_threads < 0) {
7749 __kmp_msg(kmp_ms_warning, KMP_MSG(CantFormThrTeam, num_threads, 1)__kmp_msg_format(kmp_i18n_msg_CantFormThrTeam, num_threads, 1
)
,
7750 __kmp_msg_null);
7751 num_threads = 1;
7752 }
7753 // This thread will be the primary thread of the league's primary threads
7754 // Store new thread limit; old limit is saved in th_cg_roots list
7755 thr->th.th_current_task->td_icvs.thread_limit = num_threads;
7756 // num_threads = min(num_threads, nthreads-var)
7757 if (num_threads > __kmp_dflt_team_nth) {
7758 num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
7759 }
7760 if (num_teams * num_threads > __kmp_teams_max_nth) {
7761 int new_threads = __kmp_teams_max_nth / num_teams;
7762 if (new_threads == 0) {
7763 new_threads = 1;
7764 }
7765 if (new_threads != num_threads) {
7766 if (!__kmp_reserve_warn) { // user asked for too many threads
7767 __kmp_reserve_warn = 1; // conflicts with KMP_TEAMS_THREAD_LIMIT
7768 __kmp_msg(kmp_ms_warning,
7769 KMP_MSG(CantFormThrTeam, num_threads, new_threads)__kmp_msg_format(kmp_i18n_msg_CantFormThrTeam, num_threads, new_threads
)
,
7770 KMP_HNT(Unset_ALL_THREADS)__kmp_msg_format(kmp_i18n_hnt_Unset_ALL_THREADS), __kmp_msg_null);
7771 }
7772 }
7773 num_threads = new_threads;
7774 }
7775 }
7776 thr->th.th_teams_size.nth = num_threads;
7777}
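When no thread_limit clause is given (num_threads == 0 above), the per-team size falls back to KMP_TEAMS_THREAD_LIMIT or an even split of the available processors, and is then capped by nthreads-var, thread-limit-var and __kmp_teams_max_nth. A pure-function sketch of that branch, using hypothetical parameter names for the ICVs it consults:

#include <algorithm>

int derive_team_thread_limit(int num_teams, int teams_thread_limit,
                             int avail_proc, int nthreads_var,
                             int thread_limit_var, int teams_max_nth) {
  int nth = (teams_thread_limit > 0) ? teams_thread_limit
                                     : avail_proc / num_teams;
  nth = std::min(nth, nthreads_var);     // honor nthreads-var ICV
  nth = std::min(nth, thread_limit_var); // do not exceed thread-limit-var
  if (num_teams * nth > teams_max_nth)   // cap the whole league
    nth = teams_max_nth / num_teams;
  return std::max(nth, 1);               // never report zero threads
}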
7778
7779/* this sets the requested number of teams for the teams region and/or
7780 the number of threads for the next parallel region encountered */
7781void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
7782 int num_threads) {
7783 kmp_info_t *thr = __kmp_threads[gtid];
7784 if (num_teams < 0) {
7785 // OpenMP specification requires requested values to be positive,
7786 // but people can send us any value, so we'd better check
7787 __kmp_msg(kmp_ms_warning, KMP_MSG(NumTeamsNotPositive, num_teams, 1)__kmp_msg_format(kmp_i18n_msg_NumTeamsNotPositive, num_teams,
1)
,
7788 __kmp_msg_null);
7789 num_teams = 1;
7790 }
7791 if (num_teams == 0) {
7792 if (__kmp_nteams > 0) {
7793 num_teams = __kmp_nteams;
7794 } else {
7795 num_teams = 1; // default number of teams is 1.
7796 }
7797 }
7798 if (num_teams > __kmp_teams_max_nth) { // too many teams requested?
7799 if (!__kmp_reserve_warn) {
7800 __kmp_reserve_warn = 1;
7801 __kmp_msg(kmp_ms_warning,
7802 KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth)__kmp_msg_format(kmp_i18n_msg_CantFormThrTeam, num_teams, __kmp_teams_max_nth
)
,
7803 KMP_HNT(Unset_ALL_THREADS)__kmp_msg_format(kmp_i18n_hnt_Unset_ALL_THREADS), __kmp_msg_null);
7804 }
7805 num_teams = __kmp_teams_max_nth;
7806 }
7807 // Set number of teams (number of threads in the outer "parallel" of the
7808 // teams)
7809 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
7810
7811 __kmp_push_thread_limit(thr, num_teams, num_threads);
7812}
7813
7814/* This sets the requested number of teams for the teams region and/or
7815 the number of threads for the next parallel region encountered */
7816void __kmp_push_num_teams_51(ident_t *id, int gtid, int num_teams_lb,
7817 int num_teams_ub, int num_threads) {
7818 kmp_info_t *thr = __kmp_threads[gtid];
7819 KMP_DEBUG_ASSERT(num_teams_lb >= 0 && num_teams_ub >= 0)if (!(num_teams_lb >= 0 && num_teams_ub >= 0)) {
__kmp_debug_assert("num_teams_lb >= 0 && num_teams_ub >= 0"
, "openmp/runtime/src/kmp_runtime.cpp", 7819); }
;
7820 KMP_DEBUG_ASSERT(num_teams_ub >= num_teams_lb)if (!(num_teams_ub >= num_teams_lb)) { __kmp_debug_assert(
"num_teams_ub >= num_teams_lb", "openmp/runtime/src/kmp_runtime.cpp"
, 7820); }
;
7821 KMP_DEBUG_ASSERT(num_threads >= 0)if (!(num_threads >= 0)) { __kmp_debug_assert("num_threads >= 0"
, "openmp/runtime/src/kmp_runtime.cpp", 7821); }
;
7822
7823 if (num_teams_lb > num_teams_ub) {
7824 __kmp_fatal(KMP_MSG(FailedToCreateTeam, num_teams_lb, num_teams_ub)__kmp_msg_format(kmp_i18n_msg_FailedToCreateTeam, num_teams_lb
, num_teams_ub)
,
7825 KMP_HNT(SetNewBound, __kmp_teams_max_nth)__kmp_msg_format(kmp_i18n_hnt_SetNewBound, __kmp_teams_max_nth
)
, __kmp_msg_null);
7826 }
7827
7828 int num_teams = 1; // default number of teams is 1.
7829
7830 if (num_teams_lb == 0 && num_teams_ub > 0)
7831 num_teams_lb = num_teams_ub;
7832
7833 if (num_teams_lb == 0 && num_teams_ub == 0) { // no num_teams clause
7834 num_teams = (__kmp_nteams > 0) ? __kmp_nteams : num_teams;
7835 if (num_teams > __kmp_teams_max_nth) {
7836 if (!__kmp_reserve_warn) {
7837 __kmp_reserve_warn = 1;
7838 __kmp_msg(kmp_ms_warning,
7839 KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth)__kmp_msg_format(kmp_i18n_msg_CantFormThrTeam, num_teams, __kmp_teams_max_nth
)
,
7840 KMP_HNT(Unset_ALL_THREADS)__kmp_msg_format(kmp_i18n_hnt_Unset_ALL_THREADS), __kmp_msg_null);
7841 }
7842 num_teams = __kmp_teams_max_nth;
7843 }
7844 } else if (num_teams_lb == num_teams_ub) { // requires exact number of teams
7845 num_teams = num_teams_ub;
7846 } else { // num_teams_lb <= num_teams <= num_teams_ub
7847 if (num_threads <= 0) {
7848 if (num_teams_ub > __kmp_teams_max_nth) {
7849 num_teams = num_teams_lb;
7850 } else {
7851 num_teams = num_teams_ub;
7852 }
7853 } else {
7854 num_teams = (num_threads > __kmp_teams_max_nth)
7855 ? num_teams
7856 : __kmp_teams_max_nth / num_threads;
7857 if (num_teams < num_teams_lb) {
7858 num_teams = num_teams_lb;
7859 } else if (num_teams > num_teams_ub) {
7860 num_teams = num_teams_ub;
7861 }
7862 }
7863 }
7864 // Set number of teams (number of threads in the outer "parallel" of the
7865 // teams)
7866 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
7867
7868 __kmp_push_thread_limit(thr, num_teams, num_threads);
7869}
7870
7871// Set the proc_bind var to use in the following parallel region.
7872void __kmp_push_proc_bind(ident_t *id, int gtid, kmp_proc_bind_t proc_bind) {
7873 kmp_info_t *thr = __kmp_threads[gtid];
7874 thr->th.th_set_proc_bind = proc_bind;
7875}
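For context, this is the runtime half of the proc_bind clause; a clause such as the one below is what typically ends up setting th_set_proc_bind for the next region. Usage sketch only (assumes a compiler with OpenMP support, e.g. clang -fopenmp), not part of the analyzed file:

#include <omp.h>
#include <cstdio>

int main() {
  // proc_bind(close) requests that the forked workers be placed close to the
  // primary thread, for this one parallel region only.
  #pragma omp parallel proc_bind(close) num_threads(4)
  {
    std::printf("hello from thread %d of %d\n", omp_get_thread_num(),
                omp_get_num_threads());
  }
  return 0;
}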
7876
7877/* Launch the worker threads into the microtask. */
7878
7879void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team) {
7880 kmp_info_t *this_thr = __kmp_threads[gtid];
7881
7882#ifdef KMP_DEBUG1
7883 int f;
7884#endif /* KMP_DEBUG */
7885
7886 KMP_DEBUG_ASSERT(team)if (!(team)) { __kmp_debug_assert("team", "openmp/runtime/src/kmp_runtime.cpp"
, 7886); }
;
7887 KMP_DEBUG_ASSERT(this_thr->th.th_team == team)if (!(this_thr->th.th_team == team)) { __kmp_debug_assert(
"this_thr->th.th_team == team", "openmp/runtime/src/kmp_runtime.cpp"
, 7887); }
;
7888 KMP_ASSERT(KMP_MASTER_GTID(gtid))if (!((0 == __kmp_tid_from_gtid((gtid))))) { __kmp_debug_assert
("KMP_MASTER_GTID(gtid)", "openmp/runtime/src/kmp_runtime.cpp"
, 7888); }
;
7889 KMP_MB(); /* Flush all pending memory write invalidates. */
7890
7891 team->t.t_construct = 0; /* no single directives seen yet */
7892 team->t.t_ordered.dt.t_value =
7893 0; /* thread 0 enters the ordered section first */
7894
7895 /* Reset the identifiers on the dispatch buffer */
7896 KMP_DEBUG_ASSERT(team->t.t_disp_buffer)if (!(team->t.t_disp_buffer)) { __kmp_debug_assert("team->t.t_disp_buffer"
, "openmp/runtime/src/kmp_runtime.cpp", 7896); }
;
7897 if (team->t.t_max_nproc > 1) {
7898 int i;
7899 for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
7900 team->t.t_disp_buffer[i].buffer_index = i;
7901 team->t.t_disp_buffer[i].doacross_buf_idx = i;
7902 }
7903 } else {
7904 team->t.t_disp_buffer[0].buffer_index = 0;
7905 team->t.t_disp_buffer[0].doacross_buf_idx = 0;
7906 }
7907
7908 KMP_MB(); /* Flush all pending memory write invalidates. */
7909 KMP_ASSERT(this_thr->th.th_team == team)if (!(this_thr->th.th_team == team)) { __kmp_debug_assert(
"this_thr->th.th_team == team", "openmp/runtime/src/kmp_runtime.cpp"
, 7909); }
;
7910
7911#ifdef KMP_DEBUG1
7912 for (f = 0; f < team->t.t_nproc; f++) {
7913 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&if (!(team->t.t_threads[f] && team->t.t_threads
[f]->th.th_team_nproc == team->t.t_nproc)) { __kmp_debug_assert
("team->t.t_threads[f] && team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc"
, "openmp/runtime/src/kmp_runtime.cpp", 7914); }
7914 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc)if (!(team->t.t_threads[f] && team->t.t_threads
[f]->th.th_team_nproc == team->t.t_nproc)) { __kmp_debug_assert
("team->t.t_threads[f] && team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc"
, "openmp/runtime/src/kmp_runtime.cpp", 7914); }
;
7915 }
7916#endif /* KMP_DEBUG */
7917
7918 /* release the worker threads so they may begin working */
7919 __kmp_fork_barrier(gtid, 0);
7920}
7921
7922void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {
7923 kmp_info_t *this_thr = __kmp_threads[gtid];
7924
7925 KMP_DEBUG_ASSERT(team)if (!(team)) { __kmp_debug_assert("team", "openmp/runtime/src/kmp_runtime.cpp"
, 7925); }
;
7926 KMP_DEBUG_ASSERT(this_thr->th.th_team == team)if (!(this_thr->th.th_team == team)) { __kmp_debug_assert(
"this_thr->th.th_team == team", "openmp/runtime/src/kmp_runtime.cpp"
, 7926); }
;
7927 KMP_ASSERT(KMP_MASTER_GTID(gtid))if (!((0 == __kmp_tid_from_gtid((gtid))))) { __kmp_debug_assert
("KMP_MASTER_GTID(gtid)", "openmp/runtime/src/kmp_runtime.cpp"
, 7927); }
;
7928 KMP_MB(); /* Flush all pending memory write invalidates. */
7929
7930 /* Join barrier after fork */
7931
7932#ifdef KMP_DEBUG1
7933 if (__kmp_threads[gtid] &&
7934 __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc) {
7935 __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid,
7936 __kmp_threads[gtid]);
7937 __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, "
7938 "team->t.t_nproc=%d\n",
7939 gtid, __kmp_threads[gtid]->th.th_team_nproc, team,
7940 team->t.t_nproc);
7941 __kmp_print_structure();
7942 }
7943 KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&if (!(__kmp_threads[gtid] && __kmp_threads[gtid]->
th.th_team_nproc == team->t.t_nproc)) { __kmp_debug_assert
("__kmp_threads[gtid] && __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc"
, "openmp/runtime/src/kmp_runtime.cpp", 7944); }
7944 __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc)if (!(__kmp_threads[gtid] && __kmp_threads[gtid]->
th.th_team_nproc == team->t.t_nproc)) { __kmp_debug_assert
("__kmp_threads[gtid] && __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc"
, "openmp/runtime/src/kmp_runtime.cpp", 7944); }
;
7945#endif /* KMP_DEBUG */
7946
7947 __kmp_join_barrier(gtid); /* wait for everyone */
7948#if OMPT_SUPPORT1
7949 if (ompt_enabled.enabled &&
7950 this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) {
7951 int ds_tid = this_thr->th.th_info.ds.ds_tid;
7952 ompt_data_t *task_data = OMPT_CUR_TASK_DATA(this_thr)(&(this_thr->th.th_current_task->ompt_task_info.task_data
))
;
7953 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
7954#if OMPT_OPTIONAL1
7955 void *codeptr = NULL__null;
7956 if (KMP_MASTER_TID(ds_tid)(0 == (ds_tid)) &&
7957 (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)ompt_callback_sync_region_wait_callback ||
7958 ompt_callbacks.ompt_callback(ompt_callback_sync_region)ompt_callback_sync_region_callback))
7959 codeptr = OMPT_CUR_TEAM_INFO(this_thr)(&(this_thr->th.th_team->t.ompt_team_info))->master_return_address;
7960
7961 if (ompt_enabled.ompt_callback_sync_region_wait) {
7962 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)ompt_callback_sync_region_wait_callback(
7963 ompt_sync_region_barrier_implicit, ompt_scope_end, NULL__null, task_data,
7964 codeptr);
7965 }
7966 if (ompt_enabled.ompt_callback_sync_region) {
7967 ompt_callbacks.ompt_callback(ompt_callback_sync_region)ompt_callback_sync_region_callback(
7968 ompt_sync_region_barrier_implicit, ompt_scope_end, NULL__null, task_data,
7969 codeptr);
7970 }
7971#endif
7972 if (!KMP_MASTER_TID(ds_tid)(0 == (ds_tid)) && ompt_enabled.ompt_callback_implicit_task) {
7973 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)ompt_callback_implicit_task_callback(
7974 ompt_scope_end, NULL__null, task_data, 0, ds_tid,
7975 ompt_task_implicit); // TODO: Can this be ompt_task_initial?
7976 }
7977 }
7978#endif
7979
7980 KMP_MB(); /* Flush all pending memory write invalidates. */
7981 KMP_ASSERT(this_thr->th.th_team == team)if (!(this_thr->th.th_team == team)) { __kmp_debug_assert(
"this_thr->th.th_team == team", "openmp/runtime/src/kmp_runtime.cpp"
, 7981); }
;
7982}
7983
7984/* ------------------------------------------------------------------------ */
7985
7986#ifdef USE_LOAD_BALANCE1
7987
7988// Return the worker threads actively spinning in the hot team, if we
7989// are at the outermost level of parallelism. Otherwise, return 0.
7990static int __kmp_active_hot_team_nproc(kmp_root_t *root) {
7991 int i;
7992 int retval;
7993 kmp_team_t *hot_team;
7994
7995 if (root->r.r_active) {
7996 return 0;
7997 }
7998 hot_team = root->r.r_hot_team;
7999 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME(2147483647)) {
8000 return hot_team->t.t_nproc - 1; // Don't count primary thread
8001 }
8002
8003 // Skip the primary thread - it is accounted for elsewhere.
8004 retval = 0;
8005 for (i = 1; i < hot_team->t.t_nproc; i++) {
8006 if (hot_team->t.t_threads[i]->th.th_active) {
8007 retval++;
8008 }
8009 }
8010 return retval;
8011}
8012
8013// Perform an automatic adjustment to the number of
8014// threads used by the next parallel region.
8015static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc) {
8016 int retval;
8017 int pool_active;
8018 int hot_team_active;
8019 int team_curr_active;
8020 int system_active;
8021
8022 KB_TRACE(20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n", root,if (kmp_b_debug >= 20) { __kmp_debug_printf ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n"
, root, set_nproc); }
8023 set_nproc))if (kmp_b_debug >= 20) { __kmp_debug_printf ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n"
, root, set_nproc); }
;
8024 KMP_DEBUG_ASSERT(root)if (!(root)) { __kmp_debug_assert("root", "openmp/runtime/src/kmp_runtime.cpp"
, 8024); }
;
8025 KMP_DEBUG_ASSERT(root->r.r_root_team->t.t_threads[0]if (!(root->r.r_root_team->t.t_threads[0] ->th.th_current_task
->td_icvs.dynamic == (!0))) { __kmp_debug_assert("root->r.r_root_team->t.t_threads[0] ->th.th_current_task->td_icvs.dynamic == (!0)"
, "openmp/runtime/src/kmp_runtime.cpp", 8026); }
8026 ->th.th_current_task->td_icvs.dynamic == TRUE)if (!(root->r.r_root_team->t.t_threads[0] ->th.th_current_task
->td_icvs.dynamic == (!0))) { __kmp_debug_assert("root->r.r_root_team->t.t_threads[0] ->th.th_current_task->td_icvs.dynamic == (!0)"
, "openmp/runtime/src/kmp_runtime.cpp", 8026); }
;
8027 KMP_DEBUG_ASSERT(set_nproc > 1)if (!(set_nproc > 1)) { __kmp_debug_assert("set_nproc > 1"
, "openmp/runtime/src/kmp_runtime.cpp", 8027); }
;
8028
8029 if (set_nproc == 1) {
8030 KB_TRACE(20, ("__kmp_load_balance_nproc: serial execution.\n"))if (kmp_b_debug >= 20) { __kmp_debug_printf ("__kmp_load_balance_nproc: serial execution.\n"
); }
;
8031 return 1;
8032 }
8033
8034 // Threads that are active in the thread pool, active in the hot team for this
8035 // particular root (if we are at the outer par level), and the currently
8036 // executing thread (to become the primary thread) are available to add to the
8037 // new team, but are currently contributing to the system load, and must be
8038 // accounted for.
8039 pool_active = __kmp_thread_pool_active_nth;
8040 hot_team_active = __kmp_active_hot_team_nproc(root);
8041 team_curr_active = pool_active + hot_team_active + 1;
8042
8043 // Check the system load.
8044 system_active = __kmp_get_load_balance(__kmp_avail_proc + team_curr_active);
8045 KB_TRACE(30, ("__kmp_load_balance_nproc: system active = %d pool active = %d "if (kmp_b_debug >= 30) { __kmp_debug_printf ("__kmp_load_balance_nproc: system active = %d pool active = %d "
"hot team active = %d\n", system_active, pool_active, hot_team_active
); }
8046 "hot team active = %d\n",if (kmp_b_debug >= 30) { __kmp_debug_printf ("__kmp_load_balance_nproc: system active = %d pool active = %d "
"hot team active = %d\n", system_active, pool_active, hot_team_active
); }
8047 system_active, pool_active, hot_team_active))if (kmp_b_debug >= 30) { __kmp_debug_printf ("__kmp_load_balance_nproc: system active = %d pool active = %d "
"hot team active = %d\n", system_active, pool_active, hot_team_active
); }
;
8048
8049 if (system_active < 0) {
8050 // There was an error reading the necessary info from /proc, so use the
8051 // thread limit algorithm instead. Once we set __kmp_global.g.g_dynamic_mode
8052 // = dynamic_thread_limit, we shouldn't wind up getting back here.
8053 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
8054 KMP_WARNING(CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit")__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_CantLoadBalUsing
, "KMP_DYNAMIC_MODE=thread limit"), __kmp_msg_null)
;
8055
8056 // Make this call behave like the thread limit algorithm.
8057 retval = __kmp_avail_proc - __kmp_nth +
8058 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
8059 if (retval > set_nproc) {
8060 retval = set_nproc;
8061 }
8062 if (retval < KMP_MIN_NTH1) {
8063 retval = KMP_MIN_NTH1;
8064 }
8065
8066 KB_TRACE(20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n",if (kmp_b_debug >= 20) { __kmp_debug_printf ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n"
, retval); }
8067 retval))if (kmp_b_debug >= 20) { __kmp_debug_printf ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n"
, retval); }
;
8068 return retval;
8069 }
8070
8071 // There is a slight delay in the load balance algorithm in detecting new
8072 // running procs. The real system load at this instant should be at least as
8073 // large as the #active omp threads that are available to add to the team.
8074 if (system_active < team_curr_active) {
8075 system_active = team_curr_active;
8076 }
8077 retval = __kmp_avail_proc - system_active + team_curr_active;
8078 if (retval > set_nproc) {
8079 retval = set_nproc;
8080 }
8081 if (retval < KMP_MIN_NTH1) {
8082 retval = KMP_MIN_NTH1;
8083 }
8084
8085 KB_TRACE(20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval))if (kmp_b_debug >= 20) { __kmp_debug_printf ("__kmp_load_balance_nproc: exit. retval:%d\n"
, retval); }
;
8086 return retval;
8087} // __kmp_load_balance_nproc()
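Assuming the /proc read succeeded (system_active >= 0), the steady-state result above is just avail_proc minus the observed system load, credited back with the threads this root already owns, clamped to [KMP_MIN_NTH, set_nproc]. A compact sketch with stand-in parameters rather than the runtime globals:

#include <algorithm>

int load_balance_nproc(int avail_proc, int system_active, int pool_active,
                       int hot_team_active, int set_nproc, int min_nth) {
  int team_curr_active = pool_active + hot_team_active + 1; // + this thread
  // Tolerate the detection lag noted above: the system load should be at
  // least the threads we already know are runnable.
  system_active = std::max(system_active, team_curr_active);
  int retval = avail_proc - system_active + team_curr_active;
  retval = std::min(retval, set_nproc);
  return std::max(retval, min_nth);
}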
8088
8089#endif /* USE_LOAD_BALANCE */
8090
8091/* ------------------------------------------------------------------------ */
8092
8093/* NOTE: this is called with the __kmp_init_lock held */
8094void __kmp_cleanup(void) {
8095 int f;
8096
8097 KA_TRACE(10, ("__kmp_cleanup: enter\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_cleanup: enter\n"
); }
;
8098
8099 if (TCR_4(__kmp_init_parallel)(__kmp_init_parallel)) {
8100#if KMP_HANDLE_SIGNALS(1 || 0)
8101 __kmp_remove_signals();
8102#endif
8103 TCW_4(__kmp_init_parallel, FALSE)(__kmp_init_parallel) = (0);
8104 }
8105
8106 if (TCR_4(__kmp_init_middle)(__kmp_init_middle)) {
8107#if KMP_AFFINITY_SUPPORTED1
8108 __kmp_affinity_uninitialize();
8109#endif /* KMP_AFFINITY_SUPPORTED */
8110 __kmp_cleanup_hierarchy();
8111 TCW_4(__kmp_init_middle, FALSE)(__kmp_init_middle) = (0);
8112 }
8113
8114 KA_TRACE(10, ("__kmp_cleanup: go serial cleanup\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_cleanup: go serial cleanup\n"
); }
;
8115
8116 if (__kmp_init_serial) {
8117 __kmp_runtime_destroy();
8118 __kmp_init_serial = FALSE0;
8119 }
8120
8121 __kmp_cleanup_threadprivate_caches();
8122
8123 for (f = 0; f < __kmp_threads_capacity; f++) {
8124 if (__kmp_root[f] != NULL__null) {
8125 __kmp_free(__kmp_root[f])___kmp_free((__kmp_root[f]), "openmp/runtime/src/kmp_runtime.cpp"
, 8125)
;
8126 __kmp_root[f] = NULL__null;
8127 }
8128 }
8129 __kmp_free(__kmp_threads)___kmp_free((__kmp_threads), "openmp/runtime/src/kmp_runtime.cpp"
, 8129)
;
8130 // __kmp_threads and __kmp_root were allocated at once, as a single block, so
8131 // there is no need to free __kmp_root.
8132 __kmp_threads = NULL__null;
8133 __kmp_root = NULL__null;
8134 __kmp_threads_capacity = 0;
8135
8136 // Free old __kmp_threads arrays if they exist.
8137 kmp_old_threads_list_t *ptr = __kmp_old_threads_list;
8138 while (ptr) {
8139 kmp_old_threads_list_t *next = ptr->next;
8140 __kmp_free(ptr->threads)___kmp_free((ptr->threads), "openmp/runtime/src/kmp_runtime.cpp"
, 8140)
;
8141 __kmp_free(ptr)___kmp_free((ptr), "openmp/runtime/src/kmp_runtime.cpp", 8141
)
;
8142 ptr = next;
8143 }
8144
8145#if KMP_USE_DYNAMIC_LOCK1
8146 __kmp_cleanup_indirect_user_locks();
8147#else
8148 __kmp_cleanup_user_locks();
8149#endif
8150#if OMPD_SUPPORT1
8151 if (ompd_state) {
8152 __kmp_free(ompd_env_block)___kmp_free((ompd_env_block), "openmp/runtime/src/kmp_runtime.cpp"
, 8152)
;
8153 ompd_env_block = NULL__null;
8154 ompd_env_block_size = 0;
8155 }
8156#endif
8157
8158#if KMP_AFFINITY_SUPPORTED1
8159 KMP_INTERNAL_FREE(CCAST(char *, __kmp_cpuinfo_file))free(const_cast<char *>(__kmp_cpuinfo_file));
8160 __kmp_cpuinfo_file = NULL__null;
8161#endif /* KMP_AFFINITY_SUPPORTED */
8162
8163#if KMP_USE_ADAPTIVE_LOCKS(0 || 1) && !0
8164#if KMP_DEBUG_ADAPTIVE_LOCKS0
8165 __kmp_print_speculative_stats();
8166#endif
8167#endif
8168 KMP_INTERNAL_FREE(__kmp_nested_nth.nth)free(__kmp_nested_nth.nth);
8169 __kmp_nested_nth.nth = NULL__null;
8170 __kmp_nested_nth.size = 0;
8171 __kmp_nested_nth.used = 0;
8172 KMP_INTERNAL_FREE(__kmp_nested_proc_bind.bind_types)free(__kmp_nested_proc_bind.bind_types);
8173 __kmp_nested_proc_bind.bind_types = NULL__null;
8174 __kmp_nested_proc_bind.size = 0;
8175 __kmp_nested_proc_bind.used = 0;
8176 if (__kmp_affinity_format) {
8177 KMP_INTERNAL_FREE(__kmp_affinity_format)free(__kmp_affinity_format);
8178 __kmp_affinity_format = NULL__null;
8179 }
8180
8181 __kmp_i18n_catclose();
8182
8183#if KMP_USE_HIER_SCHED0
8184 __kmp_hier_scheds.deallocate();
8185#endif
8186
8187#if KMP_STATS_ENABLED0
8188 __kmp_stats_fini();
8189#endif
8190
8191 KA_TRACE(10, ("__kmp_cleanup: exit\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_cleanup: exit\n"
); }
;
8192}
8193
8194/* ------------------------------------------------------------------------ */
8195
8196int __kmp_ignore_mppbeg(void) {
8197 char *env;
8198
8199 if ((env = getenv("KMP_IGNORE_MPPBEG")) != NULL__null) {
8200 if (__kmp_str_match_false(env))
8201 return FALSE0;
8202 }
8203 // By default __kmpc_begin() is no-op.
8204 return TRUE(!0);
8205}
8206
8207int __kmp_ignore_mppend(void) {
8208 char *env;
8209
8210 if ((env = getenv("KMP_IGNORE_MPPEND")) != NULL__null) {
8211 if (__kmp_str_match_false(env))
8212 return FALSE0;
8213 }
8214 // By default __kmpc_end() is no-op.
8215 return TRUE(!0);
8216}
8217
8218void __kmp_internal_begin(void) {
8219 int gtid;
8220 kmp_root_t *root;
8221
8222 /* this is a very important step as it will register new sibling threads
8223 and assign these new uber threads a new gtid */
8224 gtid = __kmp_entry_gtid()__kmp_get_global_thread_id_reg();
8225 root = __kmp_threads[gtid]->th.th_root;
8226 KMP_ASSERT(KMP_UBER_GTID(gtid))if (!(KMP_UBER_GTID(gtid))) { __kmp_debug_assert("KMP_UBER_GTID(gtid)"
, "openmp/runtime/src/kmp_runtime.cpp", 8226); }
;
8227
8228 if (root->r.r_begin)
8229 return;
8230 __kmp_acquire_lock(&root->r.r_begin_lock, gtid);
8231 if (root->r.r_begin) {
8232 __kmp_release_lock(&root->r.r_begin_lock, gtid);
8233 return;
8234 }
8235
8236 root->r.r_begin = TRUE(!0);
8237
8238 __kmp_release_lock(&root->r.r_begin_lock, gtid);
8239}
8240
8241/* ------------------------------------------------------------------------ */
8242
8243void __kmp_user_set_library(enum library_type arg) {
8244 int gtid;
8245 kmp_root_t *root;
8246 kmp_info_t *thread;
8247
8248 /* first, make sure we are initialized so we can get our gtid */
8249
8250 gtid = __kmp_entry_gtid()__kmp_get_global_thread_id_reg();
8251 thread = __kmp_threads[gtid];
8252
8253 root = thread->th.th_root;
8254
8255 KA_TRACE(20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n"
, gtid, arg, library_serial); }
8256 library_serial))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n"
, gtid, arg, library_serial); }
;
8257 if (root->r.r_in_parallel) { /* Must be called in serial section of top-level
8258 thread */
8259 KMP_WARNING(SetLibraryIncorrectCall)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_SetLibraryIncorrectCall
), __kmp_msg_null)
;
8260 return;
8261 }
8262
8263 switch (arg) {
8264 case library_serial:
8265 thread->th.th_set_nproc = 0;
8266 set__nproc(thread, 1)(((thread)->th.th_current_task->td_icvs.nproc) = (1));
8267 break;
8268 case library_turnaround:
8269 thread->th.th_set_nproc = 0;
8270 set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth(((thread)->th.th_current_task->td_icvs.nproc) = (__kmp_dflt_team_nth
? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub))
8271 : __kmp_dflt_team_nth_ub)(((thread)->th.th_current_task->td_icvs.nproc) = (__kmp_dflt_team_nth
? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub))
;
8272 break;
8273 case library_throughput:
8274 thread->th.th_set_nproc = 0;
8275 set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth(((thread)->th.th_current_task->td_icvs.nproc) = (__kmp_dflt_team_nth
? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub))
8276 : __kmp_dflt_team_nth_ub)(((thread)->th.th_current_task->td_icvs.nproc) = (__kmp_dflt_team_nth
? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub))
;
8277 break;
8278 default:
8279 KMP_FATAL(UnknownLibraryType, arg)__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_UnknownLibraryType,
arg), __kmp_msg_null)
;
8280 }
8281
8282 __kmp_aux_set_library(arg);
8283}
8284
8285void __kmp_aux_set_stacksize(size_t arg) {
8286 if (!__kmp_init_serial)
8287 __kmp_serial_initialize();
8288
8289#if KMP_OS_DARWIN0
8290 if (arg & (0x1000 - 1)) {
8291 arg &= ~(0x1000 - 1);
8292 if (arg + 0x1000) /* check for overflow if we round up */
8293 arg += 0x1000;
8294 }
8295#endif
8296 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
8297
8298 /* only change the default stacksize before the first parallel region */
8299 if (!TCR_4(__kmp_init_parallel)(__kmp_init_parallel)) {
8300 size_t value = arg; /* argument is in bytes */
8301
8302 if (value < __kmp_sys_min_stksize)
8303 value = __kmp_sys_min_stksize;
8304 else if (value > KMP_MAX_STKSIZE(~((size_t)1 << ((sizeof(size_t) * (1 << 3)) - 1)
))
)
8305 value = KMP_MAX_STKSIZE(~((size_t)1 << ((sizeof(size_t) * (1 << 3)) - 1)
))
;
8306
8307 __kmp_stksize = value;
8308
8309 __kmp_env_stksize = TRUE(!0); /* was KMP_STACKSIZE specified? */
8310 }
8311
8312 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
8313}
8314
8315/* set the behaviour of the runtime library */
8316/* TODO this can cause some odd behaviour with sibling parallelism... */
8317void __kmp_aux_set_library(enum library_type arg) {
8318 __kmp_library = arg;
8319
8320 switch (__kmp_library) {
8321 case library_serial: {
8322 KMP_INFORM(LibraryIsSerial)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_LibraryIsSerial
), __kmp_msg_null)
;
8323 } break;
8324 case library_turnaround:
8325 if (__kmp_use_yield == 1 && !__kmp_use_yield_exp_set)
8326 __kmp_use_yield = 2; // only yield when oversubscribed
8327 break;
8328 case library_throughput:
8329 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME(2147483647))
8330 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME(__kmp_is_hybrid_cpu() ? (0) : (200));
8331 break;
8332 default:
8333 KMP_FATAL(UnknownLibraryType, arg)__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_UnknownLibraryType,
arg), __kmp_msg_null)
;
8334 }
8335}
8336
8337/* Getting team information common for all team API */
8338// Returns NULL if not in teams construct
8339static kmp_team_t *__kmp_aux_get_team_info(int &teams_serialized) {
8340 kmp_info_t *thr = __kmp_entry_thread();
8341 teams_serialized = 0;
8342 if (thr->th.th_teams_microtask) {
8343 kmp_team_t *team = thr->th.th_team;
8344 int tlevel = thr->th.th_teams_level; // the level of the teams construct
8345 int ii = team->t.t_level;
8346 teams_serialized = team->t.t_serialized;
8347 int level = tlevel + 1;
8348 KMP_DEBUG_ASSERT(ii >= tlevel)if (!(ii >= tlevel)) { __kmp_debug_assert("ii >= tlevel"
, "openmp/runtime/src/kmp_runtime.cpp", 8348); }
;
8349 while (ii > level) {
8350 for (teams_serialized = team->t.t_serialized;
8351 (teams_serialized > 0) && (ii > level); teams_serialized--, ii--) {
8352 }
8353 if (team->t.t_serialized && (!teams_serialized)) {
8354 team = team->t.t_parent;
8355 continue;
8356 }
8357 if (ii > level) {
8358 team = team->t.t_parent;
8359 ii--;
8360 }
8361 }
8362 return team;
8363 }
8364 return NULL__null;
8365}
8366
8367int __kmp_aux_get_team_num() {
8368 int serialized;
8369 kmp_team_t *team = __kmp_aux_get_team_info(serialized);
8370 if (team) {
8371 if (serialized > 1) {
8372 return 0; // teams region is serialized ( 1 team of 1 thread ).
8373 } else {
8374 return team->t.t_master_tid;
8375 }
8376 }
8377 return 0;
8378}
8379
8380int __kmp_aux_get_num_teams() {
8381 int serialized;
8382 kmp_team_t *team = __kmp_aux_get_team_info(serialized);
8383 if (team) {
8384 if (serialized > 1) {
8385 return 1;
8386 } else {
8387 return team->t.t_parent->t.t_nproc;
8388 }
8389 }
8390 return 1;
8391}
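These two helpers report the team number and team count as seen from inside a teams construct, with sensible defaults outside one. Assuming they back the standard API entry points (the listing does not show that mapping directly), a user-level view looks like the sketch below; host-side teams requires OpenMP 5.0 support:

#include <omp.h>
#include <cstdio>

int main() {
  #pragma omp teams num_teams(4)
  {
    // Each team's initial thread reports its position in the league.
    std::printf("team %d of %d\n", omp_get_team_num(), omp_get_num_teams());
  }
  return 0;
}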
8392
8393/* ------------------------------------------------------------------------ */
8394
8395/*
8396 * Affinity Format Parser
8397 *
8398 * Field is in form of: %[[[0].]size]type
8399 * % and type are required (%% means print a literal '%')
8400 * type is either a single char or a long name surrounded by {},
8401 * e.g., N or {num_threads}
8402 * 0 => leading zeros
8403 * . => right justified when size is specified
8404 * by default output is left justified
8405 * size is the *minimum* field length
8406 * All other characters are printed as is
8407 *
8408 * Available field types:
8409 * L {thread_level} - omp_get_level()
8410 * n {thread_num} - omp_get_thread_num()
8411 * h {host} - name of host machine
8412 * P {process_id} - process id (integer)
8413 * T {thread_identifier} - native thread identifier (integer)
8414 * N {num_threads} - omp_get_num_threads()
8415 * A {ancestor_tnum} - omp_get_ancestor_thread_num(omp_get_level()-1)
8416 * a {thread_affinity} - comma separated list of integers or integer ranges
8417 * (values of affinity mask)
8418 *
8419 * Implementation-specific field types can be added
8420 * If a type is unknown, print "undefined"
8421 */
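As a worked example of the grammar above: the field "%0.4n" selects leading zeros, right justification, a minimum width of 4, and the thread number, so the parser below (__kmp_aux_capture_affinity_field) should canonicalize it into an snprintf format of "%04d". A tiny standalone check of that rendering (illustrative only, not part of the analyzed file):

#include <cstdio>

int main() {
  char buf[16];
  // "%0.4n" in the affinity format reduces to "%04d" for the field value,
  // so thread number 7 renders as "0007".
  std::snprintf(buf, sizeof(buf), "%04d", 7);
  std::printf("%s\n", buf);
  return 0;
}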
8422
8423// Structure holding the short name, long name, and corresponding data type
8424 // for snprintf. A table of these represents the entire set of valid keyword
8425 // field types.
8426typedef struct kmp_affinity_format_field_t {
8427 char short_name; // from spec e.g., L -> thread level
8428 const char *long_name; // from spec thread_level -> thread level
8429 char field_format; // data type for snprintf (typically 'd' or 's'
8430 // for integer or string)
8431} kmp_affinity_format_field_t;
8432
8433static const kmp_affinity_format_field_t __kmp_affinity_format_table[] = {
8434#if KMP_AFFINITY_SUPPORTED1
8435 {'A', "thread_affinity", 's'},
8436#endif
8437 {'t', "team_num", 'd'},
8438 {'T', "num_teams", 'd'},
8439 {'L', "nesting_level", 'd'},
8440 {'n', "thread_num", 'd'},
8441 {'N', "num_threads", 'd'},
8442 {'a', "ancestor_tnum", 'd'},
8443 {'H', "host", 's'},
8444 {'P', "process_id", 'd'},
8445 {'i', "native_thread_id", 'd'}};
8446
8447 // Return the number of characters it takes to hold the field
8448static int __kmp_aux_capture_affinity_field(int gtid, const kmp_info_t *th,
8449 const char **ptr,
8450 kmp_str_buf_t *field_buffer) {
8451 int rc, format_index, field_value;
8452 const char *width_left, *width_right;
8453 bool pad_zeros, right_justify, parse_long_name, found_valid_name;
8454 static const int FORMAT_SIZE = 20;
8455 char format[FORMAT_SIZE] = {0};
8456 char absolute_short_name = 0;
8457
8458 KMP_DEBUG_ASSERT(gtid >= 0)if (!(gtid >= 0)) { __kmp_debug_assert("gtid >= 0", "openmp/runtime/src/kmp_runtime.cpp"
, 8458); }
;
8459 KMP_DEBUG_ASSERT(th)if (!(th)) { __kmp_debug_assert("th", "openmp/runtime/src/kmp_runtime.cpp"
, 8459); }
;
8460 KMP_DEBUG_ASSERT(**ptr == '%')if (!(**ptr == '%')) { __kmp_debug_assert("**ptr == '%'", "openmp/runtime/src/kmp_runtime.cpp"
, 8460); }
;
8461 KMP_DEBUG_ASSERT(field_buffer)if (!(field_buffer)) { __kmp_debug_assert("field_buffer", "openmp/runtime/src/kmp_runtime.cpp"
, 8461); }
;
8462
8463 __kmp_str_buf_clear(field_buffer);
8464
8465 // Skip the initial %
8466 (*ptr)++;
8467
8468 // Check for %% first
8469 if (**ptr == '%') {
8470 __kmp_str_buf_cat(field_buffer, "%", 1);
8471 (*ptr)++; // skip over the second %
8472 return 1;
8473 }
8474
8475 // Parse field modifiers if they are present
8476 pad_zeros = false;
8477 if (**ptr == '0') {
8478 pad_zeros = true;
8479 (*ptr)++; // skip over 0
8480 }
8481 right_justify = false;
8482 if (**ptr == '.') {
8483 right_justify = true;
8484 (*ptr)++; // skip over .
8485 }
8486 // Parse width of field: [width_left, width_right)
8487 width_left = width_right = NULL__null;
8488 if (**ptr >= '0' && **ptr <= '9') {
8489 width_left = *ptr;
8490 SKIP_DIGITS(*ptr){ while (*(*ptr) >= '0' && *(*ptr) <= '9') (*ptr
)++; }
;
8491 width_right = *ptr;
8492 }
8493
8494 // Create the format for KMP_SNPRINTF based on flags parsed above
8495 format_index = 0;
8496 format[format_index++] = '%';
8497 if (!right_justify)
8498 format[format_index++] = '-';
8499 if (pad_zeros)
8500 format[format_index++] = '0';
8501 if (width_left && width_right) {
8502 int i = 0;
8503 // Only allow 8 digit number widths.
8504 // This also prevents overflowing format variable
8505 while (i < 8 && width_left < width_right) {
8506 format[format_index++] = *width_left;
8507 width_left++;
8508 i++;
8509 }
8510 }
8511
8512 // Parse a name (long or short)
8513 // Canonicalize the name into absolute_short_name
8514 found_valid_name = false;
8515 parse_long_name = (**ptr == '{');
8516 if (parse_long_name)
8517 (*ptr)++; // skip initial left brace
8518 for (size_t i = 0; i < sizeof(__kmp_affinity_format_table) /
8519 sizeof(__kmp_affinity_format_table[0]);
8520 ++i) {
8521 char short_name = __kmp_affinity_format_table[i].short_name;
8522 const char *long_name = __kmp_affinity_format_table[i].long_name;
8523 char field_format = __kmp_affinity_format_table[i].field_format;
8524 if (parse_long_name) {
8525 size_t length = KMP_STRLENstrlen(long_name);
8526 if (strncmp(*ptr, long_name, length) == 0) {
8527 found_valid_name = true;
8528 (*ptr) += length; // skip the long name
8529 }
8530 } else if (**ptr == short_name) {
8531 found_valid_name = true;
8532 (*ptr)++; // skip the short name
8533 }
8534 if (found_valid_name) {
8535 format[format_index++] = field_format;
8536 format[format_index++] = '\0';
8537 absolute_short_name = short_name;
8538 break;
8539 }
8540 }
8541 if (parse_long_name) {
8542 if (**ptr != '}') {
8543 absolute_short_name = 0;
8544 } else {
8545 (*ptr)++; // skip over the right brace
8546 }
8547 }
8548
8549 // Attempt to fill the buffer with the requested
8550 // value using snprintf within __kmp_str_buf_print()
8551 switch (absolute_short_name) {
8552 case 't':
8553 rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_team_num());
8554 break;
8555 case 'T':
8556 rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_num_teams());
8557 break;
8558 case 'L':
8559 rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_level);
8560 break;
8561 case 'n':
8562 rc = __kmp_str_buf_print(field_buffer, format, __kmp_tid_from_gtid(gtid));
8563 break;
8564 case 'H': {
8565 static const int BUFFER_SIZE = 256;
8566 char buf[BUFFER_SIZE];
8567 __kmp_expand_host_name(buf, BUFFER_SIZE);
8568 rc = __kmp_str_buf_print(field_buffer, format, buf);
8569 } break;
8570 case 'P':
8571 rc = __kmp_str_buf_print(field_buffer, format, getpid());
8572 break;
8573 case 'i':
8574 rc = __kmp_str_buf_print(field_buffer, format, __kmp_gettid()syscall(186));
8575 break;
8576 case 'N':
8577 rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_nproc);
8578 break;
8579 case 'a':
8580 field_value =
8581 __kmp_get_ancestor_thread_num(gtid, th->th.th_team->t.t_level - 1);
8582 rc = __kmp_str_buf_print(field_buffer, format, field_value);
8583 break;
8584#if KMP_AFFINITY_SUPPORTED1
8585 case 'A': {
8586 kmp_str_buf_t buf;
8587 __kmp_str_buf_init(&buf){ (&buf)->str = (&buf)->bulk; (&buf)->size
= sizeof((&buf)->bulk); (&buf)->used = 0; (&
buf)->bulk[0] = 0; }
;
8588 __kmp_affinity_str_buf_mask(&buf, th->th.th_affin_mask);
8589 rc = __kmp_str_buf_print(field_buffer, format, buf.str);
8590 __kmp_str_buf_free(&buf);
8591 } break;
8592#endif
8593 default:
8594 // According to the spec, if an implementation does not have info for a field
8595 // type, then "undefined" is printed
8596 rc = __kmp_str_buf_print(field_buffer, "%s", "undefined");
8597 // Skip the field
8598 if (parse_long_name) {
8599 SKIP_TOKEN(*ptr){ while ((*(*ptr) >= '0' && *(*ptr) <= '9') || (
*(*ptr) >= 'a' && *(*ptr) <= 'z') || (*(*ptr) >=
'A' && *(*ptr) <= 'Z') || *(*ptr) == '_') (*ptr)++
; }
;
8600 if (**ptr == '}')
8601 (*ptr)++;
8602 } else {
8603 (*ptr)++;
8604 }
8605 }
8606
8607 KMP_ASSERT(format_index <= FORMAT_SIZE)if (!(format_index <= FORMAT_SIZE)) { __kmp_debug_assert("format_index <= FORMAT_SIZE"
, "openmp/runtime/src/kmp_runtime.cpp", 8607); }
;
8608 return rc;
8609}
8610
8611/*
8612 * Return number of characters needed to hold the affinity string
8613 * (not including null byte character)
8614 * The resultant string is printed to buffer, which the caller can then
8615 * handle afterwards
8616 */
8617size_t __kmp_aux_capture_affinity(int gtid, const char *format,
8618 kmp_str_buf_t *buffer) {
8619 const char *parse_ptr;
8620 size_t retval;
8621 const kmp_info_t *th;
8622 kmp_str_buf_t field;
8623
8624 KMP_DEBUG_ASSERT(buffer)if (!(buffer)) { __kmp_debug_assert("buffer", "openmp/runtime/src/kmp_runtime.cpp"
, 8624); }
;
8625 KMP_DEBUG_ASSERT(gtid >= 0)if (!(gtid >= 0)) { __kmp_debug_assert("gtid >= 0", "openmp/runtime/src/kmp_runtime.cpp"
, 8625); }
;
8626
8627 __kmp_str_buf_init(&field){ (&field)->str = (&field)->bulk; (&field)->
size = sizeof((&field)->bulk); (&field)->used =
0; (&field)->bulk[0] = 0; }
;
8628 __kmp_str_buf_clear(buffer);
8629
8630 th = __kmp_threads[gtid];
8631 retval = 0;
8632
8633 // If format is NULL or zero-length string, then we use
8634 // affinity-format-var ICV
8635 parse_ptr = format;
8636 if (parse_ptr == NULL__null || *parse_ptr == '\0') {
8637 parse_ptr = __kmp_affinity_format;
8638 }
8639 KMP_DEBUG_ASSERT(parse_ptr)if (!(parse_ptr)) { __kmp_debug_assert("parse_ptr", "openmp/runtime/src/kmp_runtime.cpp"
, 8639); }
;
8640
8641 while (*parse_ptr != '\0') {
8642 // Parse a field
8643 if (*parse_ptr == '%') {
8644 // Put field in the buffer
8645 int rc = __kmp_aux_capture_affinity_field(gtid, th, &parse_ptr, &field);
8646 __kmp_str_buf_catbuf(buffer, &field);
8647 retval += rc;
8648 } else {
8649 // Put literal character in buffer
8650 __kmp_str_buf_cat(buffer, parse_ptr, 1);
8651 retval++;
8652 parse_ptr++;
8653 }
8654 }
8655 __kmp_str_buf_free(&field);
8656 return retval;
8657}
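// Editorial sketch, not part of the annotated source: the "return the required
// length, excluding the null byte" contract above is what enables the usual
// size-then-allocate pattern with the OpenMP 5.0 omp_capture_affinity()
// routine. Assumes, as libomp does, that a null buffer with size 0 acts as a
// pure length query.
#include <omp.h>
#include <stdio.h>
#include <stdlib.h>

static void example_capture_affinity(const char *fmt) {
  size_t needed = omp_capture_affinity(NULL, 0, fmt); // length only
  char *buf = (char *)malloc(needed + 1); // +1 for the terminating null byte
  omp_capture_affinity(buf, needed + 1, fmt);
  printf("%s\n", buf);
  free(buf);
}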
8658
8659// Displays the affinity string to stdout
8660void __kmp_aux_display_affinity(int gtid, const char *format) {
8661 kmp_str_buf_t buf;
8662 __kmp_str_buf_init(&buf){ (&buf)->str = (&buf)->bulk; (&buf)->size
= sizeof((&buf)->bulk); (&buf)->used = 0; (&
buf)->bulk[0] = 0; }
;
8663 __kmp_aux_capture_affinity(gtid, format, &buf);
8664 __kmp_fprintf(kmp_out, "%s" KMP_END_OF_LINE"\n", buf.str);
8665 __kmp_str_buf_free(&buf);
8666}
8667
8668/* ------------------------------------------------------------------------ */
8669
8670void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid) {
8671 int blocktime = arg; /* argument is in milliseconds */
8672#if KMP_USE_MONITOR
8673 int bt_intervals;
8674#endif
8675 kmp_int8 bt_set;
8676
8677 __kmp_save_internal_controls(thread);
8678
8679 /* Normalize and set blocktime for the teams */
8680 if (blocktime < KMP_MIN_BLOCKTIME(0))
8681 blocktime = KMP_MIN_BLOCKTIME(0);
8682 else if (blocktime > KMP_MAX_BLOCKTIME(2147483647))
8683 blocktime = KMP_MAX_BLOCKTIME(2147483647);
8684
8685 set__blocktime_team(thread->th.th_team, tid, blocktime)(((thread->th.th_team)->t.t_threads[(tid)]->th.th_current_task
->td_icvs.blocktime) = (blocktime))
;
8686 set__blocktime_team(thread->th.th_serial_team, 0, blocktime)(((thread->th.th_serial_team)->t.t_threads[(0)]->th.
th_current_task->td_icvs.blocktime) = (blocktime))
;
8687
8688#if KMP_USE_MONITOR
8689 /* Calculate and set blocktime intervals for the teams */
8690 bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);
8691
8692 set__bt_intervals_team(thread->th.th_team, tid, bt_intervals);
8693 set__bt_intervals_team(thread->th.th_serial_team, 0, bt_intervals);
8694#endif
8695
8696 /* Set whether blocktime has been set to "TRUE" */
8697 bt_set = TRUE(!0);
8698
8699 set__bt_set_team(thread->th.th_team, tid, bt_set)(((thread->th.th_team)->t.t_threads[(tid)]->th.th_current_task
->td_icvs.bt_set) = (bt_set))
;
8700 set__bt_set_team(thread->th.th_serial_team, 0, bt_set)(((thread->th.th_serial_team)->t.t_threads[(0)]->th.
th_current_task->td_icvs.bt_set) = (bt_set))
;
8701#if KMP_USE_MONITOR
8702 KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "if (kmp_f_debug >= 10) { __kmp_debug_printf ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "
"bt_intervals=%d, monitor_updates=%d\n", __kmp_gtid_from_tid
(tid, thread->th.th_team), thread->th.th_team->t.t_id
, tid, blocktime, bt_intervals, __kmp_monitor_wakeups); }
8703 "bt_intervals=%d, monitor_updates=%d\n",if (kmp_f_debug >= 10) { __kmp_debug_printf ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "
"bt_intervals=%d, monitor_updates=%d\n", __kmp_gtid_from_tid
(tid, thread->th.th_team), thread->th.th_team->t.t_id
, tid, blocktime, bt_intervals, __kmp_monitor_wakeups); }
8704 __kmp_gtid_from_tid(tid, thread->th.th_team),if (kmp_f_debug >= 10) { __kmp_debug_printf ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "
"bt_intervals=%d, monitor_updates=%d\n", __kmp_gtid_from_tid
(tid, thread->th.th_team), thread->th.th_team->t.t_id
, tid, blocktime, bt_intervals, __kmp_monitor_wakeups); }
8705 thread->th.th_team->t.t_id, tid, blocktime, bt_intervals,if (kmp_f_debug >= 10) { __kmp_debug_printf ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "
"bt_intervals=%d, monitor_updates=%d\n", __kmp_gtid_from_tid
(tid, thread->th.th_team), thread->th.th_team->t.t_id
, tid, blocktime, bt_intervals, __kmp_monitor_wakeups); }
8706 __kmp_monitor_wakeups))if (kmp_f_debug >= 10) { __kmp_debug_printf ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "
"bt_intervals=%d, monitor_updates=%d\n", __kmp_gtid_from_tid
(tid, thread->th.th_team), thread->th.th_team->t.t_id
, tid, blocktime, bt_intervals, __kmp_monitor_wakeups); }
;
8707#else
8708 KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n",if (kmp_f_debug >= 10) { __kmp_debug_printf ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n"
, __kmp_gtid_from_tid(tid, thread->th.th_team), thread->
th.th_team->t.t_id, tid, blocktime); }
8709 __kmp_gtid_from_tid(tid, thread->th.th_team),if (kmp_f_debug >= 10) { __kmp_debug_printf ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n"
, __kmp_gtid_from_tid(tid, thread->th.th_team), thread->
th.th_team->t.t_id, tid, blocktime); }
8710 thread->th.th_team->t.t_id, tid, blocktime))if (kmp_f_debug >= 10) { __kmp_debug_printf ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n"
, __kmp_gtid_from_tid(tid, thread->th.th_team), thread->
th.th_team->t.t_id, tid, blocktime); }
;
8711#endif
8712}
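// Editorial sketch, not part of the annotated source: block time is usually
// configured through the KMP_BLOCKTIME environment variable or the
// kmp_set_blocktime() extension, which funnels into __kmp_aux_set_blocktime()
// above. kmp_set_blocktime() is an Intel/LLVM libomp extension, not standard
// OpenMP.
#include <omp.h>

static void example_set_blocktime() {
  // 0 ms: idle worker threads stop spinning and sleep right after a region.
  kmp_set_blocktime(0);
}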
8713
8714void __kmp_aux_set_defaults(char const *str, size_t len) {
8715 if (!__kmp_init_serial) {
8716 __kmp_serial_initialize();
8717 }
8718 __kmp_env_initialize(str);
8719
8720 if (__kmp_settings || __kmp_display_env || __kmp_display_env_verbose) {
8721 __kmp_env_print();
8722 }
8723} // __kmp_aux_set_defaults
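// Editorial sketch, not part of the annotated source: __kmp_aux_set_defaults()
// backs the kmp_set_defaults() libomp extension, which feeds a setting string
// through the same parser as the environment. It is typically called before
// the first parallel region so the runtime sees the value at startup.
#include <omp.h>

static void example_set_defaults() {
  kmp_set_defaults("KMP_BLOCKTIME=0"); // same effect as exporting the variable
}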
8724
8725/* ------------------------------------------------------------------------ */
8726/* internal fast reduction routines */
8727
8728PACKED_REDUCTION_METHOD_T
8729__kmp_determine_reduction_method(
8730 ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
8731 void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
8732 kmp_critical_name *lck) {
8733
8734 // Default reduction method: critical construct ( lck != NULL, like in current
8735 // PAROPT )
8736 // If ( reduce_data!=NULL && reduce_func!=NULL ): the tree-reduction method
8737 // can be selected by RTL
8738 // If loc->flags contains KMP_IDENT_ATOMIC_REDUCE, the atomic reduce method
8739 // can be selected by RTL
8740 // Finally, it's up to OpenMP RTL to make a decision on which method to select
8741 // among generated by PAROPT.
8742
8743 PACKED_REDUCTION_METHOD_T retval;
8744
8745 int team_size;
8746
8747 KMP_DEBUG_ASSERT(loc)if (!(loc)) { __kmp_debug_assert("loc", "openmp/runtime/src/kmp_runtime.cpp"
, 8747); }
; // it would be nice to test ( loc != 0 )
8748 KMP_DEBUG_ASSERT(lck)if (!(lck)) { __kmp_debug_assert("lck", "openmp/runtime/src/kmp_runtime.cpp"
, 8748); }
; // it would be nice to test ( lck != 0 )
8749
8750#define FAST_REDUCTION_ATOMIC_METHOD_GENERATED \
8751 (loc && \
8752 ((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE)))
8753#define FAST_REDUCTION_TREE_METHOD_GENERATED ((reduce_data) && (reduce_func))
8754
8755 retval = critical_reduce_block;
8756
8757 // another choice of getting the team size (with 1 dynamic dereference) is slower
8758 team_size = __kmp_get_team_num_threads(global_tid)(__kmp_threads[(global_tid)]->th.th_team->t.t_nproc);
8759 if (team_size == 1) {
8760
8761 retval = empty_reduce_block;
8762
8763 } else {
8764
8765 int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
8766
8767#if KMP_ARCH_X86_641 || KMP_ARCH_PPC64(0 || 0) || KMP_ARCH_AARCH640 || \
8768 KMP_ARCH_MIPS640 || KMP_ARCH_RISCV640 || KMP_ARCH_LOONGARCH640
8769
8770#if KMP_OS_LINUX1 || KMP_OS_DRAGONFLY0 || KMP_OS_FREEBSD0 || KMP_OS_NETBSD0 || \
8771 KMP_OS_OPENBSD0 || KMP_OS_WINDOWS0 || KMP_OS_DARWIN0 || KMP_OS_HURD0
8772
8773 int teamsize_cutoff = 4;
8774
8775#if KMP_MIC_SUPPORTED((0 || 1) && (1 || 0))
8776 if (__kmp_mic_type != non_mic) {
8777 teamsize_cutoff = 8;
8778 }
8779#endif
8780 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
8781 if (tree_available) {
8782 if (team_size <= teamsize_cutoff) {
8783 if (atomic_available) {
8784 retval = atomic_reduce_block;
8785 }
8786 } else {
8787 retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER(((tree_reduce_block) | (bs_reduction_barrier)));
8788 }
8789 } else if (atomic_available) {
8790 retval = atomic_reduce_block;
8791 }
8792#else
8793#error "Unknown or unsupported OS"
8794#endif // KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||
8795 // KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD
8796
8797#elif KMP_ARCH_X860 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS0
8798
8799#if KMP_OS_LINUX1 || KMP_OS_FREEBSD0 || KMP_OS_WINDOWS0 || KMP_OS_HURD0
8800
8801 // basic tuning
8802
8803 if (atomic_available) {
8804 if (num_vars <= 2) { // && ( team_size <= 8 ) due to false-sharing ???
8805 retval = atomic_reduce_block;
8806 }
8807 } // otherwise: use critical section
8808
8809#elif KMP_OS_DARWIN0
8810
8811 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
8812 if (atomic_available && (num_vars <= 3)) {
8813 retval = atomic_reduce_block;
8814 } else if (tree_available) {
8815 if ((reduce_size > (9 * sizeof(kmp_real64))) &&
8816 (reduce_size < (2000 * sizeof(kmp_real64)))) {
8817 retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER(((tree_reduce_block) | (bs_plain_barrier)));
8818 }
8819 } // otherwise: use critical section
8820
8821#else
8822#error "Unknown or unsupported OS"
8823#endif
8824
8825#else
8826#error "Unknown or unsupported architecture"
8827#endif
8828 }
8829
8830 // KMP_FORCE_REDUCTION
8831
8832 // If the team is serialized (team_size == 1), ignore the forced reduction
8833 // method and stay with the unsynchronized method (empty_reduce_block)
8834 if (__kmp_force_reduction_method != reduction_method_not_defined &&
8835 team_size != 1) {
8836
8837 PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block;
8838
8839 int atomic_available, tree_available;
8840
8841 switch ((forced_retval = __kmp_force_reduction_method)) {
8842 case critical_reduce_block:
8843 KMP_ASSERT(lck)if (!(lck)) { __kmp_debug_assert("lck", "openmp/runtime/src/kmp_runtime.cpp"
, 8843); }
; // lck should be != 0
8844 break;
8845
8846 case atomic_reduce_block:
8847 atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
8848 if (!atomic_available) {
8849 KMP_WARNING(RedMethodNotSupported, "atomic")__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_RedMethodNotSupported
, "atomic"), __kmp_msg_null)
;
8850 forced_retval = critical_reduce_block;
8851 }
8852 break;
8853
8854 case tree_reduce_block:
8855 tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
8856 if (!tree_available) {
8857 KMP_WARNING(RedMethodNotSupported, "tree")__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_RedMethodNotSupported
, "tree"), __kmp_msg_null)
;
8858 forced_retval = critical_reduce_block;
8859 } else {
8860#if KMP_FAST_REDUCTION_BARRIER1
8861 forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER(((tree_reduce_block) | (bs_reduction_barrier)));
8862#endif
8863 }
8864 break;
8865
8866 default:
8867 KMP_ASSERT(0)if (!(0)) { __kmp_debug_assert("0", "openmp/runtime/src/kmp_runtime.cpp"
, 8867); }
; // "unsupported method specified"
8868 }
8869
8870 retval = forced_retval;
8871 }
8872
8873 KA_TRACE(10, ("reduction method selected=%08x\n", retval))if (kmp_a_debug >= 10) { __kmp_debug_printf ("reduction method selected=%08x\n"
, retval); }
;
8874
8875#undef FAST_REDUCTION_TREE_METHOD_GENERATED
8876#undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED
8877
8878 return (retval);
8879}
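// Editorial sketch, not part of the annotated source: a simplified restatement
// of the selection logic above for the x86-64/Linux branch.
// example_pick_reduction_method() is a hypothetical helper, not a runtime
// entry point.
enum example_reduction { ex_critical, ex_atomic, ex_tree, ex_empty };

static enum example_reduction
example_pick_reduction_method(int team_size, bool atomic_ok, bool tree_ok,
                              int teamsize_cutoff /* 4, or 8 on MIC */) {
  if (team_size == 1)
    return ex_empty; // serialized team: no synchronization needed
  if (tree_ok) {
    if (team_size <= teamsize_cutoff)
      return atomic_ok ? ex_atomic : ex_critical;
    return ex_tree; // tree reduction with the reduction barrier
  }
  return atomic_ok ? ex_atomic : ex_critical;
}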
8880// this function is for testing set/get/determine reduce method
8881kmp_int32 __kmp_get_reduce_method(void) {
8882 return ((__kmp_entry_thread()->th.th_local.packed_reduction_method) >> 8);
8883}
8884
8885// Soft pause sets up threads to ignore blocktime and just go to sleep.
8886// Spin-wait code checks __kmp_pause_status and reacts accordingly.
8887void __kmp_soft_pause() { __kmp_pause_status = kmp_soft_paused; }
8888
8889// Hard pause shuts down the runtime completely. Resume happens naturally when
8890// OpenMP is used subsequently.
8891void __kmp_hard_pause() {
8892 __kmp_pause_status = kmp_hard_paused;
8893 __kmp_internal_end_thread(-1);
8894}
8895
8896// Soft resume sets __kmp_pause_status, and wakes up all threads.
8897void __kmp_resume_if_soft_paused() {
8898 if (__kmp_pause_status == kmp_soft_paused) {
8899 __kmp_pause_status = kmp_not_paused;
8900
8901 for (int gtid = 1; gtid < __kmp_threads_capacity; ++gtid) {
8902 kmp_info_t *thread = __kmp_threads[gtid];
8903 if (thread) { // Wake it if sleeping
8904 kmp_flag_64<> fl(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
8905 thread);
8906 if (fl.is_sleeping())
8907 fl.resume(gtid);
8908 else if (__kmp_try_suspend_mx(thread)) { // got suspend lock
8909 __kmp_unlock_suspend_mx(thread); // unlock it; it won't sleep
8910 } else { // thread holds the lock and may sleep soon
8911 do { // until either the thread sleeps, or we can get the lock
8912 if (fl.is_sleeping()) {
8913 fl.resume(gtid);
8914 break;
8915 } else if (__kmp_try_suspend_mx(thread)) {
8916 __kmp_unlock_suspend_mx(thread);
8917 break;
8918 }
8919 } while (1);
8920 }
8921 }
8922 }
8923 }
8924}
8925
8926// This function is called via __kmpc_pause_resource. Returns 0 if successful.
8927// TODO: add warning messages
8928int __kmp_pause_resource(kmp_pause_status_t level) {
8929 if (level == kmp_not_paused) { // requesting resume
8930 if (__kmp_pause_status == kmp_not_paused) {
8931 // error message about runtime not being paused, so can't resume
8932 return 1;
8933 } else {
8934 KMP_DEBUG_ASSERT(__kmp_pause_status == kmp_soft_paused ||if (!(__kmp_pause_status == kmp_soft_paused || __kmp_pause_status
== kmp_hard_paused)) { __kmp_debug_assert("__kmp_pause_status == kmp_soft_paused || __kmp_pause_status == kmp_hard_paused"
, "openmp/runtime/src/kmp_runtime.cpp", 8935); }
8935 __kmp_pause_status == kmp_hard_paused)if (!(__kmp_pause_status == kmp_soft_paused || __kmp_pause_status
== kmp_hard_paused)) { __kmp_debug_assert("__kmp_pause_status == kmp_soft_paused || __kmp_pause_status == kmp_hard_paused"
, "openmp/runtime/src/kmp_runtime.cpp", 8935); }
;
8936 __kmp_pause_status = kmp_not_paused;
8937 return 0;
8938 }
8939 } else if (level == kmp_soft_paused) { // requesting soft pause
8940 if (__kmp_pause_status != kmp_not_paused) {
8941 // error message about already being paused
8942 return 1;
8943 } else {
8944 __kmp_soft_pause();
8945 return 0;
8946 }
8947 } else if (level == kmp_hard_paused) { // requesting hard pause
8948 if (__kmp_pause_status != kmp_not_paused) {
8949 // error message about already being paused
8950 return 1;
8951 } else {
8952 __kmp_hard_pause();
8953 return 0;
8954 }
8955 } else {
8956 // error message about invalid level
8957 return 1;
8958 }
8959}
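// Editorial sketch, not part of the annotated source: the OpenMP 5.0 pause API
// that eventually reaches __kmp_pause_resource(). The zero-on-success
// convention mirrors the code above.
#include <omp.h>
#include <stdio.h>

static void example_quiesce_runtime() {
  if (omp_pause_resource_all(omp_pause_soft) != 0)
    fprintf(stderr, "runtime could not be soft-paused\n");
  // The runtime resumes automatically the next time OpenMP is used.
}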
8960
8961void __kmp_omp_display_env(int verbose) {
8962 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
8963 if (__kmp_init_serial == 0)
8964 __kmp_do_serial_initialize();
8965 __kmp_display_env_impl(!verbose, verbose);
8966 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
8967}
8968
8969// The team size is changing, so the distributed barrier must be modified
8970void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
8971 int new_nthreads) {
8972 KMP_DEBUG_ASSERT(__kmp_barrier_release_pattern[bs_forkjoin_barrier] ==if (!(__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar
)) { __kmp_debug_assert("__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar"
, "openmp/runtime/src/kmp_runtime.cpp", 8973); }
8973 bp_dist_bar)if (!(__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar
)) { __kmp_debug_assert("__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar"
, "openmp/runtime/src/kmp_runtime.cpp", 8973); }
;
8974 kmp_info_t **other_threads = team->t.t_threads;
8975
8976 // We want all the workers to stop waiting on the barrier while we adjust the
8977 // size of the team.
8978 for (int f = 1; f < old_nthreads; ++f) {
8979 KMP_DEBUG_ASSERT(other_threads[f] != NULL)if (!(other_threads[f] != __null)) { __kmp_debug_assert("other_threads[f] != __null"
, "openmp/runtime/src/kmp_runtime.cpp", 8979); }
;
8980 // Ignore threads that are already inactive or not present in the team
8981 if (team->t.t_threads[f]->th.th_used_in_team.load() == 0) {
8982 // teams construct causes thread_limit to get passed in, and some of
8983 // those could be inactive; just ignore them
8984 continue;
8985 }
8986 // If thread is transitioning still to in_use state, wait for it
8987 if (team->t.t_threads[f]->th.th_used_in_team.load() == 3) {
8988 while (team->t.t_threads[f]->th.th_used_in_team.load() == 3)
8989 KMP_CPU_PAUSE()__kmp_x86_pause();
8990 }
8991 // The thread should be in_use now
8992 KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 1)if (!(team->t.t_threads[f]->th.th_used_in_team.load() ==
1)) { __kmp_debug_assert("team->t.t_threads[f]->th.th_used_in_team.load() == 1"
, "openmp/runtime/src/kmp_runtime.cpp", 8992); }
;
8993 // Transition to unused state
8994 team->t.t_threads[f]->th.th_used_in_team.store(2);
8995 KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 2)if (!(team->t.t_threads[f]->th.th_used_in_team.load() ==
2)) { __kmp_debug_assert("team->t.t_threads[f]->th.th_used_in_team.load() == 2"
, "openmp/runtime/src/kmp_runtime.cpp", 8995); }
;
8996 }
8997 // Release all the workers
8998 team->t.b->go_release();
8999
9000 KMP_MFENCE()if (__builtin_expect(!!(!__kmp_cpuinfo.initialized), 0)) { __kmp_query_cpuid
(&__kmp_cpuinfo); } if (__kmp_cpuinfo.flags.sse2) { __sync_synchronize
(); }
;
9001
9002 // Workers should see transition status 2 and move to 0; but may need to be
9003 // woken up first
9004 int count = old_nthreads - 1;
9005 while (count > 0) {
9006 count = old_nthreads - 1;
9007 for (int f = 1; f < old_nthreads; ++f) {
9008 if (other_threads[f]->th.th_used_in_team.load() != 0) {
9009 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME(2147483647)) { // Wake up the workers
9010 kmp_atomic_flag_64<> *flag = (kmp_atomic_flag_64<> *)CCAST(const_cast<void *>(other_threads[f]->th.th_sleep_loc
)
9011 void *, other_threads[f]->th.th_sleep_loc)const_cast<void *>(other_threads[f]->th.th_sleep_loc
)
;
9012 __kmp_atomic_resume_64(other_threads[f]->th.th_info.ds.ds_gtid, flag);
9013 }
9014 } else {
9015 KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 0)if (!(team->t.t_threads[f]->th.th_used_in_team.load() ==
0)) { __kmp_debug_assert("team->t.t_threads[f]->th.th_used_in_team.load() == 0"
, "openmp/runtime/src/kmp_runtime.cpp", 9015); }
;
9016 count--;
9017 }
9018 }
9019 }
9020 // Now update the barrier size
9021 team->t.b->update_num_threads(new_nthreads);
9022 team->t.b->go_reset();
9023}
9024
9025void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads) {
9026 // Add the threads back to the team
9027 KMP_DEBUG_ASSERT(team)if (!(team)) { __kmp_debug_assert("team", "openmp/runtime/src/kmp_runtime.cpp"
, 9027); }
;
9028 // Threads were paused and pointed at th_used_in_team temporarily during a
9029 // resize of the team. We're going to set th_used_in_team to 3 to indicate to
9030 // the thread that it should transition itself back into the team. Then, if
9031 // blocktime isn't infinite, the thread could be sleeping, so we send a resume
9032 // to wake it up.
9033 for (int f = 1; f < new_nthreads; ++f) {
9034 KMP_DEBUG_ASSERT(team->t.t_threads[f])if (!(team->t.t_threads[f])) { __kmp_debug_assert("team->t.t_threads[f]"
, "openmp/runtime/src/kmp_runtime.cpp", 9034); }
;
9035 KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team), 0,__sync_bool_compare_and_swap((volatile kmp_uint32 *)(&(team
->t.t_threads[f]->th.th_used_in_team)), (kmp_uint32)(0)
, (kmp_uint32)(3))
9036 3)__sync_bool_compare_and_swap((volatile kmp_uint32 *)(&(team
->t.t_threads[f]->th.th_used_in_team)), (kmp_uint32)(0)
, (kmp_uint32)(3))
;
9037 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME(2147483647)) { // Wake up sleeping threads
9038 __kmp_resume_32(team->t.t_threads[f]->th.th_info.ds.ds_gtid,
9039 (kmp_flag_32<false, false> *)NULL__null);
9040 }
9041 }
9042 // The threads should be transitioning to the team; when they are done, they
9043 // should have set th_used_in_team to 1. This loop forces the master to wait
9044 // until all threads have moved into the team and are waiting in the barrier.
9045 int count = new_nthreads - 1;
9046 while (count > 0) {
9047 count = new_nthreads - 1;
9048 for (int f = 1; f < new_nthreads; ++f) {
9049 if (team->t.t_threads[f]->th.th_used_in_team.load() == 1) {
9050 count--;
9051 }
9052 }
9053 }
9054}
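// Editorial sketch, not part of the annotated source: the th_used_in_team
// values that __kmp_resize_dist_barrier() and __kmp_add_threads_to_team()
// drive the workers through. The enumerator names are editorial; the runtime
// stores plain integers.
enum example_used_in_team_state {
  ex_not_in_team = 0, // worker is outside the team (or inactive)
  ex_in_team = 1, // worker has fully joined the team
  ex_leaving_team = 2, // primary thread asked the worker to leave (resize)
  ex_joining_team = 3 // primary thread asked the worker to rejoin
};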
9055
9056// Globals and functions for hidden helper task
9057kmp_info_t **__kmp_hidden_helper_threads;
9058kmp_info_t *__kmp_hidden_helper_main_thread;
9059std::atomic<kmp_int32> __kmp_unexecuted_hidden_helper_tasks;
9060#if KMP_OS_LINUX1
9061kmp_int32 __kmp_hidden_helper_threads_num = 8;
9062kmp_int32 __kmp_enable_hidden_helper = TRUE(!0);
9063#else
9064kmp_int32 __kmp_hidden_helper_threads_num = 0;
9065kmp_int32 __kmp_enable_hidden_helper = FALSE0;
9066#endif
9067
9068namespace {
9069std::atomic<kmp_int32> __kmp_hit_hidden_helper_threads_num;
9070
9071void __kmp_hidden_helper_wrapper_fn(int *gtid, int *, ...) {
9072 // This is an explicit synchronization across all hidden helper threads, in
9073 // case a regular thread pushes a hidden helper task to a hidden helper
9074 // thread that has not yet been awakened since being released by the main
9075 // thread after creating the team.
9076 KMP_ATOMIC_INC(&__kmp_hit_hidden_helper_threads_num)(&__kmp_hit_hidden_helper_threads_num)->fetch_add(1, std
::memory_order_acq_rel)
;
9077 while (KMP_ATOMIC_LD_ACQ(&__kmp_hit_hidden_helper_threads_num)(&__kmp_hit_hidden_helper_threads_num)->load(std::memory_order_acquire
)
!=
9078 __kmp_hidden_helper_threads_num)
9079 ;
9080
9081 // If main thread, then wait for signal
9082 if (__kmpc_master(nullptr, *gtid)) {
9083 // First, unset the initial state and release the initial thread
9084 TCW_4(__kmp_init_hidden_helper_threads, FALSE)(__kmp_init_hidden_helper_threads) = (0);
9085 __kmp_hidden_helper_initz_release();
9086 __kmp_hidden_helper_main_thread_wait();
9087 // Now wake up all worker threads
9088 for (int i = 1; i < __kmp_hit_hidden_helper_threads_num; ++i) {
9089 __kmp_hidden_helper_worker_thread_signal();
9090 }
9091 }
9092}
9093} // namespace
9094
9095void __kmp_hidden_helper_threads_initz_routine() {
9096 // Create a new root for hidden helper team/threads
9097 const int gtid = __kmp_register_root(TRUE(!0));
9098 __kmp_hidden_helper_main_thread = __kmp_threads[gtid];
9099 __kmp_hidden_helper_threads = &__kmp_threads[gtid];
9100 __kmp_hidden_helper_main_thread->th.th_set_nproc =
9101 __kmp_hidden_helper_threads_num;
9102
9103 KMP_ATOMIC_ST_REL(&__kmp_hit_hidden_helper_threads_num, 0)(&__kmp_hit_hidden_helper_threads_num)->store(0, std::
memory_order_release)
;
9104
9105 __kmpc_fork_call(nullptr, 0, __kmp_hidden_helper_wrapper_fn);
9106
9107 // Set the initialization flag to FALSE
9108 TCW_SYNC_4(__kmp_init_hidden_helper, FALSE)(__kmp_init_hidden_helper) = (0);
9109
9110 __kmp_hidden_helper_threads_deinitz_release();
9111}
9112
9113/* Nesting Mode:
9114 Set via KMP_NESTING_MODE, which takes an integer.
9115 Note: we skip duplicate topology levels, and skip levels with only
9116 one entity.
9117 KMP_NESTING_MODE=0 is the default, and doesn't use nesting mode.
9118 KMP_NESTING_MODE=1 sets as many nesting levels as there are distinct levels
9119 in the topology, and initializes the number of threads at each of those
9120 levels to the number of entities at each level, respectively, below the
9121 entity at the parent level.
9122 KMP_NESTING_MODE=N, where N>1, attempts to create up to N nesting levels,
9123 but starts with nesting OFF -- max-active-levels-var is 1 -- and requires
9124 the user to turn nesting on explicitly. This is an even more experimental
9125 option to this experimental feature, and may change or go away in the
9126 future.
9127*/
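// Editorial sketch, not part of the annotated source: KMP_NESTING_MODE is an
// environment variable, so the experimental mode described above is normally
// requested before the runtime initializes, e.g. from a wrapper script or
// early in main(). Assumes a POSIX setenv().
#include <stdlib.h>

static void example_request_topology_nesting() {
  // Mode 1: one nesting level per distinct topology level. Must be set before
  // the first parallel region so the runtime reads it at startup.
  setenv("KMP_NESTING_MODE", "1", /*overwrite=*/1);
}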
9128
9129// Allocate space to store nesting levels
9130void __kmp_init_nesting_mode() {
9131 int levels = KMP_HW_LAST;
9132 __kmp_nesting_mode_nlevels = levels;
9133 __kmp_nesting_nth_level = (int *)KMP_INTERNAL_MALLOC(levels * sizeof(int))malloc(levels * sizeof(int));
9134 for (int i = 0; i < levels; ++i)
9135 __kmp_nesting_nth_level[i] = 0;
9136 if (__kmp_nested_nth.size < levels) {
9137 __kmp_nested_nth.nth =
9138 (int *)KMP_INTERNAL_REALLOC(__kmp_nested_nth.nth, levels * sizeof(int))realloc((__kmp_nested_nth.nth), (levels * sizeof(int)));
9139 __kmp_nested_nth.size = levels;
9140 }
9141}
9142
9143// Set # threads for top levels of nesting; must be called after topology set
9144void __kmp_set_nesting_mode_threads() {
9145 kmp_info_t *thread = __kmp_threads[__kmp_entry_gtid()__kmp_get_global_thread_id_reg()];
9146
9147 if (__kmp_nesting_mode == 1)
9148 __kmp_nesting_mode_nlevels = KMP_MAX_ACTIVE_LEVELS_LIMIT2147483647;
9149 else if (__kmp_nesting_mode > 1)
9150 __kmp_nesting_mode_nlevels = __kmp_nesting_mode;
9151
9152 if (__kmp_topology) { // use topology info
9153 int loc, hw_level;
9154 for (loc = 0, hw_level = 0; hw_level < __kmp_topology->get_depth() &&
9155 loc < __kmp_nesting_mode_nlevels;
9156 loc++, hw_level++) {
9157 __kmp_nesting_nth_level[loc] = __kmp_topology->get_ratio(hw_level);
9158 if (__kmp_nesting_nth_level[loc] == 1)
9159 loc--;
9160 }
9161 // Make sure all cores are used
9162 if (__kmp_nesting_mode > 1 && loc > 1) {
9163 int core_level = __kmp_topology->get_level(KMP_HW_CORE);
9164 int num_cores = __kmp_topology->get_count(core_level);
9165 int upper_levels = 1;
9166 for (int level = 0; level < loc - 1; ++level)
9167 upper_levels *= __kmp_nesting_nth_level[level];
9168 if (upper_levels * __kmp_nesting_nth_level[loc - 1] < num_cores)
9169 __kmp_nesting_nth_level[loc - 1] =
9170 num_cores / __kmp_nesting_nth_level[loc - 2];
9171 }
9172 __kmp_nesting_mode_nlevels = loc;
9173 __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
9174 } else { // no topology info available; provide a reasonable guesstimation
9175 if (__kmp_avail_proc >= 4) {
9176 __kmp_nesting_nth_level[0] = __kmp_avail_proc / 2;
9177 __kmp_nesting_nth_level[1] = 2;
9178 __kmp_nesting_mode_nlevels = 2;
9179 } else {
9180 __kmp_nesting_nth_level[0] = __kmp_avail_proc;
9181 __kmp_nesting_mode_nlevels = 1;
9182 }
9183 __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
9184 }
9185 for (int i = 0; i < __kmp_nesting_mode_nlevels; ++i) {
9186 __kmp_nested_nth.nth[i] = __kmp_nesting_nth_level[i];
9187 }
9188 set__nproc(thread, __kmp_nesting_nth_level[0])(((thread)->th.th_current_task->td_icvs.nproc) = (__kmp_nesting_nth_level
[0]))
;
9189 if (__kmp_nesting_mode > 1 && __kmp_nesting_mode_nlevels > __kmp_nesting_mode)
9190 __kmp_nesting_mode_nlevels = __kmp_nesting_mode;
9191 if (get__max_active_levels(thread)((thread)->th.th_current_task->td_icvs.max_active_levels
)
> 1) {
9192 // if max levels was set, set nesting mode levels to same
9193 __kmp_nesting_mode_nlevels = get__max_active_levels(thread)((thread)->th.th_current_task->td_icvs.max_active_levels
)
;
9194 }
9195 if (__kmp_nesting_mode == 1) // turn on nesting for this case only
9196 set__max_active_levels(thread, __kmp_nesting_mode_nlevels)(((thread)->th.th_current_task->td_icvs.max_active_levels
) = (__kmp_nesting_mode_nlevels))
;
9197}
9198
9199// Empty symbols to export (see exports_so.txt) when feature is disabled
9200extern "C" {
9201#if !KMP_STATS_ENABLED0
9202void __kmp_reset_stats() {}
9203#endif
9204#if !USE_DEBUGGER0
9205int __kmp_omp_debug_struct_info = FALSE0;
9206int __kmp_debugging = FALSE0;
9207#endif
9208#if !USE_ITT_BUILD1 || !USE_ITT_NOTIFY1
9209void __kmp_itt_fini_ittlib() {}
9210void __kmp_itt_init_ittlib() {}
9211#endif
9212}
9213
9214// end of file

/build/source/openmp/runtime/src/kmp.h

1/*! \file */
2/*
3 * kmp.h -- KPTS runtime header file.
4 */
5
6//===----------------------------------------------------------------------===//
7//
8// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
9// See https://llvm.org/LICENSE.txt for license information.
10// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef KMP_H
15#define KMP_H
16
17#include "kmp_config.h"
18
19/* #define BUILD_PARALLEL_ORDERED 1 */
20
21/* This fix replaces gettimeofday with clock_gettime for better scalability on
22 the Altix. Requires user code to be linked with -lrt. */
23//#define FIX_SGI_CLOCK
24
25/* Defines for OpenMP 3.0 tasking and auto scheduling */
26
27#ifndef KMP_STATIC_STEAL_ENABLED1
28#define KMP_STATIC_STEAL_ENABLED1 1
29#endif
30
31#define TASK_CURRENT_NOT_QUEUED0 0
32#define TASK_CURRENT_QUEUED1 1
33
34#ifdef BUILD_TIED_TASK_STACK
35#define TASK_STACK_EMPTY 0 // entries when the stack is empty
36#define TASK_STACK_BLOCK_BITS 5 // Used in TASK_STACK_SIZE and TASK_STACK_MASK
37// Number of entries in each task stack array
38#define TASK_STACK_BLOCK_SIZE (1 << TASK_STACK_BLOCK_BITS)
39// Mask for determining index into stack block
40#define TASK_STACK_INDEX_MASK (TASK_STACK_BLOCK_SIZE - 1)
41#endif // BUILD_TIED_TASK_STACK
42
43#define TASK_NOT_PUSHED1 1
44#define TASK_SUCCESSFULLY_PUSHED0 0
45#define TASK_TIED1 1
46#define TASK_UNTIED0 0
47#define TASK_EXPLICIT1 1
48#define TASK_IMPLICIT0 0
49#define TASK_PROXY1 1
50#define TASK_FULL0 0
51#define TASK_DETACHABLE1 1
52#define TASK_UNDETACHABLE0 0
53
54#define KMP_CANCEL_THREADS
55#define KMP_THREAD_ATTR
56
57// Android does not have pthread_cancel. Undefine KMP_CANCEL_THREADS if being
58// built on Android
59#if defined(__ANDROID__)
60#undef KMP_CANCEL_THREADS
61#endif
62
63#include <signal.h>
64#include <stdarg.h>
65#include <stddef.h>
66#include <stdio.h>
67#include <stdlib.h>
68#include <string.h>
69#include <limits>
70#include <type_traits>
71/* Do not include <ctype.h>: it causes problems with /MD on Windows* OS NT due to a
72 bad Microsoft library. Some macros are provided below to replace these functions */
73#ifndef __ABSOFT_WIN
74#include <sys/types.h>
75#endif
76#include <limits.h>
77#include <time.h>
78
79#include <errno(*__errno_location ()).h>
80
81#include "kmp_os.h"
82
83#include "kmp_safe_c_api.h"
84
85#if KMP_STATS_ENABLED0
86class kmp_stats_list;
87#endif
88
89#if KMP_USE_HIER_SCHED0
90// Only include hierarchical scheduling if affinity is supported
91#undef KMP_USE_HIER_SCHED0
92#define KMP_USE_HIER_SCHED0 KMP_AFFINITY_SUPPORTED1
93#endif
94
95#if KMP_USE_HWLOC0 && KMP_AFFINITY_SUPPORTED1
96#include "hwloc.h"
97#ifndef HWLOC_OBJ_NUMANODE
98#define HWLOC_OBJ_NUMANODE HWLOC_OBJ_NODE
99#endif
100#ifndef HWLOC_OBJ_PACKAGE
101#define HWLOC_OBJ_PACKAGE HWLOC_OBJ_SOCKET
102#endif
103#endif
104
105#if KMP_ARCH_X860 || KMP_ARCH_X86_641
106#include <xmmintrin.h>
107#endif
108
109// The below has to be defined before including "kmp_barrier.h".
110#define KMP_INTERNAL_MALLOC(sz)malloc(sz) malloc(sz)
111#define KMP_INTERNAL_FREE(p)free(p) free(p)
112#define KMP_INTERNAL_REALLOC(p, sz)realloc((p), (sz)) realloc((p), (sz))
113#define KMP_INTERNAL_CALLOC(n, sz)calloc((n), (sz)) calloc((n), (sz))
114
115#include "kmp_debug.h"
116#include "kmp_lock.h"
117#include "kmp_version.h"
118#include "kmp_barrier.h"
119#if USE_DEBUGGER0
120#include "kmp_debugger.h"
121#endif
122#include "kmp_i18n.h"
123
124#define KMP_HANDLE_SIGNALS(1 || 0) (KMP_OS_UNIX1 || KMP_OS_WINDOWS0)
125
126#include "kmp_wrapper_malloc.h"
127#if KMP_OS_UNIX1
128#include <unistd.h>
129#if !defined NSIG(64 + 1) && defined _NSIG(64 + 1)
130#define NSIG(64 + 1) _NSIG(64 + 1)
131#endif
132#endif
133
134#if KMP_OS_LINUX1
135#pragma weak clock_gettime
136#endif
137
138#if OMPT_SUPPORT1
139#include "ompt-internal.h"
140#endif
141
142#if OMPD_SUPPORT1
143#include "ompd-specific.h"
144#endif
145
146#ifndef UNLIKELY
147#define UNLIKELY(x)__builtin_expect(!!(x), 0) (x)
148#endif
149
150// Affinity format function
151#include "kmp_str.h"
152
153// 0 - no fast memory allocation, alignment: 8-byte on x86, 16-byte on x64.
154// 3 - fast allocation using sync, non-sync free lists of any size, non-self
155// free lists of limited size.
156#ifndef USE_FAST_MEMORY3
157#define USE_FAST_MEMORY3 3
158#endif
159
160#ifndef KMP_NESTED_HOT_TEAMS1
161#define KMP_NESTED_HOT_TEAMS1 0
162#define USE_NESTED_HOT_ARG(x), x
163#else
164#if KMP_NESTED_HOT_TEAMS1
165#define USE_NESTED_HOT_ARG(x), x , x
166#else
167#define USE_NESTED_HOT_ARG(x), x
168#endif
169#endif
170
171// Assume using BGET compare_exchange instruction instead of lock by default.
172#ifndef USE_CMP_XCHG_FOR_BGET1
173#define USE_CMP_XCHG_FOR_BGET1 1
174#endif
175
176// Test to see if queuing lock is better than bootstrap lock for bget
177// #ifndef USE_QUEUING_LOCK_FOR_BGET
178// #define USE_QUEUING_LOCK_FOR_BGET
179// #endif
180
181#define KMP_NSEC_PER_SEC1000000000L 1000000000L
182#define KMP_USEC_PER_SEC1000000L 1000000L
183
184/*!
185@ingroup BASIC_TYPES
186@{
187*/
188
189/*!
190Values for bit flags used in the ident_t to describe the fields.
191*/
192enum {
193 /*! Use trampoline for internal microtasks */
194 KMP_IDENT_IMB = 0x01,
195 /*! Use c-style ident structure */
196 KMP_IDENT_KMPC = 0x02,
197 /* 0x04 is no longer used */
198 /*! Entry point generated by auto-parallelization */
199 KMP_IDENT_AUTOPAR = 0x08,
200 /*! Compiler generates atomic reduction option for kmpc_reduce* */
201 KMP_IDENT_ATOMIC_REDUCE = 0x10,
202 /*! To mark a 'barrier' directive in user code */
203 KMP_IDENT_BARRIER_EXPL = 0x20,
204 /*! To Mark implicit barriers. */
205 KMP_IDENT_BARRIER_IMPL = 0x0040,
206 KMP_IDENT_BARRIER_IMPL_MASK = 0x01C0,
207 KMP_IDENT_BARRIER_IMPL_FOR = 0x0040,
208 KMP_IDENT_BARRIER_IMPL_SECTIONS = 0x00C0,
209
210 KMP_IDENT_BARRIER_IMPL_SINGLE = 0x0140,
211 KMP_IDENT_BARRIER_IMPL_WORKSHARE = 0x01C0,
212
213 /*! To mark a static loop in OMPT callbacks */
214 KMP_IDENT_WORK_LOOP = 0x200,
215 /*! To mark a sections directive in OMPT callbacks */
216 KMP_IDENT_WORK_SECTIONS = 0x400,
217 /*! To mark a distribute construct in OMPT callbacks */
218 KMP_IDENT_WORK_DISTRIBUTE = 0x800,
219 /*! Atomic hint; bottom four bits as omp_sync_hint_t. Top four reserved and
220 not currently used. If one day we need more bits, then we can use
221 an invalid combination of hints to mean that another, larger field
222 should be used in a different flag. */
223 KMP_IDENT_ATOMIC_HINT_MASK = 0xFF0000,
224 KMP_IDENT_ATOMIC_HINT_UNCONTENDED = 0x010000,
225 KMP_IDENT_ATOMIC_HINT_CONTENDED = 0x020000,
226 KMP_IDENT_ATOMIC_HINT_NONSPECULATIVE = 0x040000,
227 KMP_IDENT_ATOMIC_HINT_SPECULATIVE = 0x080000,
228 KMP_IDENT_OPENMP_SPEC_VERSION_MASK = 0xFF000000
229};
230
231/*!
232 * The ident structure that describes a source location.
233 */
234typedef struct ident {
235 kmp_int32 reserved_1; /**< might be used in Fortran; see above */
236 kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; KMP_IDENT_KMPC
237 identifies this union member */
238 kmp_int32 reserved_2; /**< not really used in Fortran any more; see above */
239#if USE_ITT_BUILD1
240/* but currently used for storing region-specific ITT */
241/* contextual information. */
242#endif /* USE_ITT_BUILD */
243 kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for C++ */
244 char const *psource; /**< String describing the source location.
245 The string is composed of semi-colon separated fields
246 which describe the source file, the function and a pair
247 of line numbers that delimit the construct. */
248 // Returns the OpenMP version in form major*10+minor (e.g., 50 for 5.0)
249 kmp_int32 get_openmp_version() {
250 return (((flags & KMP_IDENT_OPENMP_SPEC_VERSION_MASK) >> 24) & 0xFF);
251 }
252} ident_t;
253/*!
254@}
255*/
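// Editorial sketch, not part of the annotated source: how the version byte in
// ident_t::flags decodes. The helper name is editorial.
static int example_ident_version() {
  ident_t loc = {};
  loc.flags = KMP_IDENT_KMPC | 0x32000000; // top byte carries major*10+minor
  return loc.get_openmp_version(); // yields 50, i.e. OpenMP 5.0
}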
256
257// Some forward declarations.
258typedef union kmp_team kmp_team_t;
259typedef struct kmp_taskdata kmp_taskdata_t;
260typedef union kmp_task_team kmp_task_team_t;
261typedef union kmp_team kmp_team_p;
262typedef union kmp_info kmp_info_p;
263typedef union kmp_root kmp_root_p;
264
265template <bool C = false, bool S = true> class kmp_flag_32;
266template <bool C = false, bool S = true> class kmp_flag_64;
267template <bool C = false, bool S = true> class kmp_atomic_flag_64;
268class kmp_flag_oncore;
269
270#ifdef __cplusplus201703L
271extern "C" {
272#endif
273
274/* ------------------------------------------------------------------------ */
275
276/* Pack two 32-bit signed integers into a 64-bit signed integer */
277/* ToDo: Fix word ordering for big-endian machines. */
278#define KMP_PACK_64(HIGH_32, LOW_32)((kmp_int64)((((kmp_uint64)(HIGH_32)) << 32) | (kmp_uint64
)(LOW_32)))
\
279 ((kmp_int64)((((kmp_uint64)(HIGH_32)) << 32) | (kmp_uint64)(LOW_32)))
280
281// Generic string manipulation macros. Assume that _x is of type char *
282#define SKIP_WS(_x){ while (*(_x) == ' ' || *(_x) == '\t') (_x)++; } \
283 { \
284 while (*(_x) == ' ' || *(_x) == '\t') \
285 (_x)++; \
286 }
287#define SKIP_DIGITS(_x){ while (*(_x) >= '0' && *(_x) <= '9') (_x)++; } \
288 { \
289 while (*(_x) >= '0' && *(_x) <= '9') \
290 (_x)++; \
291 }
292#define SKIP_TOKEN(_x){ while ((*(_x) >= '0' && *(_x) <= '9') || (*(_x
) >= 'a' && *(_x) <= 'z') || (*(_x) >= 'A' &&
*(_x) <= 'Z') || *(_x) == '_') (_x)++; }
\
293 { \
294 while ((*(_x) >= '0' && *(_x) <= '9') || (*(_x) >= 'a' && *(_x) <= 'z') || \
295 (*(_x) >= 'A' && *(_x) <= 'Z') || *(_x) == '_') \
296 (_x)++; \
297 }
298#define SKIP_TO(_x, _c){ while (*(_x) != '\0' && *(_x) != (_c)) (_x)++; } \
299 { \
300 while (*(_x) != '\0' && *(_x) != (_c)) \
301 (_x)++; \
302 }
303
304/* ------------------------------------------------------------------------ */
305
306#define KMP_MAX(x, y)((x) > (y) ? (x) : (y)) ((x) > (y) ? (x) : (y))
307#define KMP_MIN(x, y)((x) < (y) ? (x) : (y)) ((x) < (y) ? (x) : (y))
308
309/* ------------------------------------------------------------------------ */
310/* Enumeration types */
311
312enum kmp_state_timer {
313 ts_stop,
314 ts_start,
315 ts_pause,
316
317 ts_last_state
318};
319
320enum dynamic_mode {
321 dynamic_default,
322#ifdef USE_LOAD_BALANCE1
323 dynamic_load_balance,
324#endif /* USE_LOAD_BALANCE */
325 dynamic_random,
326 dynamic_thread_limit,
327 dynamic_max
328};
329
330/* external schedule constants, duplicate enum omp_sched in omp.h in order to
331 * not include it here */
332#ifndef KMP_SCHED_TYPE_DEFINED
333#define KMP_SCHED_TYPE_DEFINED
334typedef enum kmp_sched {
335 kmp_sched_lower = 0, // lower and upper bounds are for routine parameter check
336 // Note: need to adjust __kmp_sch_map global array in case enum is changed
337 kmp_sched_static = 1, // mapped to kmp_sch_static_chunked (33)
338 kmp_sched_dynamic = 2, // mapped to kmp_sch_dynamic_chunked (35)
339 kmp_sched_guided = 3, // mapped to kmp_sch_guided_chunked (36)
340 kmp_sched_auto = 4, // mapped to kmp_sch_auto (38)
341 kmp_sched_upper_std = 5, // upper bound for standard schedules
342 kmp_sched_lower_ext = 100, // lower bound of Intel extension schedules
343 kmp_sched_trapezoidal = 101, // mapped to kmp_sch_trapezoidal (39)
344#if KMP_STATIC_STEAL_ENABLED1
345 kmp_sched_static_steal = 102, // mapped to kmp_sch_static_steal (44)
346#endif
347 kmp_sched_upper,
348 kmp_sched_default = kmp_sched_static, // default scheduling
349 kmp_sched_monotonic = 0x80000000
350} kmp_sched_t;
351#endif
352
353/*!
354 @ingroup WORK_SHARING
355 * Describes the loop schedule to be used for a parallel for loop.
356 */
357enum sched_type : kmp_int32 {
358 kmp_sch_lower = 32, /**< lower bound for unordered values */
359 kmp_sch_static_chunked = 33,
360 kmp_sch_static = 34, /**< static unspecialized */
361 kmp_sch_dynamic_chunked = 35,
362 kmp_sch_guided_chunked = 36, /**< guided unspecialized */
363 kmp_sch_runtime = 37,
364 kmp_sch_auto = 38, /**< auto */
365 kmp_sch_trapezoidal = 39,
366
367 /* accessible only through KMP_SCHEDULE environment variable */
368 kmp_sch_static_greedy = 40,
369 kmp_sch_static_balanced = 41,
370 /* accessible only through KMP_SCHEDULE environment variable */
371 kmp_sch_guided_iterative_chunked = 42,
372 kmp_sch_guided_analytical_chunked = 43,
373 /* accessible only through KMP_SCHEDULE environment variable */
374 kmp_sch_static_steal = 44,
375
376 /* static with chunk adjustment (e.g., simd) */
377 kmp_sch_static_balanced_chunked = 45,
378 kmp_sch_guided_simd = 46, /**< guided with chunk adjustment */
379 kmp_sch_runtime_simd = 47, /**< runtime with chunk adjustment */
380
381 /* accessible only through KMP_SCHEDULE environment variable */
382 kmp_sch_upper, /**< upper bound for unordered values */
383
384 kmp_ord_lower = 64, /**< lower bound for ordered values, must be power of 2 */
385 kmp_ord_static_chunked = 65,
386 kmp_ord_static = 66, /**< ordered static unspecialized */
387 kmp_ord_dynamic_chunked = 67,
388 kmp_ord_guided_chunked = 68,
389 kmp_ord_runtime = 69,
390 kmp_ord_auto = 70, /**< ordered auto */
391 kmp_ord_trapezoidal = 71,
392 kmp_ord_upper, /**< upper bound for ordered values */
393
394 /* Schedules for Distribute construct */
395 kmp_distribute_static_chunked = 91, /**< distribute static chunked */
396 kmp_distribute_static = 92, /**< distribute static unspecialized */
397
398 /* For the "nomerge" versions, kmp_dispatch_next*() will always return a
399 single iteration/chunk, even if the loop is serialized. For the schedule
400 types listed above, the entire iteration vector is returned if the loop is
401 serialized. This doesn't work for gcc/gcomp sections. */
402 kmp_nm_lower = 160, /**< lower bound for nomerge values */
403
404 kmp_nm_static_chunked =
405 (kmp_sch_static_chunked - kmp_sch_lower + kmp_nm_lower),
406 kmp_nm_static = 162, /**< static unspecialized */
407 kmp_nm_dynamic_chunked = 163,
408 kmp_nm_guided_chunked = 164, /**< guided unspecialized */
409 kmp_nm_runtime = 165,
410 kmp_nm_auto = 166, /**< auto */
411 kmp_nm_trapezoidal = 167,
412
413 /* accessible only through KMP_SCHEDULE environment variable */
414 kmp_nm_static_greedy = 168,
415 kmp_nm_static_balanced = 169,
416 /* accessible only through KMP_SCHEDULE environment variable */
417 kmp_nm_guided_iterative_chunked = 170,
418 kmp_nm_guided_analytical_chunked = 171,
419 kmp_nm_static_steal =
420 172, /* accessible only through OMP_SCHEDULE environment variable */
421
422 kmp_nm_ord_static_chunked = 193,
423 kmp_nm_ord_static = 194, /**< ordered static unspecialized */
424 kmp_nm_ord_dynamic_chunked = 195,
425 kmp_nm_ord_guided_chunked = 196,
426 kmp_nm_ord_runtime = 197,
427 kmp_nm_ord_auto = 198, /**< auto */
428 kmp_nm_ord_trapezoidal = 199,
429 kmp_nm_upper, /**< upper bound for nomerge values */
430
431 /* Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. Since
432 we need to distinguish the three possible cases (no modifier, monotonic
433 modifier, nonmonotonic modifier), we need separate bits for each modifier.
434 The absence of monotonic does not imply nonmonotonic, especially since 4.5
435 says that the behaviour of the "no modifier" case is implementation defined
436 in 4.5, but will become "nonmonotonic" in 5.0.
437
438 Since we're passing a full 32 bit value, we can use a couple of high bits
439 for these flags; out of paranoia we avoid the sign bit.
440
441 These modifiers can be or-ed into non-static schedules by the compiler to
442 pass the additional information. They will be stripped early in the
443 processing in __kmp_dispatch_init when setting up schedules, so most of the
444 code won't ever see schedules with these bits set. */
445 kmp_sch_modifier_monotonic =
446 (1 << 29), /**< Set if the monotonic schedule modifier was present */
447 kmp_sch_modifier_nonmonotonic =
448 (1 << 30), /**< Set if the nonmonotonic schedule modifier was present */
449
450#define SCHEDULE_WITHOUT_MODIFIERS(s)(enum sched_type)( (s) & ~(kmp_sch_modifier_nonmonotonic |
kmp_sch_modifier_monotonic))
\
451 (enum sched_type)( \
452 (s) & ~(kmp_sch_modifier_nonmonotonic | kmp_sch_modifier_monotonic))
453#define SCHEDULE_HAS_MONOTONIC(s)(((s)&kmp_sch_modifier_monotonic) != 0) (((s)&kmp_sch_modifier_monotonic) != 0)
454#define SCHEDULE_HAS_NONMONOTONIC(s)(((s)&kmp_sch_modifier_nonmonotonic) != 0) (((s)&kmp_sch_modifier_nonmonotonic) != 0)
455#define SCHEDULE_HAS_NO_MODIFIERS(s)(((s) & (kmp_sch_modifier_nonmonotonic | kmp_sch_modifier_monotonic
)) == 0)
\
456 (((s) & (kmp_sch_modifier_nonmonotonic | kmp_sch_modifier_monotonic)) == 0)
457#define SCHEDULE_GET_MODIFIERS(s)((enum sched_type)( (s) & (kmp_sch_modifier_nonmonotonic |
kmp_sch_modifier_monotonic)))
\
458 ((enum sched_type)( \
459 (s) & (kmp_sch_modifier_nonmonotonic | kmp_sch_modifier_monotonic)))
460#define SCHEDULE_SET_MODIFIERS(s, m)(s = (enum sched_type)((kmp_int32)s | (kmp_int32)m)) \
461 (s = (enum sched_type)((kmp_int32)s | (kmp_int32)m))
462#define SCHEDULE_NONMONOTONIC0 0
463#define SCHEDULE_MONOTONIC1 1
464
465 kmp_sch_default = kmp_sch_static /**< default scheduling algorithm */
466};
467
468// Apply modifiers on internal kind to standard kind
469static inline void
470__kmp_sched_apply_mods_stdkind(kmp_sched_t *kind,
471 enum sched_type internal_kind) {
472 if (SCHEDULE_HAS_MONOTONIC(internal_kind)(((internal_kind)&kmp_sch_modifier_monotonic) != 0)) {
473 *kind = (kmp_sched_t)((int)*kind | (int)kmp_sched_monotonic);
474 }
475}
476
477// Apply modifiers on standard kind to internal kind
478static inline void
479__kmp_sched_apply_mods_intkind(kmp_sched_t kind,
480 enum sched_type *internal_kind) {
481 if ((int)kind & (int)kmp_sched_monotonic) {
482 *internal_kind = (enum sched_type)((int)*internal_kind |
483 (int)kmp_sch_modifier_monotonic);
484 }
485}
486
487// Get standard schedule without modifiers
488static inline kmp_sched_t __kmp_sched_without_mods(kmp_sched_t kind) {
489 return (kmp_sched_t)((int)kind & ~((int)kmp_sched_monotonic));
490}
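// Editorial sketch, not part of the annotated source: round-tripping the
// monotonic modifier through the helpers above. The function name is
// editorial.
static inline kmp_sched_t example_sched_mods() {
  kmp_sched_t kind = kmp_sched_dynamic;
  enum sched_type internal =
      (enum sched_type)(kmp_sch_dynamic_chunked | kmp_sch_modifier_monotonic);

  // The internal kind carries the modifier; surface it on the standard kind.
  __kmp_sched_apply_mods_stdkind(&kind, internal); // kind |= kmp_sched_monotonic

  // ...and strip it again when only the base schedule is of interest.
  return __kmp_sched_without_mods(kind); // kmp_sched_dynamic again
}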
491
492/* Type to keep runtime schedule set via OMP_SCHEDULE or omp_set_schedule() */
493typedef union kmp_r_sched {
494 struct {
495 enum sched_type r_sched_type;
496 int chunk;
497 };
498 kmp_int64 sched;
499} kmp_r_sched_t;
500
501extern enum sched_type __kmp_sch_map[]; // map OMP 3.0 schedule types with our
502// internal schedule types
503
504enum library_type {
505 library_none,
506 library_serial,
507 library_turnaround,
508 library_throughput
509};
510
511#if KMP_OS_LINUX1
512enum clock_function_type {
513 clock_function_gettimeofday,
514 clock_function_clock_gettime
515};
516#endif /* KMP_OS_LINUX */
517
518#if KMP_MIC_SUPPORTED((0 || 1) && (1 || 0))
519enum mic_type { non_mic, mic1, mic2, mic3, dummy };
520#endif
521
522/* -- fast reduction stuff ------------------------------------------------ */
523
524#undef KMP_FAST_REDUCTION_BARRIER1
525#define KMP_FAST_REDUCTION_BARRIER1 1
526
527#undef KMP_FAST_REDUCTION_CORE_DUO1
528#if KMP_ARCH_X860 || KMP_ARCH_X86_641
529#define KMP_FAST_REDUCTION_CORE_DUO1 1
530#endif
531
532enum _reduction_method {
533 reduction_method_not_defined = 0,
534 critical_reduce_block = (1 << 8),
535 atomic_reduce_block = (2 << 8),
536 tree_reduce_block = (3 << 8),
537 empty_reduce_block = (4 << 8)
538};
539
540// Description of the packed_reduction_method variable:
541// The packed_reduction_method variable consists of two enum-typed values
542// that are packed together into the 0-th byte and the 1-st byte:
543// 0: (packed_reduction_method & 0x000000FF) is an 'enum barrier_type' value of
544// the barrier that will be used in fast reduction: bs_plain_barrier or
545// bs_reduction_barrier
546// 1: (packed_reduction_method & 0x0000FF00) is the reduction method that will
547// be used in fast reduction;
548// Reduction method is of 'enum _reduction_method' type and is defined so that
549// the bits of the 0-th byte are empty, so no shift instruction is needed
550// while packing/unpacking
551
552#if KMP_FAST_REDUCTION_BARRIER1
553#define PACK_REDUCTION_METHOD_AND_BARRIER(reduction_method, barrier_type)((reduction_method) | (barrier_type)) \
554 ((reduction_method) | (barrier_type))
555
556#define UNPACK_REDUCTION_METHOD(packed_reduction_method)((enum _reduction_method)((packed_reduction_method) & (0x0000FF00
)))
\
557 ((enum _reduction_method)((packed_reduction_method) & (0x0000FF00)))
558
559#define UNPACK_REDUCTION_BARRIER(packed_reduction_method)((enum barrier_type)((packed_reduction_method) & (0x000000FF
)))
\
560 ((enum barrier_type)((packed_reduction_method) & (0x000000FF)))
561#else
562#define PACK_REDUCTION_METHOD_AND_BARRIER(reduction_method, barrier_type)((reduction_method) | (barrier_type)) \
563 (reduction_method)
564
565#define UNPACK_REDUCTION_METHOD(packed_reduction_method)((enum _reduction_method)((packed_reduction_method) & (0x0000FF00
)))
\
566 (packed_reduction_method)
567
568#define UNPACK_REDUCTION_BARRIER(packed_reduction_method)((enum barrier_type)((packed_reduction_method) & (0x000000FF
)))
(bs_plain_barrier)
569#endif
570
571#define TEST_REDUCTION_METHOD(packed_reduction_method, which_reduction_block)((((enum _reduction_method)((packed_reduction_method) & (
0x0000FF00)))) == (which_reduction_block))
\
572 ((UNPACK_REDUCTION_METHOD(packed_reduction_method)((enum _reduction_method)((packed_reduction_method) & (0x0000FF00
)))
) == \
573 (which_reduction_block))
574
575#if KMP_FAST_REDUCTION_BARRIER1
576#define TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER(((tree_reduce_block) | (bs_reduction_barrier))) \
577 (PACK_REDUCTION_METHOD_AND_BARRIER(tree_reduce_block, bs_reduction_barrier)((tree_reduce_block) | (bs_reduction_barrier)))
578
579#define TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER(((tree_reduce_block) | (bs_plain_barrier))) \
580 (PACK_REDUCTION_METHOD_AND_BARRIER(tree_reduce_block, bs_plain_barrier)((tree_reduce_block) | (bs_plain_barrier)))
581#endif
582
583typedef int PACKED_REDUCTION_METHOD_T;
584
585/* -- end of fast reduction stuff ----------------------------------------- */
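// Editorial sketch, not part of the annotated source: with
// tree_reduce_block == (3 << 8) == 0x300 and a barrier type in byte 0 (say 2),
//   PACK_REDUCTION_METHOD_AND_BARRIER(tree_reduce_block, 2) == 0x302
//   UNPACK_REDUCTION_METHOD(0x302) == 0x300 == tree_reduce_block
//   UNPACK_REDUCTION_BARRIER(0x302) == 0x002
// i.e. unpacking is a plain mask with no shift, as described above.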
586
587#if KMP_OS_WINDOWS0
588#define USE_CBLKDATA
589#if KMP_MSVC_COMPAT0
590#pragma warning(push)
591#pragma warning(disable : 271 310)
592#endif
593#include <windows.h>
594#if KMP_MSVC_COMPAT0
595#pragma warning(pop)
596#endif
597#endif
598
599#if KMP_OS_UNIX1
600#include <dlfcn.h>
601#include <pthread.h>
602#endif
603
604enum kmp_hw_t : int {
605 KMP_HW_UNKNOWN = -1,
606 KMP_HW_SOCKET = 0,
607 KMP_HW_PROC_GROUP,
608 KMP_HW_NUMA,
609 KMP_HW_DIE,
610 KMP_HW_LLC,
611 KMP_HW_L3,
612 KMP_HW_TILE,
613 KMP_HW_MODULE,
614 KMP_HW_L2,
615 KMP_HW_L1,
616 KMP_HW_CORE,
617 KMP_HW_THREAD,
618 KMP_HW_LAST
619};
620
621typedef enum kmp_hw_core_type_t {
622 KMP_HW_CORE_TYPE_UNKNOWN = 0x0,
623#if KMP_ARCH_X860 || KMP_ARCH_X86_641
624 KMP_HW_CORE_TYPE_ATOM = 0x20,
625 KMP_HW_CORE_TYPE_CORE = 0x40,
626 KMP_HW_MAX_NUM_CORE_TYPES = 3,
627#else
628 KMP_HW_MAX_NUM_CORE_TYPES = 1,
629#endif
630} kmp_hw_core_type_t;
631
632#define KMP_HW_MAX_NUM_CORE_EFFS8 8
633
634#define KMP_DEBUG_ASSERT_VALID_HW_TYPE(type) \
635  KMP_DEBUG_ASSERT(type >= (kmp_hw_t)0 && type < KMP_HW_LAST)
636#define KMP_ASSERT_VALID_HW_TYPE(type) \
637  KMP_ASSERT(type >= (kmp_hw_t)0 && type < KMP_HW_LAST)
638
639#define KMP_FOREACH_HW_TYPE(type) \
640  for (kmp_hw_t type = (kmp_hw_t)0; type < KMP_HW_LAST; \
641       type = (kmp_hw_t)((int)type + 1))
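// Editorial sketch (not part of kmp.h): KMP_FOREACH_HW_TYPE walks every level
// of the kmp_hw_t enum in order; the helper name below is hypothetical.
static inline int __example_num_hw_levels(void) {
  int n = 0;
  KMP_FOREACH_HW_TYPE(type) {
    (void)type; // every visited value satisfies KMP_DEBUG_ASSERT_VALID_HW_TYPE
    ++n;
  }
  return n; // equals KMP_HW_LAST
}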
642
643const char *__kmp_hw_get_keyword(kmp_hw_t type, bool plural = false);
644const char *__kmp_hw_get_catalog_string(kmp_hw_t type, bool plural = false);
645const char *__kmp_hw_get_core_type_string(kmp_hw_core_type_t type);
646
647/* Only Linux* OS and Windows* OS support thread affinity. */
648#if KMP_AFFINITY_SUPPORTED1
649
650// GROUP_AFFINITY is already defined for _MSC_VER>=1600 (VS2010 and later).
651#if KMP_OS_WINDOWS0
652#if _MSC_VER < 1600 && KMP_MSVC_COMPAT0
653typedef struct GROUP_AFFINITY {
654 KAFFINITY Mask;
655 WORD Group;
656 WORD Reserved[3];
657} GROUP_AFFINITY;
658#endif /* _MSC_VER < 1600 */
659#if KMP_GROUP_AFFINITY0
660extern int __kmp_num_proc_groups;
661#else
662static const int __kmp_num_proc_groups = 1;
663#endif /* KMP_GROUP_AFFINITY */
664typedef DWORD (*kmp_GetActiveProcessorCount_t)(WORD);
665extern kmp_GetActiveProcessorCount_t __kmp_GetActiveProcessorCount;
666
667typedef WORD (*kmp_GetActiveProcessorGroupCount_t)(void);
668extern kmp_GetActiveProcessorGroupCount_t __kmp_GetActiveProcessorGroupCount;
669
670typedef BOOL (*kmp_GetThreadGroupAffinity_t)(HANDLE, GROUP_AFFINITY *);
671extern kmp_GetThreadGroupAffinity_t __kmp_GetThreadGroupAffinity;
672
673typedef BOOL (*kmp_SetThreadGroupAffinity_t)(HANDLE, const GROUP_AFFINITY *,
674 GROUP_AFFINITY *);
675extern kmp_SetThreadGroupAffinity_t __kmp_SetThreadGroupAffinity;
676#endif /* KMP_OS_WINDOWS */
677
678#if KMP_USE_HWLOC0
679extern hwloc_topology_t __kmp_hwloc_topology;
680extern int __kmp_hwloc_error;
681#endif
682
683extern size_t __kmp_affin_mask_size;
684#define KMP_AFFINITY_CAPABLE() (__kmp_affin_mask_size > 0)
685#define KMP_AFFINITY_DISABLE() (__kmp_affin_mask_size = 0)
686#define KMP_AFFINITY_ENABLE(mask_size) (__kmp_affin_mask_size = mask_size)
687#define KMP_CPU_SET_ITERATE(i, mask) \
688  for (i = (mask)->begin(); (int)i != (mask)->end(); i = (mask)->next(i))
689#define KMP_CPU_SET(i, mask) (mask)->set(i)
690#define KMP_CPU_ISSET(i, mask) (mask)->is_set(i)
691#define KMP_CPU_CLR(i, mask) (mask)->clear(i)
692#define KMP_CPU_ZERO(mask) (mask)->zero()
693#define KMP_CPU_COPY(dest, src) (dest)->copy(src)
694#define KMP_CPU_AND(dest, src) (dest)->bitwise_and(src)
695#define KMP_CPU_COMPLEMENT(max_bit_number, mask) (mask)->bitwise_not()
696#define KMP_CPU_UNION(dest, src) (dest)->bitwise_or(src)
697#define KMP_CPU_ALLOC(ptr) (ptr = __kmp_affinity_dispatch->allocate_mask())
698#define KMP_CPU_FREE(ptr) __kmp_affinity_dispatch->deallocate_mask(ptr)
699#define KMP_CPU_ALLOC_ON_STACK(ptr) KMP_CPU_ALLOC(ptr)
700#define KMP_CPU_FREE_FROM_STACK(ptr) KMP_CPU_FREE(ptr)
701#define KMP_CPU_INTERNAL_ALLOC(ptr) KMP_CPU_ALLOC(ptr)
702#define KMP_CPU_INTERNAL_FREE(ptr) KMP_CPU_FREE(ptr)
703#define KMP_CPU_INDEX(arr, i) __kmp_affinity_dispatch->index_mask_array(arr, i)
704#define KMP_CPU_ALLOC_ARRAY(arr, n) \
705  (arr = __kmp_affinity_dispatch->allocate_mask_array(n))
706#define KMP_CPU_FREE_ARRAY(arr, n) \
707  __kmp_affinity_dispatch->deallocate_mask_array(arr)
708#define KMP_CPU_INTERNAL_ALLOC_ARRAY(arr, n) KMP_CPU_ALLOC_ARRAY(arr, n)
709#define KMP_CPU_INTERNAL_FREE_ARRAY(arr, n) KMP_CPU_FREE_ARRAY(arr, n)
710#define __kmp_get_system_affinity(mask, abort_bool) \
711  (mask)->get_system_affinity(abort_bool)
712#define __kmp_set_system_affinity(mask, abort_bool) \
713  (mask)->set_system_affinity(abort_bool)
714#define __kmp_get_proc_group(mask) (mask)->get_proc_group()
715
716class KMPAffinity {
717public:
718 class Mask {
719 public:
720 void *operator new(size_t n);
721 void operator delete(void *p);
722 void *operator new[](size_t n);
723 void operator delete[](void *p);
724 virtual ~Mask() {}
725 // Set bit i to 1
726 virtual void set(int i) {}
727 // Return bit i
728 virtual bool is_set(int i) const { return false; }
729 // Set bit i to 0
730 virtual void clear(int i) {}
731 // Zero out entire mask
732 virtual void zero() {}
733 // Copy src into this mask
734 virtual void copy(const Mask *src) {}
735 // this &= rhs
736 virtual void bitwise_and(const Mask *rhs) {}
737 // this |= rhs
738 virtual void bitwise_or(const Mask *rhs) {}
739 // this = ~this
740 virtual void bitwise_not() {}
741 // API for iterating over an affinity mask
742 // for (int i = mask->begin(); i != mask->end(); i = mask->next(i))
743 virtual int begin() const { return 0; }
744 virtual int end() const { return 0; }
745 virtual int next(int previous) const { return 0; }
746#if KMP_OS_WINDOWS0
747 virtual int set_process_affinity(bool abort_on_error) const { return -1; }
748#endif
749 // Set the system's affinity to this affinity mask's value
750 virtual int set_system_affinity(bool abort_on_error) const { return -1; }
751 // Set this affinity mask to the current system affinity
752 virtual int get_system_affinity(bool abort_on_error) { return -1; }
753 // Only 1 DWORD in the mask should have any procs set.
754 // Return the appropriate index, or -1 for an invalid mask.
755 virtual int get_proc_group() const { return -1; }
756 };
757 void *operator new(size_t n);
758 void operator delete(void *p);
759 // Need virtual destructor
760 virtual ~KMPAffinity() = default;
761 // Determine if affinity is capable
762 virtual void determine_capable(const char *env_var) {}
763 // Bind the current thread to os proc
764 virtual void bind_thread(int proc) {}
765 // Factory functions to allocate/deallocate a mask
766 virtual Mask *allocate_mask() { return nullptr; }
767 virtual void deallocate_mask(Mask *m) {}
768 virtual Mask *allocate_mask_array(int num) { return nullptr; }
769 virtual void deallocate_mask_array(Mask *m) {}
770 virtual Mask *index_mask_array(Mask *m, int index) { return nullptr; }
771 static void pick_api();
772 static void destroy_api();
773 enum api_type {
774 NATIVE_OS
775#if KMP_USE_HWLOC0
776 ,
777 HWLOC
778#endif
779 };
780 virtual api_type get_api_type() const {
781    KMP_ASSERT(0);
782 return NATIVE_OS;
783 }
784
785private:
786 static bool picked_api;
787};
788
789typedef KMPAffinity::Mask kmp_affin_mask_t;
790extern KMPAffinity *__kmp_affinity_dispatch;
791
792// Declare local char buffers with this size for printing debug and info
793// messages, using __kmp_affinity_print_mask().
794#define KMP_AFFIN_MASK_PRINT_LEN 1024
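// Editorial sketch (not part of kmp.h): typical use of the mask macros above
// through the KMPAffinity dispatch object; the helper name is hypothetical.
static inline int __example_count_procs_in_system_mask(void) {
  int i, count = 0;
  kmp_affin_mask_t *mask;
  KMP_CPU_ALLOC(mask); // dispatches to the chosen KMPAffinity implementation
  (void)__kmp_get_system_affinity(mask, /* abort_on_error */ 0);
  KMP_CPU_SET_ITERATE(i, mask) {
    if (KMP_CPU_ISSET(i, mask))
      ++count;
  }
  KMP_CPU_FREE(mask);
  return count;
}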
795
796enum affinity_type {
797 affinity_none = 0,
798 affinity_physical,
799 affinity_logical,
800 affinity_compact,
801 affinity_scatter,
802 affinity_explicit,
803 affinity_balanced,
804  affinity_disabled, // not used outside the env var parser
805 affinity_default
806};
807
808enum affinity_top_method {
809 affinity_top_method_all = 0, // try all (supported) methods, in order
810#if KMP_ARCH_X860 || KMP_ARCH_X86_641
811 affinity_top_method_apicid,
812 affinity_top_method_x2apicid,
813 affinity_top_method_x2apicid_1f,
814#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
815 affinity_top_method_cpuinfo, // KMP_CPUINFO_FILE is usable on Windows* OS, too
816#if KMP_GROUP_AFFINITY0
817 affinity_top_method_group,
818#endif /* KMP_GROUP_AFFINITY */
819 affinity_top_method_flat,
820#if KMP_USE_HWLOC0
821 affinity_top_method_hwloc,
822#endif
823 affinity_top_method_default
824};
825
826#define affinity_respect_mask_default (2)
827
828typedef struct kmp_affinity_flags_t {
829 unsigned dups : 1;
830 unsigned verbose : 1;
831 unsigned warnings : 1;
832 unsigned respect : 2;
833 unsigned reset : 1;
834 unsigned initialized : 1;
835 unsigned reserved : 25;
836} kmp_affinity_flags_t;
837KMP_BUILD_ASSERT(sizeof(kmp_affinity_flags_t) == 4);
838
839typedef struct kmp_affinity_t {
840 char *proclist;
841 enum affinity_type type;
842 kmp_hw_t gran;
843 int gran_levels;
844 int compact;
845 int offset;
846 kmp_affinity_flags_t flags;
847 unsigned num_masks;
848 kmp_affin_mask_t *masks;
849 unsigned num_os_id_masks;
850 kmp_affin_mask_t *os_id_masks;
851 const char *env_var;
852} kmp_affinity_t;
853
854#define KMP_AFFINITY_INIT(env) \
855  { \
856    nullptr, affinity_default, KMP_HW_UNKNOWN, -1, 0, 0, \
857        {TRUE, FALSE, TRUE, affinity_respect_mask_default, FALSE, FALSE}, 0, \
858        nullptr, 0, nullptr, env \
859  }
860
861extern enum affinity_top_method __kmp_affinity_top_method;
862extern kmp_affinity_t __kmp_affinity;
863extern kmp_affinity_t __kmp_hh_affinity;
864extern kmp_affinity_t *__kmp_affinities[2];
865
866extern void __kmp_affinity_bind_thread(int which);
867
868extern kmp_affin_mask_t *__kmp_affin_fullMask;
869extern kmp_affin_mask_t *__kmp_affin_origMask;
870extern char *__kmp_cpuinfo_file;
871
872#endif /* KMP_AFFINITY_SUPPORTED */
873
874// This needs to be kept in sync with the values in omp.h !!!
875typedef enum kmp_proc_bind_t {
876 proc_bind_false = 0,
877 proc_bind_true,
878 proc_bind_primary,
879 proc_bind_close,
880 proc_bind_spread,
881 proc_bind_intel, // use KMP_AFFINITY interface
882 proc_bind_default
883} kmp_proc_bind_t;
884
885typedef struct kmp_nested_proc_bind_t {
886 kmp_proc_bind_t *bind_types;
887 int size;
888 int used;
889} kmp_nested_proc_bind_t;
890
891extern kmp_nested_proc_bind_t __kmp_nested_proc_bind;
892extern kmp_proc_bind_t __kmp_teams_proc_bind;
893
894extern int __kmp_display_affinity;
895extern char *__kmp_affinity_format;
896static const size_t KMP_AFFINITY_FORMAT_SIZE = 512;
897#if OMPT_SUPPORT1
898extern int __kmp_tool;
899extern char *__kmp_tool_libraries;
900#endif // OMPT_SUPPORT
901
902#if KMP_AFFINITY_SUPPORTED
903#define KMP_PLACE_ALL (-1)
904#define KMP_PLACE_UNDEFINED (-2)
905// Is KMP_AFFINITY being used instead of OMP_PROC_BIND/OMP_PLACES?
906#define KMP_AFFINITY_NON_PROC_BIND \
907  ((__kmp_nested_proc_bind.bind_types[0] == proc_bind_false || \
908    __kmp_nested_proc_bind.bind_types[0] == proc_bind_intel) && \
909   (__kmp_affinity.num_masks > 0 || __kmp_affinity.type == affinity_balanced))
910#endif /* KMP_AFFINITY_SUPPORTED */
911
912extern int __kmp_affinity_num_places;
913
914typedef enum kmp_cancel_kind_t {
915 cancel_noreq = 0,
916 cancel_parallel = 1,
917 cancel_loop = 2,
918 cancel_sections = 3,
919 cancel_taskgroup = 4
920} kmp_cancel_kind_t;
921
922// KMP_HW_SUBSET support:
923typedef struct kmp_hws_item {
924 int num;
925 int offset;
926} kmp_hws_item_t;
927
928extern kmp_hws_item_t __kmp_hws_socket;
929extern kmp_hws_item_t __kmp_hws_die;
930extern kmp_hws_item_t __kmp_hws_node;
931extern kmp_hws_item_t __kmp_hws_tile;
932extern kmp_hws_item_t __kmp_hws_core;
933extern kmp_hws_item_t __kmp_hws_proc;
934extern int __kmp_hws_requested;
935extern int __kmp_hws_abs_flag; // absolute or per-item number requested
936
937/* ------------------------------------------------------------------------ */
938
939#define KMP_PAD(type, sz) \
940  (sizeof(type) + (sz - ((sizeof(type) - 1) % (sz)) - 1))
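// Editorial example (not part of kmp.h): KMP_PAD(type, sz) rounds sizeof(type)
// up to the next multiple of sz, e.g. to pad a small type out to a full
// 64-byte cache line. These asserts are illustrative only.
static_assert(KMP_PAD(kmp_int32, 64) == 64, "4 bytes pad to one 64B line");
static_assert(KMP_PAD(double[9], 64) == 128, "72 bytes pad to two 64B lines");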
941
942// We need to avoid using -1 as a GTID as +1 is added to the gtid
943// when storing it in a lock, and the value 0 is reserved.
944#define KMP_GTID_DNE (-2) /* Does not exist */
945#define KMP_GTID_SHUTDOWN (-3) /* Library is shutting down */
946#define KMP_GTID_MONITOR (-4) /* Monitor thread ID */
947#define KMP_GTID_UNKNOWN (-5) /* Is not known */
948#define KMP_GTID_MIN (-6) /* Minimal gtid for low bound check in DEBUG */
949
950/* OpenMP 5.0 Memory Management support */
951
952#ifndef __OMP_H
953// Duplicate type definitions from omp.h
954typedef uintptr_t omp_uintptr_t;
955
956typedef enum {
957 omp_atk_sync_hint = 1,
958 omp_atk_alignment = 2,
959 omp_atk_access = 3,
960 omp_atk_pool_size = 4,
961 omp_atk_fallback = 5,
962 omp_atk_fb_data = 6,
963 omp_atk_pinned = 7,
964 omp_atk_partition = 8
965} omp_alloctrait_key_t;
966
967typedef enum {
968 omp_atv_false = 0,
969 omp_atv_true = 1,
970 omp_atv_contended = 3,
971 omp_atv_uncontended = 4,
972 omp_atv_serialized = 5,
973 omp_atv_sequential = omp_atv_serialized, // (deprecated)
974 omp_atv_private = 6,
975 omp_atv_all = 7,
976 omp_atv_thread = 8,
977 omp_atv_pteam = 9,
978 omp_atv_cgroup = 10,
979 omp_atv_default_mem_fb = 11,
980 omp_atv_null_fb = 12,
981 omp_atv_abort_fb = 13,
982 omp_atv_allocator_fb = 14,
983 omp_atv_environment = 15,
984 omp_atv_nearest = 16,
985 omp_atv_blocked = 17,
986 omp_atv_interleaved = 18
987} omp_alloctrait_value_t;
988#define omp_atv_default((omp_uintptr_t)-1) ((omp_uintptr_t)-1)
989
990typedef void *omp_memspace_handle_t;
991extern omp_memspace_handle_t const omp_default_mem_space;
992extern omp_memspace_handle_t const omp_large_cap_mem_space;
993extern omp_memspace_handle_t const omp_const_mem_space;
994extern omp_memspace_handle_t const omp_high_bw_mem_space;
995extern omp_memspace_handle_t const omp_low_lat_mem_space;
996extern omp_memspace_handle_t const llvm_omp_target_host_mem_space;
997extern omp_memspace_handle_t const llvm_omp_target_shared_mem_space;
998extern omp_memspace_handle_t const llvm_omp_target_device_mem_space;
999
1000typedef struct {
1001 omp_alloctrait_key_t key;
1002 omp_uintptr_t value;
1003} omp_alloctrait_t;
1004
1005typedef void *omp_allocator_handle_t;
1006extern omp_allocator_handle_t const omp_null_allocator;
1007extern omp_allocator_handle_t const omp_default_mem_alloc;
1008extern omp_allocator_handle_t const omp_large_cap_mem_alloc;
1009extern omp_allocator_handle_t const omp_const_mem_alloc;
1010extern omp_allocator_handle_t const omp_high_bw_mem_alloc;
1011extern omp_allocator_handle_t const omp_low_lat_mem_alloc;
1012extern omp_allocator_handle_t const omp_cgroup_mem_alloc;
1013extern omp_allocator_handle_t const omp_pteam_mem_alloc;
1014extern omp_allocator_handle_t const omp_thread_mem_alloc;
1015extern omp_allocator_handle_t const llvm_omp_target_host_mem_alloc;
1016extern omp_allocator_handle_t const llvm_omp_target_shared_mem_alloc;
1017extern omp_allocator_handle_t const llvm_omp_target_device_mem_alloc;
1018extern omp_allocator_handle_t const kmp_max_mem_alloc;
1019extern omp_allocator_handle_t __kmp_def_allocator;
1020
1021// end of duplicate type definitions from omp.h
1022#endif
1023
1024extern int __kmp_memkind_available;
1025
1026typedef omp_memspace_handle_t kmp_memspace_t; // placeholder
1027
1028typedef struct kmp_allocator_t {
1029 omp_memspace_handle_t memspace;
1030 void **memkind; // pointer to memkind
1031 size_t alignment;
1032 omp_alloctrait_value_t fb;
1033 kmp_allocator_t *fb_data;
1034 kmp_uint64 pool_size;
1035 kmp_uint64 pool_used;
1036} kmp_allocator_t;
1037
1038extern omp_allocator_handle_t __kmpc_init_allocator(int gtid,
1039 omp_memspace_handle_t,
1040 int ntraits,
1041 omp_alloctrait_t traits[]);
1042extern void __kmpc_destroy_allocator(int gtid, omp_allocator_handle_t al);
1043extern void __kmpc_set_default_allocator(int gtid, omp_allocator_handle_t al);
1044extern omp_allocator_handle_t __kmpc_get_default_allocator(int gtid);
1045// external interfaces, may be used by compiler
1046extern void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
1047extern void *__kmpc_aligned_alloc(int gtid, size_t align, size_t sz,
1048 omp_allocator_handle_t al);
1049extern void *__kmpc_calloc(int gtid, size_t nmemb, size_t sz,
1050 omp_allocator_handle_t al);
1051extern void *__kmpc_realloc(int gtid, void *ptr, size_t sz,
1052 omp_allocator_handle_t al,
1053 omp_allocator_handle_t free_al);
1054extern void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
1055// internal interfaces, contain real implementation
1056extern void *__kmp_alloc(int gtid, size_t align, size_t sz,
1057 omp_allocator_handle_t al);
1058extern void *__kmp_calloc(int gtid, size_t align, size_t nmemb, size_t sz,
1059 omp_allocator_handle_t al);
1060extern void *__kmp_realloc(int gtid, void *ptr, size_t sz,
1061 omp_allocator_handle_t al,
1062 omp_allocator_handle_t free_al);
1063extern void ___kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
1064
1065extern void __kmp_init_memkind();
1066extern void __kmp_fini_memkind();
1067extern void __kmp_init_target_mem();
1068
1069/* ------------------------------------------------------------------------ */
1070
1071#define KMP_UINT64_MAX \
1072  (~((kmp_uint64)1 << ((sizeof(kmp_uint64) * (1 << 3)) - 1)))
1073
1074#define KMP_MIN_NTH1 1
1075
1076#ifndef KMP_MAX_NTH
1077#if defined(PTHREAD_THREADS_MAX) && PTHREAD_THREADS_MAX < INT_MAX
1078#define KMP_MAX_NTH PTHREAD_THREADS_MAX
1079#else
1080#define KMP_MAX_NTH INT_MAX
1081#endif
1082#endif /* KMP_MAX_NTH */
1083
1084#ifdef PTHREAD_STACK_MIN
1085#define KMP_MIN_STKSIZE PTHREAD_STACK_MIN
1086#else
1087#define KMP_MIN_STKSIZE ((size_t)(32 * 1024))
1088#endif
1089
1090#define KMP_MAX_STKSIZE (~((size_t)1 << ((sizeof(size_t) * (1 << 3)) - 1)))
1091
1092#if KMP_ARCH_X86
1093#define KMP_DEFAULT_STKSIZE ((size_t)(2 * 1024 * 1024))
1094#elif KMP_ARCH_X86_64
1095#define KMP_DEFAULT_STKSIZE ((size_t)(4 * 1024 * 1024))
1096#define KMP_BACKUP_STKSIZE ((size_t)(2 * 1024 * 1024))
1097#else
1098#define KMP_DEFAULT_STKSIZE ((size_t)(1024 * 1024))
1099#endif
1100
1101#define KMP_DEFAULT_MALLOC_POOL_INCR ((size_t)(1024 * 1024))
1102#define KMP_MIN_MALLOC_POOL_INCR ((size_t)(4 * 1024))
1103#define KMP_MAX_MALLOC_POOL_INCR \
1104  (~((size_t)1 << ((sizeof(size_t) * (1 << 3)) - 1)))
1105
1106#define KMP_MIN_STKOFFSET (0)
1107#define KMP_MAX_STKOFFSET KMP_MAX_STKSIZE
1108#if KMP_OS_DARWIN
1109#define KMP_DEFAULT_STKOFFSET KMP_MIN_STKOFFSET
1110#else
1111#define KMP_DEFAULT_STKOFFSET CACHE_LINE
1112#endif
1113
1114#define KMP_MIN_STKPADDING (0)
1115#define KMP_MAX_STKPADDING (2 * 1024 * 1024)
1116
1117#define KMP_BLOCKTIME_MULTIPLIER \
1118  (1000) /* number of blocktime units per second */
1119#define KMP_MIN_BLOCKTIME (0)
1120#define KMP_MAX_BLOCKTIME \
1121  (INT_MAX) /* must be INT_MAX for the "infinite" setting to work */
1122
1123/* __kmp_blocktime is in milliseconds */
1124#define KMP_DEFAULT_BLOCKTIME (__kmp_is_hybrid_cpu() ? (0) : (200))
1125
1126#if KMP_USE_MONITOR
1127#define KMP_DEFAULT_MONITOR_STKSIZE ((size_t)(64 * 1024))
1128#define KMP_MIN_MONITOR_WAKEUPS (1) // min times monitor wakes up per second
1129#define KMP_MAX_MONITOR_WAKEUPS (1000) // max times monitor can wake up per sec
1130
1131/* Calculate new number of monitor wakeups for a specific block time based on
1132 previous monitor_wakeups. Only allow increasing number of wakeups */
1133#define KMP_WAKEUPS_FROM_BLOCKTIME(blocktime, monitor_wakeups) \
1134  (((blocktime) == KMP_MAX_BLOCKTIME)   ? (monitor_wakeups) \
1135   : ((blocktime) == KMP_MIN_BLOCKTIME) ? KMP_MAX_MONITOR_WAKEUPS \
1136   : ((monitor_wakeups) > (KMP_BLOCKTIME_MULTIPLIER / (blocktime))) \
1137       ? (monitor_wakeups) \
1138       : (KMP_BLOCKTIME_MULTIPLIER) / (blocktime))
1139
1140/* Calculate number of intervals for a specific block time based on
1141   monitor_wakeups */
1142#define KMP_INTERVALS_FROM_BLOCKTIME(blocktime, monitor_wakeups) \
1143  (((blocktime) + (KMP_BLOCKTIME_MULTIPLIER / (monitor_wakeups)) - 1) / \
1144   (KMP_BLOCKTIME_MULTIPLIER / (monitor_wakeups)))
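// Editorial worked example (not part of kmp.h): with blocktime = 200 ms and a
// previous rate of 1 wakeup per second, KMP_WAKEUPS_FROM_BLOCKTIME gives
// 1000 / 200 = 5 wakeups per second (the rate is only allowed to increase),
// and KMP_INTERVALS_FROM_BLOCKTIME(200, 5) = (200 + 200 - 1) / 200 = 1, i.e.
// one full 200 ms interval per blocktime.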
1145#else
1146#define KMP_BLOCKTIME(team, tid) \
1147  (get__bt_set(team, tid) ? get__blocktime(team, tid) : __kmp_dflt_blocktime)
1148#if KMP_OS_UNIX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
1149// HW TSC is used to reduce overhead (clock tick instead of nanosecond).
1150extern kmp_uint64 __kmp_ticks_per_msec;
1151#if KMP_COMPILER_ICC || KMP_COMPILER_ICX
1152#define KMP_NOW() ((kmp_uint64)_rdtsc())
1153#else
1154#define KMP_NOW() __kmp_hardware_timestamp()
1155#endif
1156#define KMP_NOW_MSEC() (KMP_NOW() / __kmp_ticks_per_msec)
1157#define KMP_BLOCKTIME_INTERVAL(team, tid) \
1158  (KMP_BLOCKTIME(team, tid) * __kmp_ticks_per_msec)
1159#define KMP_BLOCKING(goal, count) ((goal) > KMP_NOW())
1160#else
1161// System time is retrieved sporadically while blocking.
1162extern kmp_uint64 __kmp_now_nsec();
1163#define KMP_NOW() __kmp_now_nsec()
1164#define KMP_NOW_MSEC() (KMP_NOW() / KMP_USEC_PER_SEC)
1165#define KMP_BLOCKTIME_INTERVAL(team, tid) \
1166  (KMP_BLOCKTIME(team, tid) * KMP_USEC_PER_SEC)
1167#define KMP_BLOCKING(goal, count) ((count) % 1000 != 0 || (goal) > KMP_NOW())
1168#endif
1169#endif // KMP_USE_MONITOR
1170
1171#define KMP_MIN_STATSCOLS40 40
1172#define KMP_MAX_STATSCOLS4096 4096
1173#define KMP_DEFAULT_STATSCOLS80 80
1174
1175#define KMP_MIN_INTERVAL0 0
1176#define KMP_MAX_INTERVAL(2147483647 - 1) (INT_MAX2147483647 - 1)
1177#define KMP_DEFAULT_INTERVAL0 0
1178
1179#define KMP_MIN_CHUNK1 1
1180#define KMP_MAX_CHUNK(2147483647 - 1) (INT_MAX2147483647 - 1)
1181#define KMP_DEFAULT_CHUNK1 1
1182
1183#define KMP_MIN_DISP_NUM_BUFF1 1
1184#define KMP_DFLT_DISP_NUM_BUFF7 7
1185#define KMP_MAX_DISP_NUM_BUFF4096 4096
1186
1187#define KMP_MAX_ORDERED8 8
1188
1189#define KMP_MAX_FIELDS32 32
1190
1191#define KMP_MAX_BRANCH_BITS31 31
1192
1193#define KMP_MAX_ACTIVE_LEVELS_LIMIT2147483647 INT_MAX2147483647
1194
1195#define KMP_MAX_DEFAULT_DEVICE_LIMIT2147483647 INT_MAX2147483647
1196
1197#define KMP_MAX_TASK_PRIORITY_LIMIT2147483647 INT_MAX2147483647
1198
1199/* Minimum number of threads before switch to TLS gtid (experimentally
1200 determined) */
1201/* josh TODO: what about OS X* tuning? */
1202#if KMP_ARCH_X860 || KMP_ARCH_X86_641
1203#define KMP_TLS_GTID_MIN5 5
1204#else
1205#define KMP_TLS_GTID_MIN5 INT_MAX2147483647
1206#endif
1207
1208#define KMP_MASTER_TID(tid) (0 == (tid))
1209#define KMP_WORKER_TID(tid) (0 != (tid))
1210
1211#define KMP_MASTER_GTID(gtid) (0 == __kmp_tid_from_gtid((gtid)))
1212#define KMP_WORKER_GTID(gtid) (0 != __kmp_tid_from_gtid((gtid)))
1213#define KMP_INITIAL_GTID(gtid) (0 == (gtid))
1214
1215#ifndef TRUE
1216#define FALSE 0
1217#define TRUE (!FALSE)
1218#endif
1219
1220/* NOTE: all of the following constants must be even */
1221
1222#if KMP_OS_WINDOWS
1223#define KMP_INIT_WAIT 64U /* initial number of spin-tests */
1224#define KMP_NEXT_WAIT 32U /* subsequent number of spin-tests */
1225#elif KMP_OS_LINUX
1226#define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
1227#define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
1228#elif KMP_OS_DARWIN
1229/* TODO: tune for KMP_OS_DARWIN */
1230#define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
1231#define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
1232#elif KMP_OS_DRAGONFLY
1233/* TODO: tune for KMP_OS_DRAGONFLY */
1234#define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
1235#define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
1236#elif KMP_OS_FREEBSD
1237/* TODO: tune for KMP_OS_FREEBSD */
1238#define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
1239#define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
1240#elif KMP_OS_NETBSD
1241/* TODO: tune for KMP_OS_NETBSD */
1242#define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
1243#define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
1244#elif KMP_OS_HURD
1245/* TODO: tune for KMP_OS_HURD */
1246#define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
1247#define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
1248#elif KMP_OS_OPENBSD
1249/* TODO: tune for KMP_OS_OPENBSD */
1250#define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
1251#define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
1252#endif
1253
1254#if KMP_ARCH_X860 || KMP_ARCH_X86_641
1255typedef struct kmp_cpuid {
1256 kmp_uint32 eax;
1257 kmp_uint32 ebx;
1258 kmp_uint32 ecx;
1259 kmp_uint32 edx;
1260} kmp_cpuid_t;
1261
1262typedef struct kmp_cpuinfo_flags_t {
1263 unsigned sse2 : 1; // 0 if SSE2 instructions are not supported, 1 otherwise.
1264 unsigned rtm : 1; // 0 if RTM instructions are not supported, 1 otherwise.
1265 unsigned hybrid : 1;
1266 unsigned reserved : 29; // Ensure size of 32 bits
1267} kmp_cpuinfo_flags_t;
1268
1269typedef struct kmp_cpuinfo {
1270 int initialized; // If 0, other fields are not initialized.
1271 int signature; // CPUID(1).EAX
1272 int family; // CPUID(1).EAX[27:20]+CPUID(1).EAX[11:8] (Extended Family+Family)
1273 int model; // ( CPUID(1).EAX[19:16] << 4 ) + CPUID(1).EAX[7:4] ( ( Extended
1274 // Model << 4 ) + Model)
1275 int stepping; // CPUID(1).EAX[3:0] ( Stepping )
1276 kmp_cpuinfo_flags_t flags;
1277 int apic_id;
1278 int physical_id;
1279 int logical_id;
1280 kmp_uint64 frequency; // Nominal CPU frequency in Hz.
1281 char name[3 * sizeof(kmp_cpuid_t)]; // CPUID(0x80000002,0x80000003,0x80000004)
1282} kmp_cpuinfo_t;
1283
1284extern void __kmp_query_cpuid(kmp_cpuinfo_t *p);
1285
1286#if KMP_OS_UNIX1
1287// subleaf is only needed for cache and topology discovery and can be set to
1288// zero in most cases
1289static inline void __kmp_x86_cpuid(int leaf, int subleaf, struct kmp_cpuid *p) {
1290 __asm__ __volatile__("cpuid"
1291 : "=a"(p->eax), "=b"(p->ebx), "=c"(p->ecx), "=d"(p->edx)
1292 : "a"(leaf), "c"(subleaf));
1293}
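// Editorial sketch (not part of kmp.h): reading the 12-byte CPU vendor string
// via the cpuid wrapper above; the helper name is hypothetical.
static inline void __example_read_cpu_vendor(char vendor[13]) {
  kmp_cpuid_t buf;
  __kmp_x86_cpuid(0, 0, &buf); // leaf 0: max leaf in EAX, vendor in EBX/EDX/ECX
  kmp_uint32 regs[3] = {buf.ebx, buf.edx, buf.ecx};
  for (int r = 0; r < 3; ++r)
    for (int b = 0; b < 4; ++b)
      vendor[r * 4 + b] = (char)((regs[r] >> (8 * b)) & 0xff);
  vendor[12] = '\0'; // e.g. "GenuineIntel" or "AuthenticAMD"
}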
1294// Load p into FPU control word
1295static inline void __kmp_load_x87_fpu_control_word(const kmp_int16 *p) {
1296 __asm__ __volatile__("fldcw %0" : : "m"(*p));
1297}
1298// Store FPU control word into p
1299static inline void __kmp_store_x87_fpu_control_word(kmp_int16 *p) {
1300 __asm__ __volatile__("fstcw %0" : "=m"(*p));
1301}
1302static inline void __kmp_clear_x87_fpu_status_word() {
1303#if KMP_MIC0
1304 // 32-bit protected mode x87 FPU state
1305 struct x87_fpu_state {
1306 unsigned cw;
1307 unsigned sw;
1308 unsigned tw;
1309 unsigned fip;
1310 unsigned fips;
1311 unsigned fdp;
1312 unsigned fds;
1313 };
1314 struct x87_fpu_state fpu_state = {0, 0, 0, 0, 0, 0, 0};
1315 __asm__ __volatile__("fstenv %0\n\t" // store FP env
1316 "andw $0x7f00, %1\n\t" // clear 0-7,15 bits of FP SW
1317 "fldenv %0\n\t" // load FP env back
1318 : "+m"(fpu_state), "+m"(fpu_state.sw));
1319#else
1320 __asm__ __volatile__("fnclex");
1321#endif // KMP_MIC
1322}
1323#if __SSE__1
1324static inline void __kmp_load_mxcsr(const kmp_uint32 *p) { _mm_setcsr(*p); }
1325static inline void __kmp_store_mxcsr(kmp_uint32 *p) { *p = _mm_getcsr(); }
1326#else
1327static inline void __kmp_load_mxcsr(const kmp_uint32 *p) {}
1328static inline void __kmp_store_mxcsr(kmp_uint32 *p) { *p = 0; }
1329#endif
1330#else
1331// Windows still has these as external functions in assembly file
1332extern void __kmp_x86_cpuid(int mode, int mode2, struct kmp_cpuid *p);
1333extern void __kmp_load_x87_fpu_control_word(const kmp_int16 *p);
1334extern void __kmp_store_x87_fpu_control_word(kmp_int16 *p);
1335extern void __kmp_clear_x87_fpu_status_word();
1336static inline void __kmp_load_mxcsr(const kmp_uint32 *p) { _mm_setcsr(*p); }
1337static inline void __kmp_store_mxcsr(kmp_uint32 *p) { *p = _mm_getcsr(); }
1338#endif // KMP_OS_UNIX
1339
1340#define KMP_X86_MXCSR_MASK0xffffffc0 0xffffffc0 /* ignore status flags (6 lsb) */
1341
1342// User-level Monitor/Mwait
1343#if KMP_HAVE_UMWAIT
1344// We always try for UMWAIT first
1345#if KMP_HAVE_WAITPKG_INTRINSICS1
1346#if KMP_HAVE_IMMINTRIN_H1
1347#include <immintrin.h>
1348#elif KMP_HAVE_INTRIN_H0
1349#include <intrin.h>
1350#endif
1351#endif // KMP_HAVE_WAITPKG_INTRINSICS
1352
1353KMP_ATTRIBUTE_TARGET_WAITPKG__attribute__((target("waitpkg")))
1354static inline int __kmp_tpause(uint32_t hint, uint64_t counter) {
1355#if !KMP_HAVE_WAITPKG_INTRINSICS1
1356 uint32_t timeHi = uint32_t(counter >> 32);
1357 uint32_t timeLo = uint32_t(counter & 0xffffffff);
1358 char flag;
1359 __asm__ volatile("#tpause\n.byte 0x66, 0x0F, 0xAE, 0xF1\n"
1360 "setb %0"
1361                   // The "=q" constraint means any register accessible as rl
1362 // in 32-bit mode: a, b, c, and d;
1363 // in 64-bit mode: any integer register
1364 : "=q"(flag)
1365 : "a"(timeLo), "d"(timeHi), "c"(hint)
1366 :);
1367 return flag;
1368#else
1369 return _tpause(hint, counter);
1370#endif
1371}
1372KMP_ATTRIBUTE_TARGET_WAITPKG__attribute__((target("waitpkg")))
1373static inline void __kmp_umonitor(void *cacheline) {
1374#if !KMP_HAVE_WAITPKG_INTRINSICS1
1375 __asm__ volatile("# umonitor\n.byte 0xF3, 0x0F, 0xAE, 0x01 "
1376 :
1377 : "a"(cacheline)
1378 :);
1379#else
1380 _umonitor(cacheline);
1381#endif
1382}
1383KMP_ATTRIBUTE_TARGET_WAITPKG__attribute__((target("waitpkg")))
1384static inline int __kmp_umwait(uint32_t hint, uint64_t counter) {
1385#if !KMP_HAVE_WAITPKG_INTRINSICS1
1386 uint32_t timeHi = uint32_t(counter >> 32);
1387 uint32_t timeLo = uint32_t(counter & 0xffffffff);
1388 char flag;
1389 __asm__ volatile("#umwait\n.byte 0xF2, 0x0F, 0xAE, 0xF1\n"
1390 "setb %0"
1391                   // The "=q" constraint means any register accessible as rl
1392 // in 32-bit mode: a, b, c, and d;
1393 // in 64-bit mode: any integer register
1394 : "=q"(flag)
1395 : "a"(timeLo), "d"(timeHi), "c"(hint)
1396 :);
1397 return flag;
1398#else
1399 return _umwait(hint, counter);
1400#endif
1401}
1402#elif KMP_HAVE_MWAIT
1403#if KMP_OS_UNIX1
1404#include <pmmintrin.h>
1405#else
1406#include <intrin.h>
1407#endif
1408#if KMP_OS_UNIX1
1409__attribute__((target("sse3")))
1410#endif
1411static inline void
1412__kmp_mm_monitor(void *cacheline, unsigned extensions, unsigned hints) {
1413 _mm_monitor(cacheline, extensions, hints);
1414}
1415#if KMP_OS_UNIX1
1416__attribute__((target("sse3")))
1417#endif
1418static inline void
1419__kmp_mm_mwait(unsigned extensions, unsigned hints) {
1420 _mm_mwait(extensions, hints);
1421}
1422#endif // KMP_HAVE_UMWAIT
1423
1424#if KMP_ARCH_X860
1425extern void __kmp_x86_pause(void);
1426#elif KMP_MIC0
1427// Performance testing on KNC (C0QS-7120 P/A/X/D, 61-core, 16 GB Memory) showed
1428// regression after removal of extra PAUSE from spin loops. Changing
1429// the delay from 100 to 300 showed even better performance than double PAUSE
1430// on Spec OMP2001 and LCPC tasking tests, no regressions on EPCC.
1431static inline void __kmp_x86_pause(void) { _mm_delay_32(300); }
1432#else
1433static inline void __kmp_x86_pause(void) { _mm_pause(); }
1434#endif
1435#define KMP_CPU_PAUSE()__kmp_x86_pause() __kmp_x86_pause()
1436#elif KMP_ARCH_PPC64(0 || 0)
1437#define KMP_PPC64_PRI_LOW() __asm__ volatile("or 1, 1, 1")
1438#define KMP_PPC64_PRI_MED() __asm__ volatile("or 2, 2, 2")
1439#define KMP_PPC64_PRI_LOC_MB() __asm__ volatile("" : : : "memory")
1440#define KMP_CPU_PAUSE()__kmp_x86_pause() \
1441 do { \
1442 KMP_PPC64_PRI_LOW(); \
1443 KMP_PPC64_PRI_MED(); \
1444 KMP_PPC64_PRI_LOC_MB(); \
1445 } while (0)
1446#else
1447#define KMP_CPU_PAUSE()__kmp_x86_pause() /* nothing to do */
1448#endif
1449
1450#define KMP_INIT_YIELD(count) \
1451  { (count) = __kmp_yield_init; }
1452
1453#define KMP_INIT_BACKOFF(time) \
1454  { (time) = __kmp_pause_init; }
1455
1456#define KMP_OVERSUBSCRIBED \
1457  (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc))
1458
1459#define KMP_TRY_YIELD \
1460  ((__kmp_use_yield == 1) || (__kmp_use_yield == 2 && (KMP_OVERSUBSCRIBED)))
1461
1462#define KMP_TRY_YIELD_OVERSUB \
1463  ((__kmp_use_yield == 1 || __kmp_use_yield == 2) && (KMP_OVERSUBSCRIBED))
1464
1465#define KMP_YIELD(cond) \
1466  { \
1467    KMP_CPU_PAUSE(); \
1468    if ((cond) && (KMP_TRY_YIELD)) \
1469      __kmp_yield(); \
1470  }
1471
1472#define KMP_YIELD_OVERSUB() \
1473  { \
1474    KMP_CPU_PAUSE(); \
1475    if ((KMP_TRY_YIELD_OVERSUB)) \
1476      __kmp_yield(); \
1477  }
1478
1479// Note the decrement of 2 in the following Macros. With KMP_LIBRARY=turnaround,
1480// there should be no yielding since initial value from KMP_INIT_YIELD() is odd.
1481#define KMP_YIELD_SPIN(count) \
1482  { \
1483    KMP_CPU_PAUSE(); \
1484    if (KMP_TRY_YIELD) { \
1485      (count) -= 2; \
1486      if (!(count)) { \
1487        __kmp_yield(); \
1488        (count) = __kmp_yield_next; \
1489      } \
1490    } \
1491  }
1492
1493// If TPAUSE is available & enabled, use it. If oversubscribed, use the slower
1494// (C0.2) state, which improves performance of other SMT threads on the same
1495// core, otherwise, use the fast (C0.1) default state, or whatever the user has
1496// requested. Uses a timed TPAUSE, and exponential backoff. If TPAUSE isn't
1497// available, fall back to the regular CPU pause and yield combination.
1498#if KMP_HAVE_UMWAIT
1499#define KMP_TPAUSE_MAX_MASK ((kmp_uint64)0xFFFF)
1500#define KMP_YIELD_OVERSUB_ELSE_SPIN(count, time) \
1501  { \
1502    if (__kmp_tpause_enabled) { \
1503      if (KMP_OVERSUBSCRIBED) { \
1504        __kmp_tpause(0, (time)); \
1505      } else { \
1506        __kmp_tpause(__kmp_tpause_hint, (time)); \
1507      } \
1508      (time) = (time << 1 | 1) & KMP_TPAUSE_MAX_MASK; \
1509    } else { \
1510      KMP_CPU_PAUSE(); \
1511      if ((KMP_TRY_YIELD_OVERSUB)) { \
1512        __kmp_yield(); \
1513      } else if (__kmp_use_yield == 1) { \
1514        (count) -= 2; \
1515        if (!(count)) { \
1516          __kmp_yield(); \
1517          (count) = __kmp_yield_next; \
1518        } \
1519      } \
1520    } \
1521  }
1522#else
1523#define KMP_YIELD_OVERSUB_ELSE_SPIN(count, time) \
1524  { \
1525    KMP_CPU_PAUSE(); \
1526    if ((KMP_TRY_YIELD_OVERSUB)) \
1527      __kmp_yield(); \
1528    else if (__kmp_use_yield == 1) { \
1529      (count) -= 2; \
1530      if (!(count)) { \
1531        __kmp_yield(); \
1532        (count) = __kmp_yield_next; \
1533      } \
1534    } \
1535  }
1536#endif // KMP_HAVE_UMWAIT
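// Editorial sketch (not part of kmp.h): the spin-wait shape these macros are
// built for; spins, time and flag are hypothetical locals, and TCR_4 is the
// runtime's volatile-read wrapper used elsewhere in this header.
//
//   kmp_uint32 spins;
//   kmp_uint64 time;
//   KMP_INIT_YIELD(spins);
//   KMP_INIT_BACKOFF(time);
//   while (!TCR_4(*flag)) {
//     KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time); // tpause/yield/pause as available
//   }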
1537
1538/* ------------------------------------------------------------------------ */
1539/* Support datatypes for the orphaned construct nesting checks. */
1540/* ------------------------------------------------------------------------ */
1541
1542/* When adding to this enum, add its corresponding string in cons_text_c[]
1543 * array in kmp_error.cpp */
1544enum cons_type {
1545 ct_none,
1546 ct_parallel,
1547 ct_pdo,
1548 ct_pdo_ordered,
1549 ct_psections,
1550 ct_psingle,
1551 ct_critical,
1552 ct_ordered_in_parallel,
1553 ct_ordered_in_pdo,
1554 ct_master,
1555 ct_reduce,
1556 ct_barrier,
1557 ct_masked
1558};
1559
1560#define IS_CONS_TYPE_ORDERED(ct)((ct) == ct_pdo_ordered) ((ct) == ct_pdo_ordered)
1561
1562struct cons_data {
1563 ident_t const *ident;
1564 enum cons_type type;
1565 int prev;
1566 kmp_user_lock_p
1567 name; /* address exclusively for critical section name comparison */
1568};
1569
1570struct cons_header {
1571 int p_top, w_top, s_top;
1572 int stack_size, stack_top;
1573 struct cons_data *stack_data;
1574};
1575
1576struct kmp_region_info {
1577 char *text;
1578 int offset[KMP_MAX_FIELDS32];
1579 int length[KMP_MAX_FIELDS32];
1580};
1581
1582/* ---------------------------------------------------------------------- */
1583/* ---------------------------------------------------------------------- */
1584
1585#if KMP_OS_WINDOWS0
1586typedef HANDLE kmp_thread_t;
1587typedef DWORD kmp_key_t;
1588#endif /* KMP_OS_WINDOWS */
1589
1590#if KMP_OS_UNIX1
1591typedef pthread_t kmp_thread_t;
1592typedef pthread_key_t kmp_key_t;
1593#endif
1594
1595extern kmp_key_t __kmp_gtid_threadprivate_key;
1596
1597typedef struct kmp_sys_info {
1598 long maxrss; /* the maximum resident set size utilized (in kilobytes) */
1599 long minflt; /* the number of page faults serviced without any I/O */
1600 long majflt; /* the number of page faults serviced that required I/O */
1601 long nswap; /* the number of times a process was "swapped" out of memory */
1602 long inblock; /* the number of times the file system had to perform input */
1603 long oublock; /* the number of times the file system had to perform output */
1604 long nvcsw; /* the number of times a context switch was voluntarily */
1605 long nivcsw; /* the number of times a context switch was forced */
1606} kmp_sys_info_t;
1607
1608#if USE_ITT_BUILD1
1609// We cannot include "kmp_itt.h" due to circular dependency. Declare the only
1610// required type here. Later we will check that the type meets the requirements.
1611typedef int kmp_itt_mark_t;
1612#define KMP_ITT_DEBUG0 0
1613#endif /* USE_ITT_BUILD */
1614
1615typedef kmp_int32 kmp_critical_name[8];
1616
1617/*!
1618@ingroup PARALLEL
1619The type for a microtask which gets passed to @ref __kmpc_fork_call().
1620The arguments to the outlined function are
1621@param global_tid the global thread identity of the thread executing the
1622function.
1623@param bound_tid the local identity of the thread executing the function
1624@param ... pointers to shared variables accessed by the function.
1625*/
1626typedef void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid, ...);
1627typedef void (*kmpc_micro_bound)(kmp_int32 *bound_tid, kmp_int32 *bound_nth,
1628 ...);
1629
1630/*!
1631@ingroup THREADPRIVATE
1632@{
1633*/
1634/* ---------------------------------------------------------------------------
1635 */
1636/* Threadprivate initialization/finalization function declarations */
1637
1638/* for non-array objects: __kmpc_threadprivate_register() */
1639
1640/*!
1641 Pointer to the constructor function.
1642 The first argument is the <tt>this</tt> pointer
1643*/
1644typedef void *(*kmpc_ctor)(void *);
1645
1646/*!
1647 Pointer to the destructor function.
1648 The first argument is the <tt>this</tt> pointer
1649*/
1650typedef void (*kmpc_dtor)(
1651 void * /*, size_t */); /* 2nd arg: magic number for KCC unused by Intel
1652 compiler */
1653/*!
1654 Pointer to an alternate constructor.
1655 The first argument is the <tt>this</tt> pointer.
1656*/
1657typedef void *(*kmpc_cctor)(void *, void *);
1658
1659/* for array objects: __kmpc_threadprivate_register_vec() */
1660/* First arg: "this" pointer */
1661/* Last arg: number of array elements */
1662/*!
1663 Array constructor.
1664 First argument is the <tt>this</tt> pointer
1665 Second argument the number of array elements.
1666*/
1667typedef void *(*kmpc_ctor_vec)(void *, size_t);
1668/*!
1669 Pointer to the array destructor function.
1670 The first argument is the <tt>this</tt> pointer
1671 Second argument the number of array elements.
1672*/
1673typedef void (*kmpc_dtor_vec)(void *, size_t);
1674/*!
1675 Array constructor.
1676 First argument is the <tt>this</tt> pointer
1677 Third argument the number of array elements.
1678*/
1679typedef void *(*kmpc_cctor_vec)(void *, void *,
1680 size_t); /* function unused by compiler */
1681
1682/*!
1683@}
1684*/
1685
1686/* keeps track of threadprivate cache allocations for cleanup later */
1687typedef struct kmp_cached_addr {
1688 void **addr; /* address of allocated cache */
1689 void ***compiler_cache; /* pointer to compiler's cache */
1690 void *data; /* pointer to global data */
1691 struct kmp_cached_addr *next; /* pointer to next cached address */
1692} kmp_cached_addr_t;
1693
1694struct private_data {
1695 struct private_data *next; /* The next descriptor in the list */
1696 void *data; /* The data buffer for this descriptor */
1697 int more; /* The repeat count for this descriptor */
1698 size_t size; /* The data size for this descriptor */
1699};
1700
1701struct private_common {
1702 struct private_common *next;
1703 struct private_common *link;
1704 void *gbl_addr;
1705 void *par_addr; /* par_addr == gbl_addr for PRIMARY thread */
1706 size_t cmn_size;
1707};
1708
1709struct shared_common {
1710 struct shared_common *next;
1711 struct private_data *pod_init;
1712 void *obj_init;
1713 void *gbl_addr;
1714 union {
1715 kmpc_ctor ctor;
1716 kmpc_ctor_vec ctorv;
1717 } ct;
1718 union {
1719 kmpc_cctor cctor;
1720 kmpc_cctor_vec cctorv;
1721 } cct;
1722 union {
1723 kmpc_dtor dtor;
1724 kmpc_dtor_vec dtorv;
1725 } dt;
1726 size_t vec_len;
1727 int is_vec;
1728 size_t cmn_size;
1729};
1730
1731#define KMP_HASH_TABLE_LOG2 9 /* log2 of the hash table size */
1732#define KMP_HASH_TABLE_SIZE \
1733  (1 << KMP_HASH_TABLE_LOG2) /* size of the hash table */
1734#define KMP_HASH_SHIFT 3 /* throw away this many low bits from the address */
1735#define KMP_HASH(x) \
1736  ((((kmp_uintptr_t)x) >> KMP_HASH_SHIFT) & (KMP_HASH_TABLE_SIZE - 1))
1737
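// Editorial worked example (not part of kmp.h): for a global whose address is
// 0x7f1234567848, KMP_HASH drops the 3 low (alignment) bits and keeps 9 bits:
// (0x7f1234567848 >> 3) & 0x1ff == 0x109, so the entry lands in bucket 265 of
// the 512-entry tables below.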
1738struct common_table {
1739  struct private_common *data[KMP_HASH_TABLE_SIZE];
1740};
1741
1742struct shared_table {
1743  struct shared_common *data[KMP_HASH_TABLE_SIZE];
1744};
1745
1746/* ------------------------------------------------------------------------ */
1747
1748#if KMP_USE_HIER_SCHED0
1749// Shared barrier data that exists inside a single unit of the scheduling
1750// hierarchy
1751typedef struct kmp_hier_private_bdata_t {
1752 kmp_int32 num_active;
1753 kmp_uint64 index;
1754 kmp_uint64 wait_val[2];
1755} kmp_hier_private_bdata_t;
1756#endif
1757
1758typedef struct kmp_sched_flags {
1759 unsigned ordered : 1;
1760 unsigned nomerge : 1;
1761 unsigned contains_last : 1;
1762#if KMP_USE_HIER_SCHED0
1763 unsigned use_hier : 1;
1764 unsigned unused : 28;
1765#else
1766 unsigned unused : 29;
1767#endif
1768} kmp_sched_flags_t;
1769
1770KMP_BUILD_ASSERT(sizeof(kmp_sched_flags_t) == 4);
1771
1772#if KMP_STATIC_STEAL_ENABLED1
1773typedef struct KMP_ALIGN_CACHE__attribute__((aligned(64))) dispatch_private_info32 {
1774 kmp_int32 count;
1775 kmp_int32 ub;
1776 /* Adding KMP_ALIGN_CACHE here doesn't help / can hurt performance */
1777 kmp_int32 lb;
1778 kmp_int32 st;
1779 kmp_int32 tc;
1780 kmp_lock_t *steal_lock; // lock used for chunk stealing
1781 // KMP_ALIGN(32) ensures (if the KMP_ALIGN macro is turned on)
1782 // a) parm3 is properly aligned and
1783 // b) all parm1-4 are on the same cache line.
1784  // Because parm1-4 are used together, performance seems to be better
1785 // if they are on the same cache line (not measured though).
1786
1787 struct KMP_ALIGN(32)__attribute__((aligned(32))) { // AC: changed 16 to 32 in order to simplify template
1788 kmp_int32 parm1; // structures in kmp_dispatch.cpp. This should
1789 kmp_int32 parm2; // make no real change at least while padding is off.
1790 kmp_int32 parm3;
1791 kmp_int32 parm4;
1792 };
1793
1794 kmp_uint32 ordered_lower;
1795 kmp_uint32 ordered_upper;
1796#if KMP_OS_WINDOWS0
1797 kmp_int32 last_upper;
1798#endif /* KMP_OS_WINDOWS */
1799} dispatch_private_info32_t;
1800
1801typedef struct KMP_ALIGN_CACHE__attribute__((aligned(64))) dispatch_private_info64 {
1802 kmp_int64 count; // current chunk number for static & static-steal scheduling
1803 kmp_int64 ub; /* upper-bound */
1804 /* Adding KMP_ALIGN_CACHE here doesn't help / can hurt performance */
1805 kmp_int64 lb; /* lower-bound */
1806 kmp_int64 st; /* stride */
1807 kmp_int64 tc; /* trip count (number of iterations) */
1808 kmp_lock_t *steal_lock; // lock used for chunk stealing
1809 /* parm[1-4] are used in different ways by different scheduling algorithms */
1810
1811 // KMP_ALIGN( 32 ) ensures ( if the KMP_ALIGN macro is turned on )
1812 // a) parm3 is properly aligned and
1813 // b) all parm1-4 are in the same cache line.
1814  // Because parm1-4 are used together, performance seems to be better
1815 // if they are in the same line (not measured though).
1816
1817 struct KMP_ALIGN(32)__attribute__((aligned(32))) {
1818 kmp_int64 parm1;
1819 kmp_int64 parm2;
1820 kmp_int64 parm3;
1821 kmp_int64 parm4;
1822 };
1823
1824 kmp_uint64 ordered_lower;
1825 kmp_uint64 ordered_upper;
1826#if KMP_OS_WINDOWS0
1827 kmp_int64 last_upper;
1828#endif /* KMP_OS_WINDOWS */
1829} dispatch_private_info64_t;
1830#else /* KMP_STATIC_STEAL_ENABLED */
1831typedef struct KMP_ALIGN_CACHE__attribute__((aligned(64))) dispatch_private_info32 {
1832 kmp_int32 lb;
1833 kmp_int32 ub;
1834 kmp_int32 st;
1835 kmp_int32 tc;
1836
1837 kmp_int32 parm1;
1838 kmp_int32 parm2;
1839 kmp_int32 parm3;
1840 kmp_int32 parm4;
1841
1842 kmp_int32 count;
1843
1844 kmp_uint32 ordered_lower;
1845 kmp_uint32 ordered_upper;
1846#if KMP_OS_WINDOWS0
1847 kmp_int32 last_upper;
1848#endif /* KMP_OS_WINDOWS */
1849} dispatch_private_info32_t;
1850
1851typedef struct KMP_ALIGN_CACHE__attribute__((aligned(64))) dispatch_private_info64 {
1852 kmp_int64 lb; /* lower-bound */
1853 kmp_int64 ub; /* upper-bound */
1854 kmp_int64 st; /* stride */
1855 kmp_int64 tc; /* trip count (number of iterations) */
1856
1857 /* parm[1-4] are used in different ways by different scheduling algorithms */
1858 kmp_int64 parm1;
1859 kmp_int64 parm2;
1860 kmp_int64 parm3;
1861 kmp_int64 parm4;
1862
1863 kmp_int64 count; /* current chunk number for static scheduling */
1864
1865 kmp_uint64 ordered_lower;
1866 kmp_uint64 ordered_upper;
1867#if KMP_OS_WINDOWS0
1868 kmp_int64 last_upper;
1869#endif /* KMP_OS_WINDOWS */
1870} dispatch_private_info64_t;
1871#endif /* KMP_STATIC_STEAL_ENABLED */
1872
1873typedef struct KMP_ALIGN_CACHE__attribute__((aligned(64))) dispatch_private_info {
1874 union private_info {
1875 dispatch_private_info32_t p32;
1876 dispatch_private_info64_t p64;
1877 } u;
1878 enum sched_type schedule; /* scheduling algorithm */
1879 kmp_sched_flags_t flags; /* flags (e.g., ordered, nomerge, etc.) */
1880 std::atomic<kmp_uint32> steal_flag; // static_steal only, state of a buffer
1881 kmp_int32 ordered_bumped;
1882 // Stack of buffers for nest of serial regions
1883 struct dispatch_private_info *next;
1884 kmp_int32 type_size; /* the size of types in private_info */
1885#if KMP_USE_HIER_SCHED0
1886 kmp_int32 hier_id;
1887 void *parent; /* hierarchical scheduling parent pointer */
1888#endif
1889 enum cons_type pushed_ws;
1890} dispatch_private_info_t;
1891
1892typedef struct dispatch_shared_info32 {
1893 /* chunk index under dynamic, number of idle threads under static-steal;
1894 iteration index otherwise */
1895 volatile kmp_uint32 iteration;
1896 volatile kmp_int32 num_done;
1897 volatile kmp_uint32 ordered_iteration;
1898 // Dummy to retain the structure size after making ordered_iteration scalar
1899 kmp_int32 ordered_dummy[KMP_MAX_ORDERED8 - 1];
1900} dispatch_shared_info32_t;
1901
1902typedef struct dispatch_shared_info64 {
1903 /* chunk index under dynamic, number of idle threads under static-steal;
1904 iteration index otherwise */
1905 volatile kmp_uint64 iteration;
1906 volatile kmp_int64 num_done;
1907 volatile kmp_uint64 ordered_iteration;
1908 // Dummy to retain the structure size after making ordered_iteration scalar
1909 kmp_int64 ordered_dummy[KMP_MAX_ORDERED8 - 3];
1910} dispatch_shared_info64_t;
1911
1912typedef struct dispatch_shared_info {
1913 union shared_info {
1914 dispatch_shared_info32_t s32;
1915 dispatch_shared_info64_t s64;
1916 } u;
1917 volatile kmp_uint32 buffer_index;
1918 volatile kmp_int32 doacross_buf_idx; // teamwise index
1919 volatile kmp_uint32 *doacross_flags; // shared array of iteration flags (0/1)
1920 kmp_int32 doacross_num_done; // count finished threads
1921#if KMP_USE_HIER_SCHED0
1922 void *hier;
1923#endif
1924#if KMP_USE_HWLOC0
1925 // When linking with libhwloc, the ORDERED EPCC test slows down on big
1926 // machines (> 48 cores). Performance analysis showed that a cache thrash
1927 // was occurring and this padding helps alleviate the problem.
1928 char padding[64];
1929#endif
1930} dispatch_shared_info_t;
1931
1932typedef struct kmp_disp {
1933 /* Vector for ORDERED SECTION */
1934 void (*th_deo_fcn)(int *gtid, int *cid, ident_t *);
1935 /* Vector for END ORDERED SECTION */
1936 void (*th_dxo_fcn)(int *gtid, int *cid, ident_t *);
1937
1938 dispatch_shared_info_t *th_dispatch_sh_current;
1939 dispatch_private_info_t *th_dispatch_pr_current;
1940
1941 dispatch_private_info_t *th_disp_buffer;
1942 kmp_uint32 th_disp_index;
1943 kmp_int32 th_doacross_buf_idx; // thread's doacross buffer index
1944 volatile kmp_uint32 *th_doacross_flags; // pointer to shared array of flags
1945 kmp_int64 *th_doacross_info; // info on loop bounds
1946#if KMP_USE_INTERNODE_ALIGNMENT0
1947 char more_padding[INTERNODE_CACHE_LINE4096];
1948#endif
1949} kmp_disp_t;
1950
1951/* ------------------------------------------------------------------------ */
1952/* Barrier stuff */
1953
1954/* constants for barrier state update */
1955#define KMP_INIT_BARRIER_STATE 0 /* should probably start from zero */
1956#define KMP_BARRIER_SLEEP_BIT 0 /* bit used for suspend/sleep part of state */
1957#define KMP_BARRIER_UNUSED_BIT 1 // bit that must never be set for valid state
1958#define KMP_BARRIER_BUMP_BIT 2 /* lsb used for bump of go/arrived state */
1959
1960#define KMP_BARRIER_SLEEP_STATE (1 << KMP_BARRIER_SLEEP_BIT)
1961#define KMP_BARRIER_UNUSED_STATE (1 << KMP_BARRIER_UNUSED_BIT)
1962#define KMP_BARRIER_STATE_BUMP (1 << KMP_BARRIER_BUMP_BIT)
1963
1964#if (KMP_BARRIER_SLEEP_BIT >= KMP_BARRIER_BUMP_BIT)
1965#error "Barrier sleep bit must be smaller than barrier bump bit"
1966#endif
1967#if (KMP_BARRIER_UNUSED_BIT >= KMP_BARRIER_BUMP_BIT)
1968#error "Barrier unused bit must be smaller than barrier bump bit"
1969#endif
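// Editorial worked example (not part of kmp.h): an arrived/go counter that is
// bumped by KMP_BARRIER_STATE_BUMP (4) each barrier keeps bit 0 free for the
// sleep flag and bit 1 permanently clear, so successive barrier generations
// read 0, 4, 8, ... with KMP_BARRIER_SLEEP_STATE (1) OR-ed in only while a
// worker is suspended on that flag.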
1970
1971// Constants for release barrier wait state: currently, hierarchical only
1972#define KMP_BARRIER_NOT_WAITING 0 // Normal state; worker not in wait_sleep
1973#define KMP_BARRIER_OWN_FLAG \
1974  1 // Normal state; worker waiting on own b_go flag in release
1975#define KMP_BARRIER_PARENT_FLAG \
1976  2 // Special state; worker waiting on parent's b_go flag in release
1977#define KMP_BARRIER_SWITCH_TO_OWN_FLAG \
1978  3 // Special state; tells worker to shift from parent to own b_go
1979#define KMP_BARRIER_SWITCHING \
1980  4 // Special state; worker resets appropriate flag on wake-up
1981
1982#define KMP_NOT_SAFE_TO_REAP \
1983  0 // Thread th_reap_state: not safe to reap (tasking)
1984#define KMP_SAFE_TO_REAP 1 // Thread th_reap_state: safe to reap (not tasking)
1985
1986// The flag_type describes the storage used for the flag.
1987enum flag_type {
1988 flag32, /**< atomic 32 bit flags */
1989 flag64, /**< 64 bit flags */
1990 atomic_flag64, /**< atomic 64 bit flags */
1991 flag_oncore, /**< special 64-bit flag for on-core barrier (hierarchical) */
1992 flag_unset
1993};
1994
1995enum barrier_type {
1996 bs_plain_barrier = 0, /* 0, All non-fork/join barriers (except reduction
1997 barriers if enabled) */
1998 bs_forkjoin_barrier, /* 1, All fork/join (parallel region) barriers */
1999#if KMP_FAST_REDUCTION_BARRIER1
2000 bs_reduction_barrier, /* 2, All barriers that are used in reduction */
2001#endif // KMP_FAST_REDUCTION_BARRIER
2002 bs_last_barrier /* Just a placeholder to mark the end */
2003};
2004
2005// to work with reduction barriers just like with plain barriers
2006#if !KMP_FAST_REDUCTION_BARRIER1
2007#define bs_reduction_barrier bs_plain_barrier
2008#endif // KMP_FAST_REDUCTION_BARRIER
2009
2010typedef enum kmp_bar_pat { /* Barrier communication patterns */
2011 bp_linear_bar =
2012 0, /* Single level (degenerate) tree */
2013 bp_tree_bar =
2014 1, /* Balanced tree with branching factor 2^n */
2015 bp_hyper_bar = 2, /* Hypercube-embedded tree with min
2016 branching factor 2^n */
2017 bp_hierarchical_bar = 3, /* Machine hierarchy tree */
2018 bp_dist_bar = 4, /* Distributed barrier */
2019 bp_last_bar /* Placeholder to mark the end */
2020} kmp_bar_pat_e;
2021
2022#define KMP_BARRIER_ICV_PUSH1 1
2023
2024/* Record for holding the values of the internal controls stack records */
2025typedef struct kmp_internal_control {
2026 int serial_nesting_level; /* corresponds to the value of the
2027 th_team_serialized field */
2028 kmp_int8 dynamic; /* internal control for dynamic adjustment of threads (per
2029 thread) */
2030 kmp_int8
2031 bt_set; /* internal control for whether blocktime is explicitly set */
2032 int blocktime; /* internal control for blocktime */
2033#if KMP_USE_MONITOR
2034 int bt_intervals; /* internal control for blocktime intervals */
2035#endif
2036 int nproc; /* internal control for #threads for next parallel region (per
2037 thread) */
2038 int thread_limit; /* internal control for thread-limit-var */
2039 int max_active_levels; /* internal control for max_active_levels */
2040 kmp_r_sched_t
2041 sched; /* internal control for runtime schedule {sched,chunk} pair */
2042 kmp_proc_bind_t proc_bind; /* internal control for affinity */
2043 kmp_int32 default_device; /* internal control for default device */
2044 struct kmp_internal_control *next;
2045} kmp_internal_control_t;
2046
2047static inline void copy_icvs(kmp_internal_control_t *dst,
2048 kmp_internal_control_t *src) {
2049 *dst = *src;
2050}
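
copy_icvs is a plain structure assignment, so every internal control (nproc, blocktime, sched, proc_bind, ...) is copied by value in one shot. A hedged usage sketch, assuming this header is included; the function name and the specific overrides are illustrative, not runtime code:

// Sketch only: seed a child ICV record from a parent record, then override
// the fields the child needs. No deep copy is involved; `next` is copied too,
// so callers that link records must reset it themselves.
void seed_child_icvs(kmp_internal_control_t *child,
                     kmp_internal_control_t *parent, int child_nproc) {
  copy_icvs(child, parent);   // member-wise copy of all internal controls
  child->nproc = child_nproc; // then adjust what differs for the child
  child->next = nullptr;      // do not inherit the parent's stack link
}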
2051
2052/* Thread barrier needs volatile barrier fields */
2053typedef struct KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_bstate {
2054 // th_fixed_icvs is aligned by virtue of kmp_bstate being aligned (and all
2055 // uses of it). It is not explicitly aligned below, because we *don't* want
2056 // it to be padded -- instead, we fit b_go into the same cache line with
2057 // th_fixed_icvs, enabling NGO cache line stores in the hierarchical barrier.
2058 kmp_internal_control_t th_fixed_icvs; // Initial ICVs for the thread
2059 // Tuck b_go into end of th_fixed_icvs cache line, so it can be stored with
2060 // same NGO store
2061 volatile kmp_uint64 b_go; // STATE => task should proceed (hierarchical)
2062 KMP_ALIGN_CACHE__attribute__((aligned(64))) volatile kmp_uint64
2063 b_arrived; // STATE => task reached synch point.
2064 kmp_uint32 *skip_per_level;
2065 kmp_uint32 my_level;
2066 kmp_int32 parent_tid;
2067 kmp_int32 old_tid;
2068 kmp_uint32 depth;
2069 struct kmp_bstate *parent_bar;
2070 kmp_team_t *team;
2071 kmp_uint64 leaf_state;
2072 kmp_uint32 nproc;
2073 kmp_uint8 base_leaf_kids;
2074 kmp_uint8 leaf_kids;
2075 kmp_uint8 offset;
2076 kmp_uint8 wait_flag;
2077 kmp_uint8 use_oncore_barrier;
2078#if USE_DEBUGGER0
2079 // The following field is intended solely for the debugger. Only the worker
2080 // thread itself accesses this field: the worker increases it by 1 when it
2081 // arrives at a barrier.
2082 KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_uint b_worker_arrived;
2083#endif /* USE_DEBUGGER */
2084} kmp_bstate_t;
2085
2086union KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_barrier_union {
2087 double b_align; /* use worst case alignment */
2088 char b_pad[KMP_PAD(kmp_bstate_t, CACHE_LINE)(sizeof(kmp_bstate_t) + (64 - ((sizeof(kmp_bstate_t) - 1) % (64)) - 1))];
2089 kmp_bstate_t bb;
2090};
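
The b_pad expansion above is the KMP_PAD(type, CACHE_LINE) idiom: round sizeof(type) up to the next multiple of the cache-line size so the union occupies whole cache lines. A standalone check of that arithmetic (the names here are illustrative, not from the header):

#include <cstddef>

// Same arithmetic as KMP_PAD(type, sz): sizeof + (sz - ((sizeof - 1) % sz) - 1).
constexpr std::size_t pad_to(std::size_t size, std::size_t line) {
  return size + (line - ((size - 1) % line) - 1);
}

static_assert(pad_to(1, 64) == 64, "1 byte pads to one cache line");
static_assert(pad_to(64, 64) == 64, "exact multiples are unchanged");
static_assert(pad_to(65, 64) == 128, "65 bytes pad to two cache lines");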
2091
2092typedef union kmp_barrier_union kmp_balign_t;
2093
2094/* Team barrier needs only non-volatile arrived counter */
2095union KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_barrier_team_union {
2096 double b_align; /* use worst case alignment */
2097 char b_pad[CACHE_LINE64];
2098 struct {
2099 kmp_uint64 b_arrived; /* STATE => task reached synch point. */
2100#if USE_DEBUGGER0
2101 // The following two fields are intended solely for the debugger. Only the
2102 // primary thread of the team accesses these fields: the first one is
2103 // increased by 1 when the primary thread arrives at a barrier, the second
2104 // one is increased by 1 when all the threads have arrived.
2105 kmp_uint b_master_arrived;
2106 kmp_uint b_team_arrived;
2107#endif
2108 };
2109};
2110
2111typedef union kmp_barrier_team_union kmp_balign_team_t;
2112
2113/* Padding for Linux* OS pthreads condition variables and mutexes used to signal
2114 threads when a condition changes. This is to work around an NPTL bug where
2115 padding was added to pthread_cond_t which caused the initialization routine
2116 to write outside of the structure if compiled on pre-NPTL threads. */
2117#if KMP_OS_WINDOWS0
2118typedef struct kmp_win32_mutex {
2119 /* The Lock */
2120 CRITICAL_SECTION cs;
2121} kmp_win32_mutex_t;
2122
2123typedef struct kmp_win32_cond {
2124 /* Count of the number of waiters. */
2125 int waiters_count_;
2126
2127 /* Serialize access to <waiters_count_> */
2128 kmp_win32_mutex_t waiters_count_lock_;
2129
2130 /* Number of threads to release via a <cond_broadcast> or a <cond_signal> */
2131 int release_count_;
2132
2133 /* Keeps track of the current "generation" so that we don't allow */
2134 /* one thread to steal all the "releases" from the broadcast. */
2135 int wait_generation_count_;
2136
2137 /* A manual-reset event that's used to block and release waiting threads. */
2138 HANDLE event_;
2139} kmp_win32_cond_t;
2140#endif
2141
2142#if KMP_OS_UNIX1
2143
2144union KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_cond_union {
2145 double c_align;
2146 char c_pad[CACHE_LINE64];
2147 pthread_cond_t c_cond;
2148};
2149
2150typedef union kmp_cond_union kmp_cond_align_t;
2151
2152union KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_mutex_union {
2153 double m_align;
2154 char m_pad[CACHE_LINE64];
2155 pthread_mutex_t m_mutex;
2156};
2157
2158typedef union kmp_mutex_union kmp_mutex_align_t;
2159
2160#endif /* KMP_OS_UNIX */
2161
2162typedef struct kmp_desc_base {
2163 void *ds_stackbase;
2164 size_t ds_stacksize;
2165 int ds_stackgrow;
2166 kmp_thread_t ds_thread;
2167 volatile int ds_tid;
2168 int ds_gtid;
2169#if KMP_OS_WINDOWS0
2170 volatile int ds_alive;
2171 DWORD ds_thread_id;
2172/* ds_thread keeps thread handle on Windows* OS. It is enough for RTL purposes.
2173 However, debugger support (libomp_db) cannot work with handles, because they
2174 are not comparable. For example, the debugger requests info about a thread with handle h.
2175 h is valid within the debugger process, but meaningless within the debuggee process.
2176 Even if h is duplicated by a call to DuplicateHandle() so that the result h' is valid
2177 within the debuggee process, it is a *new* handle which does *not* equal
2178 any other handle in the debuggee... The only way to compare handles is to convert
2179 them to system-wide ids. The GetThreadId() function is available only in
2180 Longhorn and Server 2003. :-( In contrast, GetCurrentThreadId() is available
2181 on all Windows* OS flavours (including Windows* 95). Thus, we have to get
2182 the thread id by calling GetCurrentThreadId() from within the thread and save it
2183 to let libomp_db identify threads. */
2184#endif /* KMP_OS_WINDOWS */
2185} kmp_desc_base_t;
2186
2187typedef union KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_desc {
2188 double ds_align; /* use worst case alignment */
2189 char ds_pad[KMP_PAD(kmp_desc_base_t, CACHE_LINE)(sizeof(kmp_desc_base_t) + (64 - ((sizeof(kmp_desc_base_t) - 1) % (64)) - 1))];
2190 kmp_desc_base_t ds;
2191} kmp_desc_t;
2192
2193typedef struct kmp_local {
2194 volatile int this_construct; /* count of single's encountered by thread */
2195 void *reduce_data;
2196#if KMP_USE_BGET1
2197 void *bget_data;
2198 void *bget_list;
2199#if !USE_CMP_XCHG_FOR_BGET1
2200#ifdef USE_QUEUING_LOCK_FOR_BGET
2201 kmp_lock_t bget_lock; /* Lock for accessing bget free list */
2202#else
2203 kmp_bootstrap_lock_t bget_lock; // Lock for accessing bget free list. Must be
2204// bootstrap lock so we can use it at library
2205// shutdown.
2206#endif /* USE_LOCK_FOR_BGET */
2207#endif /* ! USE_CMP_XCHG_FOR_BGET */
2208#endif /* KMP_USE_BGET */
2209
2210 PACKED_REDUCTION_METHOD_T
2211 packed_reduction_method; /* stored by __kmpc_reduce*(), used by
2212 __kmpc_end_reduce*() */
2213
2214} kmp_local_t;
2215
2216#define KMP_CHECK_UPDATE(a, b)if ((a) != (b)) (a) = (b) \
2217 if ((a) != (b)) \
2218 (a) = (b)
2219#define KMP_CHECK_UPDATE_SYNC(a, b)if ((a) != (b)) (((a))) = (((b))) \
2220 if ((a) != (b)) \
2221 TCW_SYNC_PTR((a), (b))(((a))) = (((b)))
2222
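
KMP_CHECK_UPDATE only writes when the value actually changes, which keeps read-mostly team fields from being dirtied (and their cache lines from bouncing) on every fork. A small standalone illustration with hypothetical names:

// Illustrative only: write a shared field conditionally, as KMP_CHECK_UPDATE does.
template <typename T> inline void check_update(T &dst, const T &src) {
  if (dst != src) // skip the store entirely when nothing changed,
    dst = src;    // so an unchanged cache line stays clean/shared
}
// e.g. check_update(team_nproc, requested_nproc);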
2223#define get__blocktime(xteam, xtid)((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.blocktime) \
2224 ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.blocktime)
2225#define get__bt_set(xteam, xtid)((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_set) \
2226 ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_set)
2227#if KMP_USE_MONITOR
2228#define get__bt_intervals(xteam, xtid) \
2229 ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_intervals)
2230#endif
2231
2232#define get__dynamic_2(xteam, xtid)((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.dynamic) \
2233 ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.dynamic)
2234#define get__nproc_2(xteam, xtid)((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.nproc) \
2235 ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.nproc)
2236#define get__sched_2(xteam, xtid)((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.sched) \
2237 ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.sched)
2238
2239#define set__blocktime_team(xteam, xtid, xval)(((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.blocktime) = (xval)) \
2240 (((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.blocktime) = \
2241 (xval))
2242
2243#if KMP_USE_MONITOR
2244#define set__bt_intervals_team(xteam, xtid, xval) \
2245 (((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_intervals) = \
2246 (xval))
2247#endif
2248
2249#define set__bt_set_team(xteam, xtid, xval)(((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_set) = (xval)) \
2250 (((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_set) = (xval))
2251
2252#define set__dynamic(xthread, xval)(((xthread)->th.th_current_task->td_icvs.dynamic) = (xval)) \
2253 (((xthread)->th.th_current_task->td_icvs.dynamic) = (xval))
2254#define get__dynamic(xthread)(((xthread)->th.th_current_task->td_icvs.dynamic) ? ((!0)) : (0)) \
2255 (((xthread)->th.th_current_task->td_icvs.dynamic) ? (FTN_TRUE(!0)) : (FTN_FALSE0))
2256
2257#define set__nproc(xthread, xval)(((xthread)->th.th_current_task->td_icvs.nproc) = (xval)) \
2258 (((xthread)->th.th_current_task->td_icvs.nproc) = (xval))
2259
2260#define set__thread_limit(xthread, xval)(((xthread)->th.th_current_task->td_icvs.thread_limit) = (xval)) \
2261 (((xthread)->th.th_current_task->td_icvs.thread_limit) = (xval))
2262
2263#define set__max_active_levels(xthread, xval)(((xthread)->th.th_current_task->td_icvs.max_active_levels) = (xval)) \
2264 (((xthread)->th.th_current_task->td_icvs.max_active_levels) = (xval))
2265
2266#define get__max_active_levels(xthread)((xthread)->th.th_current_task->td_icvs.max_active_levels) \
2267 ((xthread)->th.th_current_task->td_icvs.max_active_levels)
2268
2269#define set__sched(xthread, xval)(((xthread)->th.th_current_task->td_icvs.sched) = (xval)) \
2270 (((xthread)->th.th_current_task->td_icvs.sched) = (xval))
2271
2272#define set__proc_bind(xthread, xval)(((xthread)->th.th_current_task->td_icvs.proc_bind) = (xval)) \
2273 (((xthread)->th.th_current_task->td_icvs.proc_bind) = (xval))
2274#define get__proc_bind(xthread)((xthread)->th.th_current_task->td_icvs.proc_bind) \
2275 ((xthread)->th.th_current_task->td_icvs.proc_bind)
2276
2277// OpenMP tasking data structures
2278
2279typedef enum kmp_tasking_mode {
2280 tskm_immediate_exec = 0,
2281 tskm_extra_barrier = 1,
2282 tskm_task_teams = 2,
2283 tskm_max = 2
2284} kmp_tasking_mode_t;
2285
2286extern kmp_tasking_mode_t
2287 __kmp_tasking_mode; /* determines how/when to execute tasks */
2288extern int __kmp_task_stealing_constraint;
2289extern int __kmp_enable_task_throttling;
2290extern kmp_int32 __kmp_default_device; // Set via OMP_DEFAULT_DEVICE if
2291// specified, defaults to 0 otherwise
2292// Set via OMP_MAX_TASK_PRIORITY if specified, defaults to 0 otherwise
2293extern kmp_int32 __kmp_max_task_priority;
2294// Set via KMP_TASKLOOP_MIN_TASKS if specified, defaults to 0 otherwise
2295extern kmp_uint64 __kmp_taskloop_min_tasks;
2296
2297/* NOTE: kmp_taskdata_t and kmp_task_t structures allocated in single block with
2298 taskdata first */
2299#define KMP_TASK_TO_TASKDATA(task)(((kmp_taskdata_t *)task) - 1) (((kmp_taskdata_t *)task) - 1)
2300#define KMP_TASKDATA_TO_TASK(taskdata)(kmp_task_t *)(taskdata + 1) (kmp_task_t *)(taskdata + 1)
2301
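
The two conversion macros above rely on the single-block allocation noted in the comment: the kmp_taskdata_t header is placed immediately before the kmp_task_t it describes, so converting between them is pure pointer arithmetic. A standalone analogue (the types here are stand-ins, not the runtime's):

#include <cassert>

struct header_t { int id; };    // stand-in for kmp_taskdata_t
struct payload_t { int data; }; // stand-in for kmp_task_t

// One block with the header first, mirroring "taskdata first" above.
struct block_t { header_t hdr; payload_t task; };

int main() {
  block_t b{};
  payload_t *task = &b.task;
  // Equivalent of KMP_TASK_TO_TASKDATA: step back over one header.
  header_t *hdr = reinterpret_cast<header_t *>(task) - 1;
  assert(hdr == &b.hdr); // holds here because no padding separates the two
}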
2302// The tt_found_tasks flag is a signal to all threads in the team that tasks
2303// were spawned and queued since the previous barrier release.
2304#define KMP_TASKING_ENABLED(task_team)((!0) == ((task_team)->tt.tt_found_tasks)) \
2305 (TRUE(!0) == TCR_SYNC_4((task_team)->tt.tt_found_tasks)((task_team)->tt.tt_found_tasks))
2306/*!
2307@ingroup BASIC_TYPES
2308@{
2309*/
2310
2311/*!
2312 */
2313typedef kmp_int32 (*kmp_routine_entry_t)(kmp_int32, void *);
2314
2315typedef union kmp_cmplrdata {
2316 kmp_int32 priority; /**< priority specified by user for the task */
2317 kmp_routine_entry_t
2318 destructors; /* pointer to function to invoke deconstructors of
2319 firstprivate C++ objects */
2320 /* future data */
2321} kmp_cmplrdata_t;
2322
2323/* sizeof_kmp_task_t passed as arg to kmpc_omp_task call */
2324/*!
2325 */
2326typedef struct kmp_task { /* GEH: Shouldn't this be aligned somehow? */
2327 void *shareds; /**< pointer to block of pointers to shared vars */
2328 kmp_routine_entry_t
2329 routine; /**< pointer to routine to call for executing task */
2330 kmp_int32 part_id; /**< part id for the task */
2331 kmp_cmplrdata_t
2332 data1; /* Two known optional additions: destructors and priority */
2333 kmp_cmplrdata_t data2; /* Process destructors first, priority second */
2334 /* future data */
2335 /* private vars */
2336} kmp_task_t;
2337
2338/*!
2339@}
2340*/
2341
2342typedef struct kmp_taskgroup {
2343 std::atomic<kmp_int32> count; // number of allocated and incomplete tasks
2344 std::atomic<kmp_int32>
2345 cancel_request; // request for cancellation of this taskgroup
2346 struct kmp_taskgroup *parent; // parent taskgroup
2347 // Block of data to perform task reduction
2348 void *reduce_data; // reduction related info
2349 kmp_int32 reduce_num_data; // number of data items to reduce
2350 uintptr_t *gomp_data; // gomp reduction data
2351} kmp_taskgroup_t;
2352
2353// forward declarations
2354typedef union kmp_depnode kmp_depnode_t;
2355typedef struct kmp_depnode_list kmp_depnode_list_t;
2356typedef struct kmp_dephash_entry kmp_dephash_entry_t;
2357
2358// macros for checking dep flag as an integer
2359#define KMP_DEP_IN0x1 0x1
2360#define KMP_DEP_OUT0x2 0x2
2361#define KMP_DEP_INOUT0x3 0x3
2362#define KMP_DEP_MTX0x4 0x4
2363#define KMP_DEP_SET0x8 0x8
2364#define KMP_DEP_ALL0x80 0x80
2365// Compiler sends us this info:
2366typedef struct kmp_depend_info {
2367 kmp_intptr_t base_addr;
2368 size_t len;
2369 union {
2370 kmp_uint8 flag; // flag as an unsigned char
2371 struct { // flag as a set of 8 bits
2372 unsigned in : 1;
2373 unsigned out : 1;
2374 unsigned mtx : 1;
2375 unsigned set : 1;
2376 unsigned unused : 3;
2377 unsigned all : 1;
2378 } flags;
2379 };
2380} kmp_depend_info_t;
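
The union above lets the runtime read the compiler-provided dependence kind either as individual bits or as one integer. Which bit lands where is ABI-dependent, so the standalone sketch below only assumes the common layout in which `in` is the lowest bit (matching KMP_DEP_IN == 0x1 and KMP_DEP_OUT == 0x2); it is illustrative, not a statement about every target.

#include <cstdio>

// Stand-in for the flag/flags union above (layout is ABI-dependent).
union dep_kind_demo {
  unsigned char flag; // whole-byte view, compared against KMP_DEP_* masks
  struct {
    unsigned in : 1;
    unsigned out : 1;
    unsigned mtx : 1;
    unsigned set : 1;
    unsigned unused : 3;
    unsigned all : 1;
  } flags;
};

int main() {
  dep_kind_demo d{};  // zero-initialized
  d.flags.in = 1;     // an "inout" dependence sets both bits
  d.flags.out = 1;
  std::printf("flag = 0x%x\n", (unsigned)d.flag); // typically 0x3 (KMP_DEP_INOUT)
}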
2381
2382// Internal structures to work with task dependencies:
2383struct kmp_depnode_list {
2384 kmp_depnode_t *node;
2385 kmp_depnode_list_t *next;
2386};
2387
2388// Max number of mutexinoutset dependencies per node
2389#define MAX_MTX_DEPS4 4
2390
2391typedef struct kmp_base_depnode {
2392 kmp_depnode_list_t *successors; /* used under lock */
2393 kmp_task_t *task; /* non-NULL if depnode is active, used under lock */
2394 kmp_lock_t *mtx_locks[MAX_MTX_DEPS4]; /* lock mutexinoutset dependent tasks */
2395 kmp_int32 mtx_num_locks; /* number of locks in mtx_locks array */
2396 kmp_lock_t lock; /* guards shared fields: task, successors */
2397#if KMP_SUPPORT_GRAPH_OUTPUT
2398 kmp_uint32 id;
2399#endif
2400 std::atomic<kmp_int32> npredecessors;
2401 std::atomic<kmp_int32> nrefs;
2402} kmp_base_depnode_t;
2403
2404union KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_depnode {
2405 double dn_align; /* use worst case alignment */
2406 char dn_pad[KMP_PAD(kmp_base_depnode_t, CACHE_LINE)(sizeof(kmp_base_depnode_t) + (64 - ((sizeof(kmp_base_depnode_t) - 1) % (64)) - 1))];
2407 kmp_base_depnode_t dn;
2408};
2409
2410struct kmp_dephash_entry {
2411 kmp_intptr_t addr;
2412 kmp_depnode_t *last_out;
2413 kmp_depnode_list_t *last_set;
2414 kmp_depnode_list_t *prev_set;
2415 kmp_uint8 last_flag;
2416 kmp_lock_t *mtx_lock; /* is referenced by depnodes w/mutexinoutset dep */
2417 kmp_dephash_entry_t *next_in_bucket;
2418};
2419
2420typedef struct kmp_dephash {
2421 kmp_dephash_entry_t **buckets;
2422 size_t size;
2423 kmp_depnode_t *last_all;
2424 size_t generation;
2425 kmp_uint32 nelements;
2426 kmp_uint32 nconflicts;
2427} kmp_dephash_t;
2428
2429typedef struct kmp_task_affinity_info {
2430 kmp_intptr_t base_addr;
2431 size_t len;
2432 struct {
2433 bool flag1 : 1;
2434 bool flag2 : 1;
2435 kmp_int32 reserved : 30;
2436 } flags;
2437} kmp_task_affinity_info_t;
2438
2439typedef enum kmp_event_type_t {
2440 KMP_EVENT_UNINITIALIZED = 0,
2441 KMP_EVENT_ALLOW_COMPLETION = 1
2442} kmp_event_type_t;
2443
2444typedef struct {
2445 kmp_event_type_t type;
2446 kmp_tas_lock_t lock;
2447 union {
2448 kmp_task_t *task;
2449 } ed;
2450} kmp_event_t;
2451
2452#ifdef BUILD_TIED_TASK_STACK
2453
2454/* Tied Task stack definitions */
2455typedef struct kmp_stack_block {
2456 kmp_taskdata_t *sb_block[TASK_STACK_BLOCK_SIZE];
2457 struct kmp_stack_block *sb_next;
2458 struct kmp_stack_block *sb_prev;
2459} kmp_stack_block_t;
2460
2461typedef struct kmp_task_stack {
2462 kmp_stack_block_t ts_first_block; // first block of stack entries
2463 kmp_taskdata_t **ts_top; // pointer to the top of stack
2464 kmp_int32 ts_entries; // number of entries on the stack
2465} kmp_task_stack_t;
2466
2467#endif // BUILD_TIED_TASK_STACK
2468
2469typedef struct kmp_tasking_flags { /* Total struct must be exactly 32 bits */
2470 /* Compiler flags */ /* Total compiler flags must be 16 bits */
2471 unsigned tiedness : 1; /* task is either tied (1) or untied (0) */
2472 unsigned final : 1; /* task is final(1) so execute immediately */
2473 unsigned merged_if0 : 1; /* no __kmpc_task_{begin/complete}_if0 calls in if0
2474 code path */
2475 unsigned destructors_thunk : 1; /* set if the compiler creates a thunk to
2476 invoke destructors from the runtime */
2477 unsigned proxy : 1; /* task is a proxy task (it will be executed outside the
2478 context of the RTL) */
2479 unsigned priority_specified : 1; /* set if the compiler provides priority
2480 setting for the task */
2481 unsigned detachable : 1; /* 1 == can detach */
2482 unsigned hidden_helper : 1; /* 1 == hidden helper task */
2483 unsigned reserved : 8; /* reserved for compiler use */
2484
2485 /* Library flags */ /* Total library flags must be 16 bits */
2486 unsigned tasktype : 1; /* task is either explicit(1) or implicit (0) */
2487 unsigned task_serial : 1; // task is executed immediately (1) or deferred (0)
2488 unsigned tasking_ser : 1; // all tasks in team are either executed immediately
2489 // (1) or may be deferred (0)
2490 unsigned team_serial : 1; // entire team is serial (1) [1 thread] or parallel
2491 // (0) [>= 2 threads]
2492 /* If either team_serial or tasking_ser is set, task team may be NULL */
2493 /* Task State Flags: */
2494 unsigned started : 1; /* 1==started, 0==not started */
2495 unsigned executing : 1; /* 1==executing, 0==not executing */
2496 unsigned complete : 1; /* 1==complete, 0==not complete */
2497 unsigned freed : 1; /* 1==freed, 0==allocated */
2498 unsigned native : 1; /* 1==gcc-compiled task, 0==intel */
2499 unsigned reserved31 : 7; /* reserved for library use */
2500
2501} kmp_tasking_flags_t;
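
The comments above require the compiler half and the library half of the flag word to stay at 16 bits each so the whole struct fits in 32 bits. Assuming this header is included, a compile-time guard in the spirit of KMP_BUILD_ASSERT might look like the sketch below; it is not the runtime's own check.

// Sketch only: verify the bitfield budget promised by the comments above.
static_assert(sizeof(kmp_tasking_flags_t) == sizeof(kmp_int32),
              "kmp_tasking_flags_t must stay exactly 32 bits");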
2502
2503struct kmp_taskdata { /* aligned during dynamic allocation */
2504 kmp_int32 td_task_id; /* id, assigned by debugger */
2505 kmp_tasking_flags_t td_flags; /* task flags */
2506 kmp_team_t *td_team; /* team for this task */
2507 kmp_info_p *td_alloc_thread; /* thread that allocated data structures */
2508 /* Currently not used except for perhaps IDB */
2509 kmp_taskdata_t *td_parent; /* parent task */
2510 kmp_int32 td_level; /* task nesting level */
2511 std::atomic<kmp_int32> td_untied_count; // untied task active parts counter
2512 ident_t *td_ident; /* task identifier */
2513 // Taskwait data.
2514 ident_t *td_taskwait_ident;
2515 kmp_uint32 td_taskwait_counter;
2516 kmp_int32 td_taskwait_thread; /* gtid + 1 of thread encountered taskwait */
2517 KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_internal_control_t
2518 td_icvs; /* Internal control variables for the task */
2519 KMP_ALIGN_CACHE__attribute__((aligned(64))) std::atomic<kmp_int32>
2520 td_allocated_child_tasks; /* Child tasks (+ current task) not yet
2521 deallocated */
2522 std::atomic<kmp_int32>
2523 td_incomplete_child_tasks; /* Child tasks not yet complete */
2524 kmp_taskgroup_t
2525 *td_taskgroup; // Each task keeps pointer to its current taskgroup
2526 kmp_dephash_t
2527 *td_dephash; // Dependencies for children tasks are tracked from here
2528 kmp_depnode_t
2529 *td_depnode; // Pointer to graph node if this task has dependencies
2530 kmp_task_team_t *td_task_team;
2531 size_t td_size_alloc; // Size of task structure, including shareds etc.
2532#if defined(KMP_GOMP_COMPAT)
2533 // 4 or 8 byte integers for the loop bounds in GOMP_taskloop
2534 kmp_int32 td_size_loop_bounds;
2535#endif
2536 kmp_taskdata_t *td_last_tied; // keep tied task for task scheduling constraint
2537#if defined(KMP_GOMP_COMPAT)
2538 // GOMP sends in a copy function for copy constructors
2539 void (*td_copy_func)(void *, void *);
2540#endif
2541 kmp_event_t td_allow_completion_event;
2542#if OMPT_SUPPORT1
2543 ompt_task_info_t ompt_task_info;
2544#endif
2545}; // struct kmp_taskdata
2546
2547// Make sure padding above worked
2548KMP_BUILD_ASSERT(sizeof(kmp_taskdata_t) % sizeof(void *) == 0)static_assert(sizeof(kmp_taskdata_t) % sizeof(void *) == 0, "Build condition error");
2549
2550// Data for task team but per thread
2551typedef struct kmp_base_thread_data {
2552 kmp_info_p *td_thr; // Pointer back to thread info
2553 // Used only in __kmp_execute_tasks_template, maybe not avail until task is
2554 // queued?
2555 kmp_bootstrap_lock_t td_deque_lock; // Lock for accessing deque
2556 kmp_taskdata_t *
2557 *td_deque; // Deque of tasks encountered by td_thr, dynamically allocated
2558 kmp_int32 td_deque_size; // Size of deque
2559 kmp_uint32 td_deque_head; // Head of deque (will wrap)
2560 kmp_uint32 td_deque_tail; // Tail of deque (will wrap)
2561 kmp_int32 td_deque_ntasks; // Number of tasks in deque
2562 // GEH: shouldn't this be volatile since used in while-spin?
2563 kmp_int32 td_deque_last_stolen; // Thread number of last successful steal
2564#ifdef BUILD_TIED_TASK_STACK
2565 kmp_task_stack_t td_susp_tied_tasks; // Stack of suspended tied tasks for task
2566// scheduling constraint
2567#endif // BUILD_TIED_TASK_STACK
2568} kmp_base_thread_data_t;
2569
2570#define TASK_DEQUE_BITS8 8 // Used solely to define INITIAL_TASK_DEQUE_SIZE
2571#define INITIAL_TASK_DEQUE_SIZE(1 << 8) (1 << TASK_DEQUE_BITS8)
2572
2573#define TASK_DEQUE_SIZE(td)((td).td_deque_size) ((td).td_deque_size)
2574#define TASK_DEQUE_MASK(td)((td).td_deque_size - 1) ((td).td_deque_size - 1)
2575
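
Because INITIAL_TASK_DEQUE_SIZE is a power of two, TASK_DEQUE_MASK(td) can wrap head/tail indices with a single AND. A standalone analogue of that ring-buffer arithmetic (names are illustrative):

#include <cstdint>

constexpr uint32_t kDequeSize = 1u << 8; // INITIAL_TASK_DEQUE_SIZE
constexpr uint32_t kDequeMask = kDequeSize - 1;

// The mask maps any logical head/tail index into the backing array.
constexpr uint32_t slot(uint32_t index) { return index & kDequeMask; }

static_assert(slot(0) == 0 && slot(255) == 255, "in-range indices unchanged");
static_assert(slot(256) == 0, "indices wrap at the deque size");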
2576typedef union KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_thread_data {
2577 kmp_base_thread_data_t td;
2578 double td_align; /* use worst case alignment */
2579 char td_pad[KMP_PAD(kmp_base_thread_data_t, CACHE_LINE)(sizeof(kmp_base_thread_data_t) + (64 - ((sizeof(kmp_base_thread_data_t) - 1) % (64)) - 1))];
2580} kmp_thread_data_t;
2581
2582typedef struct kmp_task_pri {
2583 kmp_thread_data_t td;
2584 kmp_int32 priority;
2585 kmp_task_pri *next;
2586} kmp_task_pri_t;
2587
2588// Data for task teams which are used when tasking is enabled for the team
2589typedef struct kmp_base_task_team {
2590 kmp_bootstrap_lock_t
2591 tt_threads_lock; /* Lock used to allocate per-thread part of task team */
2592 /* must be bootstrap lock since used at library shutdown*/
2593
2594 // TODO: check performance vs kmp_tas_lock_t
2595 kmp_bootstrap_lock_t tt_task_pri_lock; /* Lock to access priority tasks */
2596 kmp_task_pri_t *tt_task_pri_list;
2597
2598 kmp_task_team_t *tt_next; /* For linking the task team free list */
2599 kmp_thread_data_t
2600 *tt_threads_data; /* Array of per-thread structures for task team */
2601 /* Data survives task team deallocation */
2602 kmp_int32 tt_found_tasks; /* Have we found tasks and queued them while
2603 executing this team? */
2604 /* TRUE means tt_threads_data is set up and initialized */
2605 kmp_int32 tt_nproc; /* #threads in team */
2606 kmp_int32 tt_max_threads; // # entries allocated for threads_data array
2607 kmp_int32 tt_found_proxy_tasks; // found proxy tasks since last barrier
2608 kmp_int32 tt_untied_task_encountered;
2609 std::atomic<kmp_int32> tt_num_task_pri; // number of priority tasks enqueued
2610 // A hidden helper task was encountered in this task team, so we must
2611 // wait for it when waiting on the task team
2612 kmp_int32 tt_hidden_helper_task_encountered;
2613
2614 KMP_ALIGN_CACHE__attribute__((aligned(64)))
2615 std::atomic<kmp_int32> tt_unfinished_threads; /* #threads still active */
2616
2617 KMP_ALIGN_CACHE__attribute__((aligned(64)))
2618 volatile kmp_uint32
2619 tt_active; /* is the team still actively executing tasks */
2620} kmp_base_task_team_t;
2621
2622union KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_task_team {
2623 kmp_base_task_team_t tt;
2624 double tt_align; /* use worst case alignment */
2625 char tt_pad[KMP_PAD(kmp_base_task_team_t, CACHE_LINE)(sizeof(kmp_base_task_team_t) + (64 - ((sizeof(kmp_base_task_team_t) - 1) % (64)) - 1))];
2626};
2627
2628#if (USE_FAST_MEMORY3 == 3) || (USE_FAST_MEMORY3 == 5)
2629// Free lists keep same-size free memory slots for fast memory allocation
2630// routines
2631typedef struct kmp_free_list {
2632 void *th_free_list_self; // Self-allocated tasks free list
2633 void *th_free_list_sync; // Self-allocated tasks stolen/returned by other
2634 // threads
2635 void *th_free_list_other; // Non-self free list (to be returned to owner's
2636 // sync list)
2637} kmp_free_list_t;
2638#endif
2639#if KMP_NESTED_HOT_TEAMS1
2640// Hot teams array keeps hot teams and their sizes for given thread. Hot teams
2641// are not put in teams pool, and they don't put threads in threads pool.
2642typedef struct kmp_hot_team_ptr {
2643 kmp_team_p *hot_team; // pointer to hot_team of given nesting level
2644 kmp_int32 hot_team_nth; // number of threads allocated for the hot_team
2645} kmp_hot_team_ptr_t;
2646#endif
2647typedef struct kmp_teams_size {
2648 kmp_int32 nteams; // number of teams in a league
2649 kmp_int32 nth; // number of threads in each team of the league
2650} kmp_teams_size_t;
2651
2652// This struct stores a thread that acts as a "root" for a contention
2653// group. Contention groups are rooted at kmp_root threads, but also at
2654// each primary thread of each team created in the teams construct.
2655// This struct therefore also stores a thread_limit associated with
2656// that contention group, and a counter to track the number of threads
2657// active in that contention group. Each thread has a list of these: CG
2658// root threads have an entry in their list in which cg_root refers to
2659// the thread itself, whereas other workers in the CG will have a
2660// single entry where cg_root is same as the entry containing their CG
2661// root. When a thread encounters a teams construct, it will add a new
2662// entry to the front of its list, because it now roots a new CG.
2663typedef struct kmp_cg_root {
2664 kmp_info_p *cg_root; // "root" thread for a contention group
2665 // The CG root's limit comes from OMP_THREAD_LIMIT for root threads, or
2666 // thread_limit clause for teams primary threads
2667 kmp_int32 cg_thread_limit;
2668 kmp_int32 cg_nthreads; // Count of active threads in CG rooted at cg_root
2669 struct kmp_cg_root *up; // pointer to higher level CG root in list
2670} kmp_cg_root_t;
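
Following the comment above, each thread keeps a singly linked list of contention-group roots and pushes a fresh entry to the front when it starts a teams construct. The standalone sketch below models only that push-to-front step; the types and names are stand-ins, not runtime code.

// Illustrative stand-in for kmp_cg_root_t and the push described above.
struct cg_root_demo {
  void *cg_root;       // the thread acting as CG root
  int cg_thread_limit; // thread limit for this contention group
  int cg_nthreads;     // active threads counted against that limit
  cg_root_demo *up;    // next (enclosing) CG root in the list
};

// On encountering a teams construct, the primary thread becomes a new CG root
// and links the previous list behind it.
cg_root_demo *push_cg_root(cg_root_demo *head, void *self, int thread_limit) {
  cg_root_demo *entry = new cg_root_demo{self, thread_limit, 1, head};
  return entry; // new front of the thread's CG root list
}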
2671
2672// OpenMP thread data structures
2673
2674typedef struct KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_base_info {
2675 /* Start with the readonly data which is cache aligned and padded. This is
2676 written before the thread starts working by the primary thread. Uber
2677 masters may update themselves later. Usage does not consider serialized
2678 regions. */
2679 kmp_desc_t th_info;
2680 kmp_team_p *th_team; /* team we belong to */
2681 kmp_root_p *th_root; /* pointer to root of task hierarchy */
2682 kmp_info_p *th_next_pool; /* next available thread in the pool */
2683 kmp_disp_t *th_dispatch; /* thread's dispatch data */
2684 int th_in_pool; /* in thread pool (32 bits for TCR/TCW) */
2685
2686 /* The following are cached from the team info structure */
2687 /* TODO use these in more places as determined to be needed via profiling */
2688 int th_team_nproc; /* number of threads in a team */
2689 kmp_info_p *th_team_master; /* the team's primary thread */
2690 int th_team_serialized; /* team is serialized */
2691 microtask_t th_teams_microtask; /* save entry address for teams construct */
2692 int th_teams_level; /* save initial level of teams construct */
2693/* it is 0 on device but may be any on host */
2694
2695/* The blocktime info is copied from the team struct to the thread struct */
2696/* at the start of a barrier, and the values stored in the team are used */
2697/* at points in the code where the team struct is no longer guaranteed */
2698/* to exist (from the POV of worker threads). */
2699#if KMP_USE_MONITOR
2700 int th_team_bt_intervals;
2701 int th_team_bt_set;
2702#else
2703 kmp_uint64 th_team_bt_intervals;
2704#endif
2705
2706#if KMP_AFFINITY_SUPPORTED1
2707 kmp_affin_mask_t *th_affin_mask; /* thread's current affinity mask */
2708#endif
2709 omp_allocator_handle_t th_def_allocator; /* default allocator */
2710 /* The data set by the primary thread at reinit, then R/W by the worker */
2711 KMP_ALIGN_CACHE__attribute__((aligned(64))) int
2712 th_set_nproc; /* if > 0, then only use this request for the next fork */
2713#if KMP_NESTED_HOT_TEAMS1
2714 kmp_hot_team_ptr_t *th_hot_teams; /* array of hot teams */
2715#endif
2716 kmp_proc_bind_t
2717 th_set_proc_bind; /* if != proc_bind_default, use request for next fork */
2718 kmp_teams_size_t
2719 th_teams_size; /* number of teams/threads in teams construct */
2720#if KMP_AFFINITY_SUPPORTED1
2721 int th_current_place; /* place currently bound to */
2722 int th_new_place; /* place to bind to in par reg */
2723 int th_first_place; /* first place in partition */
2724 int th_last_place; /* last place in partition */
2725#endif
2726 int th_prev_level; /* previous level for affinity format */
2727 int th_prev_num_threads; /* previous num_threads for affinity format */
2728#if USE_ITT_BUILD1
2729 kmp_uint64 th_bar_arrive_time; /* arrival to barrier timestamp */
2730 kmp_uint64 th_bar_min_time; /* minimum arrival time at the barrier */
2731 kmp_uint64 th_frame_time; /* frame timestamp */
2732#endif /* USE_ITT_BUILD */
2733 kmp_local_t th_local;
2734 struct private_common *th_pri_head;
2735
2736 /* Now the data only used by the worker (after initial allocation) */
2737 /* TODO the first serial team should actually be stored in the info_t
2738 structure. this will help reduce initial allocation overhead */
2739 KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_team_p
2740 *th_serial_team; /*serialized team held in reserve*/
2741
2742#if OMPT_SUPPORT1
2743 ompt_thread_info_t ompt_thread_info;
2744#endif
2745
2746 /* The following are also read by the primary thread during reinit */
2747 struct common_table *th_pri_common;
2748
2749 volatile kmp_uint32 th_spin_here; /* thread-local location for spinning */
2750 /* while awaiting queuing lock acquire */
2751
2752 volatile void *th_sleep_loc; // this points at a kmp_flag<T>
2753 flag_type th_sleep_loc_type; // enum type of flag stored in th_sleep_loc
2754
2755 ident_t *th_ident;
2756 unsigned th_x; // Random number generator data
2757 unsigned th_a; // Random number generator data
2758
2759 /* Tasking-related data for the thread */
2760 kmp_task_team_t *th_task_team; // Task team struct
2761 kmp_taskdata_t *th_current_task; // Innermost Task being executed
2762 kmp_uint8 th_task_state; // alternating 0/1 for task team identification
2763 kmp_uint8 *th_task_state_memo_stack; // Stack holding memos of th_task_state
2764 // at nested levels
2765 kmp_uint32 th_task_state_top; // Top element of th_task_state_memo_stack
2766 kmp_uint32 th_task_state_stack_sz; // Size of th_task_state_memo_stack
2767 kmp_uint32 th_reap_state; // Non-zero indicates thread is not
2768 // tasking, thus safe to reap
2769
2770 /* More stuff for keeping track of active/sleeping threads (this part is
2771 written by the worker thread) */
2772 kmp_uint8 th_active_in_pool; // included in count of #active threads in pool
2773 int th_active; // ! sleeping; 32 bits for TCR/TCW
2774 std::atomic<kmp_uint32> th_used_in_team; // Flag indicating use in team
2775 // 0 = not used in team; 1 = used in team;
2776 // 2 = transitioning to not used in team; 3 = transitioning to used in team
2777 struct cons_header *th_cons; // used for consistency check
2778#if KMP_USE_HIER_SCHED0
2779 // used for hierarchical scheduling
2780 kmp_hier_private_bdata_t *th_hier_bar_data;
2781#endif
2782
2783 /* Add the synchronizing data which is cache aligned and padded. */
2784 KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_balign_t th_bar[bs_last_barrier];
2785
2786 KMP_ALIGN_CACHE__attribute__((aligned(64))) volatile kmp_int32
2787 th_next_waiting; /* gtid+1 of next thread on lock wait queue, 0 if none */
2788
2789#if (USE_FAST_MEMORY3 == 3) || (USE_FAST_MEMORY3 == 5)
2790#define NUM_LISTS4 4
2791 kmp_free_list_t th_free_lists[NUM_LISTS4]; // Free lists for fast memory
2792// allocation routines
2793#endif
2794
2795#if KMP_OS_WINDOWS0
2796 kmp_win32_cond_t th_suspend_cv;
2797 kmp_win32_mutex_t th_suspend_mx;
2798 std::atomic<int> th_suspend_init;
2799#endif
2800#if KMP_OS_UNIX1
2801 kmp_cond_align_t th_suspend_cv;
2802 kmp_mutex_align_t th_suspend_mx;
2803 std::atomic<int> th_suspend_init_count;
2804#endif
2805
2806#if USE_ITT_BUILD1
2807 kmp_itt_mark_t th_itt_mark_single;
2808// alignment ???
2809#endif /* USE_ITT_BUILD */
2810#if KMP_STATS_ENABLED0
2811 kmp_stats_list *th_stats;
2812#endif
2813#if KMP_OS_UNIX1
2814 std::atomic<bool> th_blocking;
2815#endif
2816 kmp_cg_root_t *th_cg_roots; // list of cg_roots associated with this thread
2817} kmp_base_info_t;
2818
2819typedef union KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_info {
2820 double th_align; /* use worst case alignment */
2821 char th_pad[KMP_PAD(kmp_base_info_t, CACHE_LINE)(sizeof(kmp_base_info_t) + (64 - ((sizeof(kmp_base_info_t) - 1) % (64)) - 1))];
2822 kmp_base_info_t th;
2823} kmp_info_t;
2824
2825// OpenMP thread team data structures
2826
2827typedef struct kmp_base_data {
2828 volatile kmp_uint32 t_value;
2829} kmp_base_data_t;
2830
2831typedef union KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_sleep_team {
2832 double dt_align; /* use worst case alignment */
2833 char dt_pad[KMP_PAD(kmp_base_data_t, CACHE_LINE)(sizeof(kmp_base_data_t) + (64 - ((sizeof(kmp_base_data_t) - 1) % (64)) - 1))];
2834 kmp_base_data_t dt;
2835} kmp_sleep_team_t;
2836
2837typedef union KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_ordered_team {
2838 double dt_align; /* use worst case alignment */
2839 char dt_pad[KMP_PAD(kmp_base_data_t, CACHE_LINE)(sizeof(kmp_base_data_t) + (64 - ((sizeof(kmp_base_data_t) - 1) % (64)) - 1))];
2840 kmp_base_data_t dt;
2841} kmp_ordered_team_t;
2842
2843typedef int (*launch_t)(int gtid);
2844
2845/* Minimum number of ARGV entries to malloc if necessary */
2846#define KMP_MIN_MALLOC_ARGV_ENTRIES100 100
2847
2848// Set up how many argv pointers will fit in cache lines containing
2849// t_inline_argv. Historically, we have supported at least 96 bytes. Using a
2850// larger value for more space between the primary write/worker read section and
2851 // the read/write-by-all section seems to buy more performance on EPCC PARALLEL.
2852#if KMP_ARCH_X860 || KMP_ARCH_X86_641
2853#define KMP_INLINE_ARGV_BYTES(4 * 64 - ((3 * (sizeof(void *)) + 2 * sizeof(int) + 2 * sizeof(kmp_int8) + sizeof(kmp_int16) + sizeof(kmp_uint32)) % 64)) \
2854 (4 * CACHE_LINE64 - \
2855 ((3 * KMP_PTR_SKIP(sizeof(void *)) + 2 * sizeof(int) + 2 * sizeof(kmp_int8) + \
2856 sizeof(kmp_int16) + sizeof(kmp_uint32)) % \
2857 CACHE_LINE64))
2858#else
2859#define KMP_INLINE_ARGV_BYTES(4 * 64 - ((3 * (sizeof(void *)) + 2 * sizeof(int) + 2 * sizeof(kmp_int8) + sizeof(kmp_int16) + sizeof(kmp_uint32)) % 64)) \
2860 (2 * CACHE_LINE64 - ((3 * KMP_PTR_SKIP(sizeof(void *)) + 2 * sizeof(int)) % CACHE_LINE64))
2861#endif
2862#define KMP_INLINE_ARGV_ENTRIES(int)((4 * 64 - ((3 * (sizeof(void *)) + 2 * sizeof(int) + 2 * sizeof(kmp_int8) + sizeof(kmp_int16) + sizeof(kmp_uint32)) % 64)) / (sizeof(void *))) (int)(KMP_INLINE_ARGV_BYTES(4 * 64 - ((3 * (sizeof(void *)) + 2 * sizeof(int) + 2 * sizeof(kmp_int8) + sizeof(kmp_int16) + sizeof(kmp_uint32)) % 64)) / KMP_PTR_SKIP(sizeof(void *)))
2863
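
KMP_INLINE_ARGV_ENTRIES is just the byte budget above divided by the pointer size. A standalone recomputation of the x86-64 case (64-byte cache lines, 8-byte pointers), for illustration only:

#include <cstddef>

constexpr std::size_t kCacheLine = 64;
constexpr std::size_t kHeaderBytes = // fields sharing the cache lines with t_inline_argv
    3 * sizeof(void *) + 2 * sizeof(int) + 2 * sizeof(char) +
    sizeof(short) + sizeof(unsigned);
constexpr std::size_t kInlineArgvBytes =
    4 * kCacheLine - kHeaderBytes % kCacheLine;
constexpr std::size_t kInlineArgvEntries = kInlineArgvBytes / sizeof(void *);

// With 8-byte pointers this gives (256 - 40) / 8 = 27 inline argv slots.
static_assert(sizeof(void *) != 8 || kInlineArgvEntries == 27,
              "expected 27 inline argv entries on LP64");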
2864typedef struct KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_base_team {
2865 // Synchronization Data
2866 // ---------------------------------------------------------------------------
2867 KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_ordered_team_t t_ordered;
2868 kmp_balign_team_t t_bar[bs_last_barrier];
2869 std::atomic<int> t_construct; // count of single directive encountered by team
2870 char pad[sizeof(kmp_lock_t)]; // padding to maintain performance on big iron
2871
2872 // [0] - parallel / [1] - worksharing task reduction data shared by taskgroups
2873 std::atomic<void *> t_tg_reduce_data[2]; // to support task modifier
2874 std::atomic<int> t_tg_fini_counter[2]; // sync end of task reductions
2875
2876 // Primary thread only
2877 // ---------------------------------------------------------------------------
2878 KMP_ALIGN_CACHE__attribute__((aligned(64))) int t_master_tid; // tid of primary thread in parent team
2879 int t_master_this_cons; // "this_construct" single counter of primary thread
2880 // in parent team
2881 ident_t *t_ident; // if volatile, have to change too much other crud to
2882 // volatile too
2883 kmp_team_p *t_parent; // parent team
2884 kmp_team_p *t_next_pool; // next free team in the team pool
2885 kmp_disp_t *t_dispatch; // thread's dispatch data
2886 kmp_task_team_t *t_task_team[2]; // Task team struct; switch between 2
2887 kmp_proc_bind_t t_proc_bind; // bind type for par region
2888#if USE_ITT_BUILD1
2889 kmp_uint64 t_region_time; // region begin timestamp
2890#endif /* USE_ITT_BUILD */
2891
2892 // Primary thread write, workers read
2893 // --------------------------------------------------------------------------
2894 KMP_ALIGN_CACHE__attribute__((aligned(64))) void **t_argv;
2895 int t_argc;
2896 int t_nproc; // number of threads in team
2897 microtask_t t_pkfn;
2898 launch_t t_invoke; // procedure to launch the microtask
2899
2900#if OMPT_SUPPORT1
2901 ompt_team_info_t ompt_team_info;
2902 ompt_lw_taskteam_t *ompt_serialized_team_info;
2903#endif
2904
2905#if KMP_ARCH_X860 || KMP_ARCH_X86_641
2906 kmp_int8 t_fp_control_saved;
2907 kmp_int8 t_pad2b;
2908 kmp_int16 t_x87_fpu_control_word; // FP control regs
2909 kmp_uint32 t_mxcsr;
2910#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
2911
2912 void *t_inline_argv[KMP_INLINE_ARGV_ENTRIES(int)((4 * 64 - ((3 * (sizeof(void *)) + 2 * sizeof(int) + 2 * sizeof(kmp_int8) + sizeof(kmp_int16) + sizeof(kmp_uint32)) % 64)) / (sizeof(void *)))];
2913
2914 KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_info_t **t_threads;
2915 kmp_taskdata_t
2916 *t_implicit_task_taskdata; // Taskdata for the thread's implicit task
2917 int t_level; // nested parallel level
2918
2919 KMP_ALIGN_CACHE__attribute__((aligned(64))) int t_max_argc;
2920 int t_max_nproc; // max threads this team can handle (dynamically expandable)
2921 int t_serialized; // levels deep of serialized teams
2922 dispatch_shared_info_t *t_disp_buffer; // buffers for dispatch system
2923 int t_id; // team's id, assigned by debugger.
2924 int t_active_level; // nested active parallel level
2925 kmp_r_sched_t t_sched; // run-time schedule for the team
2926#if KMP_AFFINITY_SUPPORTED1
2927 int t_first_place; // first & last place in parent thread's partition.
2928 int t_last_place; // Restore these values to primary thread after par region.
2929#endif // KMP_AFFINITY_SUPPORTED
2930 int t_display_affinity;
2931 int t_size_changed; // team size was changed?: 0: no, 1: yes, -1: changed via
2932 // omp_set_num_threads() call
2933 omp_allocator_handle_t t_def_allocator; /* default allocator */
2934
2935// Read/write by workers as well
2936#if (KMP_ARCH_X860 || KMP_ARCH_X86_641)
2937 // Using CACHE_LINE=64 reduces memory footprint, but causes a big perf
2938 // regression of epcc 'parallel' and 'barrier' on fxe256lin01. This extra
2939 // padding serves to fix the performance of epcc 'parallel' and 'barrier' when
2940 // CACHE_LINE=64. TODO: investigate more and get rid of this padding.
2941 char dummy_padding[1024];
2942#endif
2943 // Internal control stack for additional nested teams.
2944 KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_internal_control_t *t_control_stack_top;
2945 // for SERIALIZED teams nested 2 or more levels deep
2946 // typed flag to store request state of cancellation
2947 std::atomic<kmp_int32> t_cancel_request;
2948 int t_master_active; // save on fork, restore on join
2949 void *t_copypriv_data; // team specific pointer to copyprivate data array
2950#if KMP_OS_WINDOWS0
2951 std::atomic<kmp_uint32> t_copyin_counter;
2952#endif
2953#if USE_ITT_BUILD1
2954 void *t_stack_id; // team specific stack stitching id (for ittnotify)
2955#endif /* USE_ITT_BUILD */
2956 distributedBarrier *b; // Distributed barrier data associated with team
2957} kmp_base_team_t;
2958
2959union KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_team {
2960 kmp_base_team_t t;
2961 double t_align; /* use worst case alignment */
2962 char t_pad[KMP_PAD(kmp_base_team_t, CACHE_LINE)(sizeof(kmp_base_team_t) + (64 - ((sizeof(kmp_base_team_t) - 1) % (64)) - 1))];
2963};
2964
2965typedef union KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_time_global {
2966 double dt_align; /* use worst case alignment */
2967 char dt_pad[KMP_PAD(kmp_base_data_t, CACHE_LINE)(sizeof(kmp_base_data_t) + (64 - ((sizeof(kmp_base_data_t) - 1) % (64)) - 1))];
2968 kmp_base_data_t dt;
2969} kmp_time_global_t;
2970
2971typedef struct kmp_base_global {
2972 /* cache-aligned */
2973 kmp_time_global_t g_time;
2974
2975 /* non cache-aligned */
2976 volatile int g_abort;
2977 volatile int g_done;
2978
2979 int g_dynamic;
2980 enum dynamic_mode g_dynamic_mode;
2981} kmp_base_global_t;
2982
2983typedef union KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_global {
2984 kmp_base_global_t g;
2985 double g_align; /* use worst case alignment */
2986 char g_pad[KMP_PAD(kmp_base_global_t, CACHE_LINE)(sizeof(kmp_base_global_t) + (64 - ((sizeof(kmp_base_global_t) - 1) % (64)) - 1))];
2987} kmp_global_t;
2988
2989typedef struct kmp_base_root {
2990 // TODO: GEH - combine r_active with r_in_parallel then r_active ==
2991 // (r_in_parallel>= 0)
2992 // TODO: GEH - then replace r_active with t_active_levels if we can to reduce
2993 // the synch overhead or keeping r_active
2994 volatile int r_active; /* TRUE if some region in a nest has > 1 thread */
2995 // keeps a count of active parallel regions per root
2996 std::atomic<int> r_in_parallel;
2997 // GEH: This is misnamed, should be r_active_levels
2998 kmp_team_t *r_root_team;
2999 kmp_team_t *r_hot_team;
3000 kmp_info_t *r_uber_thread;
3001 kmp_lock_t r_begin_lock;
3002 volatile int r_begin;
3003 int r_blocktime; /* blocktime for this root and descendants */
3004#if KMP_AFFINITY_SUPPORTED1
3005 int r_affinity_assigned;
3006#endif // KMP_AFFINITY_SUPPORTED
3007} kmp_base_root_t;
3008
3009typedef union KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_root {
3010 kmp_base_root_t r;
3011 double r_align; /* use worst case alignment */
3012 char r_pad[KMP_PAD(kmp_base_root_t, CACHE_LINE)(sizeof(kmp_base_root_t) + (64 - ((sizeof(kmp_base_root_t) - 1) % (64)) - 1))];
3013} kmp_root_t;
3014
3015struct fortran_inx_info {
3016 kmp_int32 data;
3017};
3018
3019// This list type exists to hold old __kmp_threads arrays so that
3020// old references to them may complete while reallocation takes place when
3021// expanding the array. The items in this list are kept alive until library
3022// shutdown.
3023typedef struct kmp_old_threads_list_t {
3024 kmp_info_t **threads;
3025 struct kmp_old_threads_list_t *next;
3026} kmp_old_threads_list_t;
3027
3028/* ------------------------------------------------------------------------ */
3029
3030extern int __kmp_settings;
3031extern int __kmp_duplicate_library_ok;
3032#if USE_ITT_BUILD1
3033extern int __kmp_forkjoin_frames;
3034extern int __kmp_forkjoin_frames_mode;
3035#endif
3036extern PACKED_REDUCTION_METHOD_T __kmp_force_reduction_method;
3037extern int __kmp_determ_red;
3038
3039#ifdef KMP_DEBUG1
3040extern int kmp_a_debug;
3041extern int kmp_b_debug;
3042extern int kmp_c_debug;
3043extern int kmp_d_debug;
3044extern int kmp_e_debug;
3045extern int kmp_f_debug;
3046#endif /* KMP_DEBUG */
3047
3048/* For debug information logging using rotating buffer */
3049#define KMP_DEBUG_BUF_LINES_INIT512 512
3050#define KMP_DEBUG_BUF_LINES_MIN1 1
3051
3052#define KMP_DEBUG_BUF_CHARS_INIT128 128
3053#define KMP_DEBUG_BUF_CHARS_MIN2 2
3054
3055extern int
3056 __kmp_debug_buf; /* TRUE means use buffer, FALSE means print to stderr */
3057extern int __kmp_debug_buf_lines; /* How many lines of debug stored in buffer */
3058extern int
3059 __kmp_debug_buf_chars; /* How many characters allowed per line in buffer */
3060extern int __kmp_debug_buf_atomic; /* TRUE means use atomic update of buffer
3061 entry pointer */
3062
3063extern char *__kmp_debug_buffer; /* Debug buffer itself */
3064extern std::atomic<int> __kmp_debug_count; /* Counter for number of lines
3065 printed in buffer so far */
3066extern int __kmp_debug_buf_warn_chars; /* Keep track of char increase
3067 recommended in warnings */
3068/* end rotating debug buffer */
3069
3070#ifdef KMP_DEBUG1
3071extern int __kmp_par_range; /* +1 => only go par for constructs in range */
3072
3073#define KMP_PAR_RANGE_ROUTINE_LEN1024 1024
3074extern char __kmp_par_range_routine[KMP_PAR_RANGE_ROUTINE_LEN1024];
3075#define KMP_PAR_RANGE_FILENAME_LEN1024 1024
3076extern char __kmp_par_range_filename[KMP_PAR_RANGE_FILENAME_LEN1024];
3077extern int __kmp_par_range_lb;
3078extern int __kmp_par_range_ub;
3079#endif
3080
3081/* For printing out dynamic storage map for threads and teams */
3082extern int
3083 __kmp_storage_map; /* True means print storage map for threads and teams */
3084extern int __kmp_storage_map_verbose; /* True means storage map includes
3085 placement info */
3086extern int __kmp_storage_map_verbose_specified;
3087
3088#if KMP_ARCH_X860 || KMP_ARCH_X86_641
3089extern kmp_cpuinfo_t __kmp_cpuinfo;
3090static inline bool __kmp_is_hybrid_cpu() { return __kmp_cpuinfo.flags.hybrid; }
3091#elif KMP_OS_DARWIN0 && KMP_ARCH_AARCH640
3092static inline bool __kmp_is_hybrid_cpu() { return true; }
3093#else
3094static inline bool __kmp_is_hybrid_cpu() { return false; }
3095#endif
3096
3097extern volatile int __kmp_init_serial;
3098extern volatile int __kmp_init_gtid;
3099extern volatile int __kmp_init_common;
3100extern volatile int __kmp_need_register_serial;
3101extern volatile int __kmp_init_middle;
3102extern volatile int __kmp_init_parallel;
3103#if KMP_USE_MONITOR
3104extern volatile int __kmp_init_monitor;
3105#endif
3106extern volatile int __kmp_init_user_locks;
3107extern volatile int __kmp_init_hidden_helper_threads;
3108extern int __kmp_init_counter;
3109extern int __kmp_root_counter;
3110extern int __kmp_version;
3111
3112/* list of address of allocated caches for commons */
3113extern kmp_cached_addr_t *__kmp_threadpriv_cache_list;
3114
3115/* Barrier algorithm types and options */
3116extern kmp_uint32 __kmp_barrier_gather_bb_dflt;
3117extern kmp_uint32 __kmp_barrier_release_bb_dflt;
3118extern kmp_bar_pat_e __kmp_barrier_gather_pat_dflt;
3119extern kmp_bar_pat_e __kmp_barrier_release_pat_dflt;
3120extern kmp_uint32 __kmp_barrier_gather_branch_bits[bs_last_barrier];
3121extern kmp_uint32 __kmp_barrier_release_branch_bits[bs_last_barrier];
3122extern kmp_bar_pat_e __kmp_barrier_gather_pattern[bs_last_barrier];
3123extern kmp_bar_pat_e __kmp_barrier_release_pattern[bs_last_barrier];
3124extern char const *__kmp_barrier_branch_bit_env_name[bs_last_barrier];
3125extern char const *__kmp_barrier_pattern_env_name[bs_last_barrier];
3126extern char const *__kmp_barrier_type_name[bs_last_barrier];
3127extern char const *__kmp_barrier_pattern_name[bp_last_bar];
3128
3129/* Global Locks */
3130extern kmp_bootstrap_lock_t __kmp_initz_lock; /* control initialization */
3131extern kmp_bootstrap_lock_t __kmp_forkjoin_lock; /* control fork/join access */
3132extern kmp_bootstrap_lock_t __kmp_task_team_lock;
3133extern kmp_bootstrap_lock_t
3134 __kmp_exit_lock; /* exit() is not always thread-safe */
3135#if KMP_USE_MONITOR
3136extern kmp_bootstrap_lock_t
3137 __kmp_monitor_lock; /* control monitor thread creation */
3138#endif
3139extern kmp_bootstrap_lock_t
3140 __kmp_tp_cached_lock; /* used for the hack to allow threadprivate cache and
3141 __kmp_threads expansion to co-exist */
3142
3143extern kmp_lock_t __kmp_global_lock; /* control OS/global access */
3144extern kmp_queuing_lock_t __kmp_dispatch_lock; /* control dispatch access */
3145extern kmp_lock_t __kmp_debug_lock; /* control I/O access for KMP_DEBUG */
3146
3147extern enum library_type __kmp_library;
3148
3149extern enum sched_type __kmp_sched; /* default runtime scheduling */
3150extern enum sched_type __kmp_static; /* default static scheduling method */
3151extern enum sched_type __kmp_guided; /* default guided scheduling method */
3152extern enum sched_type __kmp_auto; /* default auto scheduling method */
3153extern int __kmp_chunk; /* default runtime chunk size */
3154extern int __kmp_force_monotonic; /* whether monotonic scheduling forced */
3155
3156extern size_t __kmp_stksize; /* stack size per thread */
3157#if KMP_USE_MONITOR
3158extern size_t __kmp_monitor_stksize; /* stack size for monitor thread */
3159#endif
3160extern size_t __kmp_stkoffset; /* stack offset per thread */
3161extern int __kmp_stkpadding; /* Should we pad root thread(s) stack */
3162
3163extern size_t
3164 __kmp_malloc_pool_incr; /* incremental size of pool for kmp_malloc() */
3165extern int __kmp_env_stksize; /* was KMP_STACKSIZE specified? */
3166extern int __kmp_env_blocktime; /* was KMP_BLOCKTIME specified? */
3167extern int __kmp_env_checks; /* was KMP_CHECKS specified? */
3168extern int __kmp_env_consistency_check; // was KMP_CONSISTENCY_CHECK specified?
3169extern int __kmp_generate_warnings; /* should we issue warnings? */
3170extern int __kmp_reserve_warn; /* have we issued reserve_threads warning? */
3171
3172#ifdef DEBUG_SUSPEND
3173extern int __kmp_suspend_count; /* count inside __kmp_suspend_template() */
3174#endif
3175
3176extern kmp_int32 __kmp_use_yield;
3177extern kmp_int32 __kmp_use_yield_exp_set;
3178extern kmp_uint32 __kmp_yield_init;
3179extern kmp_uint32 __kmp_yield_next;
3180extern kmp_uint64 __kmp_pause_init;
3181
3182/* ------------------------------------------------------------------------- */
3183extern int __kmp_allThreadsSpecified;
3184
3185extern size_t __kmp_align_alloc;
3186/* following data protected by initialization routines */
3187extern int __kmp_xproc; /* number of processors in the system */
3188extern int __kmp_avail_proc; /* number of processors available to the process */
3189extern size_t __kmp_sys_min_stksize; /* system-defined minimum stack size */
3190extern int __kmp_sys_max_nth; /* system-imposed maximum number of threads */
3191// maximum total number of concurrently-existing threads on device
3192extern int __kmp_max_nth;
3193// maximum total number of concurrently-existing threads in a contention group
3194extern int __kmp_cg_max_nth;
3195extern int __kmp_teams_max_nth; // max threads used in a teams construct
3196extern int __kmp_threads_capacity; /* capacity of the arrays __kmp_threads and
3197 __kmp_root */
3198extern int __kmp_dflt_team_nth; /* default number of threads in a parallel
3199 region a la OMP_NUM_THREADS */
3200extern int __kmp_dflt_team_nth_ub; /* upper bound on "" determined at serial
3201 initialization */
3202extern int __kmp_tp_capacity; /* capacity of __kmp_threads if threadprivate is
3203 used (fixed) */
3204extern int __kmp_tp_cached; /* whether threadprivate cache has been created
3205 (__kmpc_threadprivate_cached()) */
3206extern int __kmp_dflt_blocktime; /* number of milliseconds to wait before
3207 blocking (env setting) */
3208extern bool __kmp_wpolicy_passive; /* explicitly set passive wait policy */
3209#if KMP_USE_MONITOR
3210extern int
3211 __kmp_monitor_wakeups; /* number of times monitor wakes up per second */
3212extern int __kmp_bt_intervals; /* number of monitor timestamp intervals before
3213 blocking */
3214#endif
3215#ifdef KMP_ADJUST_BLOCKTIME1
3216extern int __kmp_zero_bt; /* whether blocktime has been forced to zero */
3217#endif /* KMP_ADJUST_BLOCKTIME */
3218#ifdef KMP_DFLT_NTH_CORES
3219extern int __kmp_ncores; /* Total number of cores for threads placement */
3220#endif
3221/* Number of millisecs to delay on abort for Intel(R) VTune(TM) tools */
3222extern int __kmp_abort_delay;
3223
3224extern int __kmp_need_register_atfork_specified;
3225extern int __kmp_need_register_atfork; /* At initialization, call pthread_atfork
3226 to install fork handler */
3227extern int __kmp_gtid_mode; /* Method of getting gtid, values:
3228 0 - not set, will be set at runtime
3229 1 - using stack search
3230 2 - dynamic TLS (pthread_getspecific(Linux* OS/OS
3231 X*) or TlsGetValue(Windows* OS))
3232 3 - static TLS (__declspec(thread) __kmp_gtid),
3233 Linux* OS .so only. */
3234extern int
3235 __kmp_adjust_gtid_mode; /* If true, adjust method based on #threads */
3236#ifdef KMP_TDATA_GTID1
3237extern KMP_THREAD_LOCAL__thread int __kmp_gtid;
3238#endif
3239extern int __kmp_tls_gtid_min; /* #threads below which use sp search for gtid */
3240extern int __kmp_foreign_tp; // If true, separate TP var for each foreign thread
3241#if KMP_ARCH_X860 || KMP_ARCH_X86_641
3242extern int __kmp_inherit_fp_control; // copy fp creg(s) parent->workers at fork
3243extern kmp_int16 __kmp_init_x87_fpu_control_word; // init thread's FP ctrl reg
3244extern kmp_uint32 __kmp_init_mxcsr; /* init thread's mxscr */
3245#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
3246
3247// max_active_levels for nested parallelism enabled by default via
3248// OMP_MAX_ACTIVE_LEVELS, OMP_NESTED, OMP_NUM_THREADS, and OMP_PROC_BIND
3249extern int __kmp_dflt_max_active_levels;
3250// Indicates whether value of __kmp_dflt_max_active_levels was already
3251// explicitly set by OMP_MAX_ACTIVE_LEVELS or OMP_NESTED=false
3252extern bool __kmp_dflt_max_active_levels_set;
3253extern int __kmp_dispatch_num_buffers; /* max possible dynamic loops in
3254 concurrent execution per team */
3255#if KMP_NESTED_HOT_TEAMS1
3256extern int __kmp_hot_teams_mode;
3257extern int __kmp_hot_teams_max_level;
3258#endif
3259
3260#if KMP_OS_LINUX1
3261extern enum clock_function_type __kmp_clock_function;
3262extern int __kmp_clock_function_param;
3263#endif /* KMP_OS_LINUX */
3264
3265#if KMP_MIC_SUPPORTED((0 || 1) && (1 || 0))
3266extern enum mic_type __kmp_mic_type;
3267#endif
3268
3269#ifdef USE_LOAD_BALANCE1
3270extern double __kmp_load_balance_interval; // load balance algorithm interval
3271#endif /* USE_LOAD_BALANCE */
3272
3273// OpenMP 3.1 - Nested num threads array
3274typedef struct kmp_nested_nthreads_t {
3275 int *nth;
3276 int size;
3277 int used;
3278} kmp_nested_nthreads_t;
3279
3280extern kmp_nested_nthreads_t __kmp_nested_nth;
3281
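For orientation, __kmp_nested_nth holds the per-level thread counts parsed from a comma-separated OMP_NUM_THREADS list. A hedged illustration of what a parse of OMP_NUM_THREADS=4,3,2 could plausibly populate, assuming size is the allocated length and used is the number of valid entries (both field meanings are assumptions, not spelled out in this header):

static int example_nth_levels[] = {4, 3, 2}; // illustrative values only
static kmp_nested_nthreads_t example_nested_nth = {
    /*nth=*/example_nth_levels, /*size=*/3, /*used=*/3};
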
3282#if KMP_USE_ADAPTIVE_LOCKS(0 || 1) && !0
3283
3284// Parameters for the speculative lock backoff system.
3285struct kmp_adaptive_backoff_params_t {
3286 // Number of soft retries before it counts as a hard retry.
3287 kmp_uint32 max_soft_retries;
3288 // Badness is a bit mask : 0,1,3,7,15,... on each hard failure we move one to
3289 // the right
3290 kmp_uint32 max_badness;
3291};
3292
3293extern kmp_adaptive_backoff_params_t __kmp_adaptive_backoff_params;
3294
3295#if KMP_DEBUG_ADAPTIVE_LOCKS0
3296extern const char *__kmp_speculative_statsfile;
3297#endif
3298
3299#endif // KMP_USE_ADAPTIVE_LOCKS
3300
3301extern int __kmp_display_env; /* TRUE or FALSE */
3302extern int __kmp_display_env_verbose; /* TRUE if OMP_DISPLAY_ENV=VERBOSE */
3303extern int __kmp_omp_cancellation; /* TRUE or FALSE */
3304extern int __kmp_nteams;
3305extern int __kmp_teams_thread_limit;
3306
3307/* ------------------------------------------------------------------------- */
3308
3309/* the following are protected by the fork/join lock */
3310/* write: lock read: anytime */
3311extern kmp_info_t **__kmp_threads; /* Descriptors for the threads */
3312/* Holds old arrays of __kmp_threads until library shutdown */
3313extern kmp_old_threads_list_t *__kmp_old_threads_list;
3314/* read/write: lock */
3315extern volatile kmp_team_t *__kmp_team_pool;
3316extern volatile kmp_info_t *__kmp_thread_pool;
3317extern kmp_info_t *__kmp_thread_pool_insert_pt;
3318
3319// total num threads reachable from some root thread including all root threads
3320extern volatile int __kmp_nth;
3321/* total number of threads reachable from some root thread including all root
3322 threads, and those in the thread pool */
3323extern volatile int __kmp_all_nth;
3324extern std::atomic<int> __kmp_thread_pool_active_nth;
3325
3326extern kmp_root_t **__kmp_root; /* root of thread hierarchy */
3327/* end data protected by fork/join lock */
3328/* ------------------------------------------------------------------------- */
3329
3330#define __kmp_get_gtid() __kmp_get_global_thread_id()
3331#define __kmp_entry_gtid() __kmp_get_global_thread_id_reg()
3332#define __kmp_get_tid() (__kmp_tid_from_gtid(__kmp_get_gtid()))
3333#define __kmp_get_team() (__kmp_threads[(__kmp_get_gtid())]->th.th_team)
3334#define __kmp_get_thread() (__kmp_thread_from_gtid(__kmp_get_gtid()))
3335
3336// AT: Which way is correct?
3337// AT: 1. nproc = __kmp_threads[ ( gtid ) ] -> th.th_team -> t.t_nproc;
3338// AT: 2. nproc = __kmp_threads[ ( gtid ) ] -> th.th_team_nproc;
3339#define __kmp_get_team_num_threads(gtid) \
3340 (__kmp_threads[(gtid)]->th.th_team->t.t_nproc)
3341
3342static inline bool KMP_UBER_GTID(int gtid) {
3343 KMP_DEBUG_ASSERT(gtid >= KMP_GTID_MIN);
3344 KMP_DEBUG_ASSERT(gtid < __kmp_threads_capacity);
3345 return (gtid >= 0 && __kmp_root[gtid] && __kmp_threads[gtid] &&
3346 __kmp_threads[gtid] == __kmp_root[gtid]->r.r_uber_thread);
3347}
3348
3349static inline int __kmp_tid_from_gtid(int gtid) {
3350 KMP_DEBUG_ASSERT(gtid >= 0);
3351 return __kmp_threads[gtid]->th.th_info.ds.ds_tid;
3352}
3353
3354static inline int __kmp_gtid_from_tid(int tid, const kmp_team_t *team) {
3355 KMP_DEBUG_ASSERT(tid >= 0 && team);
3356 return team->t.t_threads[tid]->th.th_info.ds.ds_gtid;
3357}
3358
3359static inline int __kmp_gtid_from_thread(const kmp_info_t *thr) {
3360 KMP_DEBUG_ASSERT(thr);
3361 return thr->th.th_info.ds.ds_gtid;
3362}
3363
3364static inline kmp_info_t *__kmp_thread_from_gtid(int gtid) {
3365 KMP_DEBUG_ASSERT(gtid >= 0);
3366 return __kmp_threads[gtid];
3367}
3368
3369static inline kmp_team_t *__kmp_team_from_gtid(int gtid) {
3370 KMP_DEBUG_ASSERT(gtid >= 0);
3371 return __kmp_threads[gtid]->th.th_team;
3372}
3373
3374static inline void __kmp_assert_valid_gtid(kmp_int32 gtid) {
3375 if (UNLIKELY(gtid < 0 || gtid >= __kmp_threads_capacity))
3376 KMP_FATAL(ThreadIdentInvalid);
3377}
3378
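Taken together, these inline helpers let any entry point that only has a gtid recover its thread descriptor, its team, and its team-local tid. A minimal sketch (illustrative only; this function is invented and is not part of kmp.h):

static inline void __example_lookup_from_gtid(int gtid) {
  __kmp_assert_valid_gtid(gtid);                  // fatal error on a bogus gtid
  kmp_info_t *thr = __kmp_thread_from_gtid(gtid); // per-thread descriptor
  kmp_team_t *team = __kmp_team_from_gtid(gtid);  // enclosing team
  int tid = __kmp_tid_from_gtid(gtid);            // 0-based rank within the team
  KMP_DEBUG_ASSERT(__kmp_gtid_from_tid(tid, team) == gtid); // round trip
  (void)thr;
}
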
3379#if KMP_HAVE_MWAIT((0 || 1) && (1 || 0) && !0) || KMP_HAVE_UMWAIT((0 || 1) && (1 || 0) && !0)
3380extern int __kmp_user_level_mwait; // TRUE or FALSE; from KMP_USER_LEVEL_MWAIT
3381extern int __kmp_umwait_enabled; // Runtime check if user-level mwait enabled
3382extern int __kmp_mwait_enabled; // Runtime check if ring3 mwait is enabled
3383extern int __kmp_mwait_hints; // Hints to pass in to mwait
3384#endif
3385
3386#if KMP_HAVE_UMWAIT((0 || 1) && (1 || 0) && !0)
3387extern int __kmp_waitpkg_enabled; // Runtime check if waitpkg exists
3388extern int __kmp_tpause_state; // 0 (default), 1=C0.1, 2=C0.2; from KMP_TPAUSE
3389extern int __kmp_tpause_hint; // 1=C0.1 (default), 0=C0.2; from KMP_TPAUSE
3390extern int __kmp_tpause_enabled; // 0 (default), 1 (KMP_TPAUSE is non-zero)
3391#endif
3392
3393/* ------------------------------------------------------------------------- */
3394
3395extern kmp_global_t __kmp_global; /* global status */
3396
3397extern kmp_info_t __kmp_monitor;
3398// For Debugging Support Library
3399extern std::atomic<kmp_int32> __kmp_team_counter;
3400// For Debugging Support Library
3401extern std::atomic<kmp_int32> __kmp_task_counter;
3402
3403#if USE_DEBUGGER
3404#define _KMP_GEN_ID(counter) \
3405 (__kmp_debugging ? KMP_ATOMIC_INC(&counter) + 1 : ~0)
3406#else
3407#define _KMP_GEN_ID(counter) (~0)
3408#endif /* USE_DEBUGGER */
3409
3410#define KMP_GEN_TASK_ID() _KMP_GEN_ID(__kmp_task_counter)
3411#define KMP_GEN_TEAM_ID() _KMP_GEN_ID(__kmp_team_counter)
3412
3413/* ------------------------------------------------------------------------ */
3414
3415extern void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2,
3416 size_t size, char const *format, ...);
3417
3418extern void __kmp_serial_initialize(void);
3419extern void __kmp_middle_initialize(void);
3420extern void __kmp_parallel_initialize(void);
3421
3422extern void __kmp_internal_begin(void);
3423extern void __kmp_internal_end_library(int gtid);
3424extern void __kmp_internal_end_thread(int gtid);
3425extern void __kmp_internal_end_atexit(void);
3426extern void __kmp_internal_end_dtor(void);
3427extern void __kmp_internal_end_dest(void *);
3428
3429extern int __kmp_register_root(int initial_thread);
3430extern void __kmp_unregister_root(int gtid);
3431extern void __kmp_unregister_library(void); // called by __kmp_internal_end()
3432
3433extern int __kmp_ignore_mppbeg(void);
3434extern int __kmp_ignore_mppend(void);
3435
3436extern int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws);
3437extern void __kmp_exit_single(int gtid);
3438
3439extern void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref);
3440extern void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref);
3441
3442#ifdef USE_LOAD_BALANCE1
3443extern int __kmp_get_load_balance(int);
3444#endif
3445
3446extern int __kmp_get_global_thread_id(void);
3447extern int __kmp_get_global_thread_id_reg(void);
3448extern void __kmp_exit_thread(int exit_status);
3449extern void __kmp_abort(char const *format, ...);
3450extern void __kmp_abort_thread(void);
3451KMP_NORETURN[[noreturn]] extern void __kmp_abort_process(void);
3452extern void __kmp_warn(char const *format, ...);
3453
3454extern void __kmp_set_num_threads(int new_nth, int gtid);
3455
3456// Returns current thread (pointer to kmp_info_t). Current thread *must* be
3457// registered.
3458static inline kmp_info_t *__kmp_entry_thread() {
3459 int gtid = __kmp_entry_gtid()__kmp_get_global_thread_id_reg();
3460
3461 return __kmp_threads[gtid];
3462}
3463
3464extern void __kmp_set_max_active_levels(int gtid, int new_max_active_levels);
3465extern int __kmp_get_max_active_levels(int gtid);
3466extern int __kmp_get_ancestor_thread_num(int gtid, int level);
3467extern int __kmp_get_team_size(int gtid, int level);
3468extern void __kmp_set_schedule(int gtid, kmp_sched_t new_sched, int chunk);
3469extern void __kmp_get_schedule(int gtid, kmp_sched_t *sched, int *chunk);
3470
3471extern unsigned short __kmp_get_random(kmp_info_t *thread);
3472extern void __kmp_init_random(kmp_info_t *thread);
3473
3474extern kmp_r_sched_t __kmp_get_schedule_global(void);
3475extern void __kmp_adjust_num_threads(int new_nproc);
3476extern void __kmp_check_stksize(size_t *val);
3477
3478extern void *___kmp_allocate(size_t size KMP_SRC_LOC_DECL);
3479extern void *___kmp_page_allocate(size_t size KMP_SRC_LOC_DECL);
3480extern void ___kmp_free(void *ptr KMP_SRC_LOC_DECL);
3481#define __kmp_allocate(size) ___kmp_allocate((size)KMP_SRC_LOC_CURR)
3482#define __kmp_page_allocate(size) ___kmp_page_allocate((size)KMP_SRC_LOC_CURR)
3483#define __kmp_free(ptr) ___kmp_free((ptr)KMP_SRC_LOC_CURR)
3484
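The ___kmp_allocate/__kmp_allocate pairing is the usual call-site-capture pattern: KMP_SRC_LOC_DECL adds file/line parameters when source-location tracking is compiled in, and KMP_SRC_LOC_CURR supplies them at each call site so allocations can be attributed to their origin. A stand-alone analogue of the pattern (the my_* names are illustrative, not kmp.h macros):

#include <cstdio>
#include <cstdlib>

// The implementation receives the call site explicitly...
static void *my_allocate_impl(size_t size, const char *file, int line) {
  void *p = std::malloc(size);
  std::fprintf(stderr, "alloc %zu bytes at %s:%d -> %p\n", size, file, line, p);
  return p;
}
// ...and the wrapper macro captures it automatically, like __kmp_allocate(size).
#define my_allocate(size) my_allocate_impl((size), __FILE__, __LINE__)
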
3485#if USE_FAST_MEMORY
3486extern void *___kmp_fast_allocate(kmp_info_t *this_thr,
3487 size_t size KMP_SRC_LOC_DECL);
3488extern void ___kmp_fast_free(kmp_info_t *this_thr, void *ptr KMP_SRC_LOC_DECL);
3489extern void __kmp_free_fast_memory(kmp_info_t *this_thr);
3490extern void __kmp_initialize_fast_memory(kmp_info_t *this_thr);
3491#define __kmp_fast_allocate(this_thr, size) \
3492 ___kmp_fast_allocate((this_thr), (size)KMP_SRC_LOC_CURR)
3493#define __kmp_fast_free(this_thr, ptr) \
3494 ___kmp_fast_free((this_thr), (ptr)KMP_SRC_LOC_CURR)
3495#endif
3496
3497extern void *___kmp_thread_malloc(kmp_info_t *th, size_t size KMP_SRC_LOC_DECL);
3498extern void *___kmp_thread_calloc(kmp_info_t *th, size_t nelem,
3499 size_t elsize KMP_SRC_LOC_DECL);
3500extern void *___kmp_thread_realloc(kmp_info_t *th, void *ptr,
3501 size_t size KMP_SRC_LOC_DECL);
3502extern void ___kmp_thread_free(kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL);
3503#define __kmp_thread_malloc(th, size) \
3504 ___kmp_thread_malloc((th), (size)KMP_SRC_LOC_CURR)
3505#define __kmp_thread_calloc(th, nelem, elsize) \
3506 ___kmp_thread_calloc((th), (nelem), (elsize)KMP_SRC_LOC_CURR)
3507#define __kmp_thread_realloc(th, ptr, size) \
3508 ___kmp_thread_realloc((th), (ptr), (size)KMP_SRC_LOC_CURR)
3509#define __kmp_thread_free(th, ptr) \
3510 ___kmp_thread_free((th), (ptr)KMP_SRC_LOC_CURR)
3511
3512extern void __kmp_push_num_threads(ident_t *loc, int gtid, int num_threads);
3513
3514extern void __kmp_push_proc_bind(ident_t *loc, int gtid,
3515 kmp_proc_bind_t proc_bind);
3516extern void __kmp_push_num_teams(ident_t *loc, int gtid, int num_teams,
3517 int num_threads);
3518extern void __kmp_push_num_teams_51(ident_t *loc, int gtid, int num_teams_lb,
3519 int num_teams_ub, int num_threads);
3520
3521extern void __kmp_yield();
3522
3523extern void __kmpc_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
3524 enum sched_type schedule, kmp_int32 lb,
3525 kmp_int32 ub, kmp_int32 st, kmp_int32 chunk);
3526extern void __kmpc_dispatch_init_4u(ident_t *loc, kmp_int32 gtid,
3527 enum sched_type schedule, kmp_uint32 lb,
3528 kmp_uint32 ub, kmp_int32 st,
3529 kmp_int32 chunk);
3530extern void __kmpc_dispatch_init_8(ident_t *loc, kmp_int32 gtid,
3531 enum sched_type schedule, kmp_int64 lb,
3532 kmp_int64 ub, kmp_int64 st, kmp_int64 chunk);
3533extern void __kmpc_dispatch_init_8u(ident_t *loc, kmp_int32 gtid,
3534 enum sched_type schedule, kmp_uint64 lb,
3535 kmp_uint64 ub, kmp_int64 st,
3536 kmp_int64 chunk);
3537
3538extern int __kmpc_dispatch_next_4(ident_t *loc, kmp_int32 gtid,
3539 kmp_int32 *p_last, kmp_int32 *p_lb,
3540 kmp_int32 *p_ub, kmp_int32 *p_st);
3541extern int __kmpc_dispatch_next_4u(ident_t *loc, kmp_int32 gtid,
3542 kmp_int32 *p_last, kmp_uint32 *p_lb,
3543 kmp_uint32 *p_ub, kmp_int32 *p_st);
3544extern int __kmpc_dispatch_next_8(ident_t *loc, kmp_int32 gtid,
3545 kmp_int32 *p_last, kmp_int64 *p_lb,
3546 kmp_int64 *p_ub, kmp_int64 *p_st);
3547extern int __kmpc_dispatch_next_8u(ident_t *loc, kmp_int32 gtid,
3548 kmp_int32 *p_last, kmp_uint64 *p_lb,
3549 kmp_uint64 *p_ub, kmp_int64 *p_st);
3550
3551extern void __kmpc_dispatch_fini_4(ident_t *loc, kmp_int32 gtid);
3552extern void __kmpc_dispatch_fini_8(ident_t *loc, kmp_int32 gtid);
3553extern void __kmpc_dispatch_fini_4u(ident_t *loc, kmp_int32 gtid);
3554extern void __kmpc_dispatch_fini_8u(ident_t *loc, kmp_int32 gtid);
3555
3556#ifdef KMP_GOMP_COMPAT
3557
3558extern void __kmp_aux_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
3559 enum sched_type schedule, kmp_int32 lb,
3560 kmp_int32 ub, kmp_int32 st,
3561 kmp_int32 chunk, int push_ws);
3562extern void __kmp_aux_dispatch_init_4u(ident_t *loc, kmp_int32 gtid,
3563 enum sched_type schedule, kmp_uint32 lb,
3564 kmp_uint32 ub, kmp_int32 st,
3565 kmp_int32 chunk, int push_ws);
3566extern void __kmp_aux_dispatch_init_8(ident_t *loc, kmp_int32 gtid,
3567 enum sched_type schedule, kmp_int64 lb,
3568 kmp_int64 ub, kmp_int64 st,
3569 kmp_int64 chunk, int push_ws);
3570extern void __kmp_aux_dispatch_init_8u(ident_t *loc, kmp_int32 gtid,
3571 enum sched_type schedule, kmp_uint64 lb,
3572 kmp_uint64 ub, kmp_int64 st,
3573 kmp_int64 chunk, int push_ws);
3574extern void __kmp_aux_dispatch_fini_chunk_4(ident_t *loc, kmp_int32 gtid);
3575extern void __kmp_aux_dispatch_fini_chunk_8(ident_t *loc, kmp_int32 gtid);
3576extern void __kmp_aux_dispatch_fini_chunk_4u(ident_t *loc, kmp_int32 gtid);
3577extern void __kmp_aux_dispatch_fini_chunk_8u(ident_t *loc, kmp_int32 gtid);
3578
3579#endif /* KMP_GOMP_COMPAT */
3580
3581extern kmp_uint32 __kmp_eq_4(kmp_uint32 value, kmp_uint32 checker);
3582extern kmp_uint32 __kmp_neq_4(kmp_uint32 value, kmp_uint32 checker);
3583extern kmp_uint32 __kmp_lt_4(kmp_uint32 value, kmp_uint32 checker);
3584extern kmp_uint32 __kmp_ge_4(kmp_uint32 value, kmp_uint32 checker);
3585extern kmp_uint32 __kmp_le_4(kmp_uint32 value, kmp_uint32 checker);
3586extern kmp_uint32 __kmp_wait_4(kmp_uint32 volatile *spinner, kmp_uint32 checker,
3587 kmp_uint32 (*pred)(kmp_uint32, kmp_uint32),
3588 void *obj);
3589extern void __kmp_wait_4_ptr(void *spinner, kmp_uint32 checker,
3590 kmp_uint32 (*pred)(void *, kmp_uint32), void *obj);
3591
3592extern void __kmp_wait_64(kmp_info_t *this_thr, kmp_flag_64<> *flag,
3593 int final_spin
3594#if USE_ITT_BUILD1
3595 ,
3596 void *itt_sync_obj
3597#endif
3598);
3599extern void __kmp_release_64(kmp_flag_64<> *flag);
3600
3601extern void __kmp_infinite_loop(void);
3602
3603extern void __kmp_cleanup(void);
3604
3605#if KMP_HANDLE_SIGNALS(1 || 0)
3606extern int __kmp_handle_signals;
3607extern void __kmp_install_signals(int parallel_init);
3608extern void __kmp_remove_signals(void);
3609#endif
3610
3611extern void __kmp_clear_system_time(void);
3612extern void __kmp_read_system_time(double *delta);
3613
3614extern void __kmp_check_stack_overlap(kmp_info_t *thr);
3615
3616extern void __kmp_expand_host_name(char *buffer, size_t size);
3617extern void __kmp_expand_file_name(char *result, size_t rlen, char *pattern);
3618
3619#if KMP_ARCH_X860 || KMP_ARCH_X86_641 || (KMP_OS_WINDOWS0 && (KMP_ARCH_AARCH640 || KMP_ARCH_ARM))
3620extern void
3621__kmp_initialize_system_tick(void); /* Initialize timer tick value */
3622#endif
3623
3624extern void
3625__kmp_runtime_initialize(void); /* machine specific initialization */
3626extern void __kmp_runtime_destroy(void);
3627
3628#if KMP_AFFINITY_SUPPORTED1
3629extern char *__kmp_affinity_print_mask(char *buf, int buf_len,
3630 kmp_affin_mask_t *mask);
3631extern kmp_str_buf_t *__kmp_affinity_str_buf_mask(kmp_str_buf_t *buf,
3632 kmp_affin_mask_t *mask);
3633extern void __kmp_affinity_initialize(kmp_affinity_t &affinity);
3634extern void __kmp_affinity_uninitialize(void);
3635extern void __kmp_affinity_set_init_mask(
3636 int gtid, int isa_root); /* set affinity according to KMP_AFFINITY */
3637extern void __kmp_affinity_set_place(int gtid);
3638extern void __kmp_affinity_determine_capable(const char *env_var);
3639extern int __kmp_aux_set_affinity(void **mask);
3640extern int __kmp_aux_get_affinity(void **mask);
3641extern int __kmp_aux_get_affinity_max_proc();
3642extern int __kmp_aux_set_affinity_mask_proc(int proc, void **mask);
3643extern int __kmp_aux_unset_affinity_mask_proc(int proc, void **mask);
3644extern int __kmp_aux_get_affinity_mask_proc(int proc, void **mask);
3645extern void __kmp_balanced_affinity(kmp_info_t *th, int team_size);
3646#if KMP_OS_LINUX1 || KMP_OS_FREEBSD0
3647extern int kmp_set_thread_affinity_mask_initial(void);
3648#endif
3649static inline void __kmp_assign_root_init_mask() {
3650 int gtid = __kmp_entry_gtid();
11. Value assigned to 'ompt_enabled.enabled', which participates in a condition later
12. Value assigned to 'ompt_enabled.ompt_callback_parallel_begin', which participates in a condition later
3651 kmp_root_t *r = __kmp_threads[gtid]->th.th_root;
3652 if (r->r.r_uber_thread == __kmp_threads[gtid] && !r->r.r_affinity_assigned) {
13. Assuming the condition is false
3653 __kmp_affinity_set_init_mask(gtid, TRUE);
3654 r->r.r_affinity_assigned = TRUE;
3655 }
3656}
3657static inline void __kmp_reset_root_init_mask(int gtid) {
3658 if (!KMP_AFFINITY_CAPABLE())
3659 return;
3660 kmp_info_t *th = __kmp_threads[gtid];
3661 kmp_root_t *r = th->th.th_root;
3662 if (r->r.r_uber_thread == th && r->r.r_affinity_assigned) {
3663 __kmp_set_system_affinity(__kmp_affin_origMask, FALSE);
3664 KMP_CPU_COPY(th->th.th_affin_mask, __kmp_affin_origMask);
3665 r->r.r_affinity_assigned = FALSE;
3666 }
3667}
3668#else /* KMP_AFFINITY_SUPPORTED */
3669#define __kmp_assign_root_init_mask() /* Nothing */
3670static inline void __kmp_reset_root_init_mask(int gtid) {}
3671#endif /* KMP_AFFINITY_SUPPORTED */
3672// No need for KMP_AFFINITY_SUPPORTED guard as only one field in the
3673// format string is for affinity, so platforms that do not support
3674// affinity can still use the other fields, e.g., %n for num_threads
3675extern size_t __kmp_aux_capture_affinity(int gtid, const char *format,
3676 kmp_str_buf_t *buffer);
3677extern void __kmp_aux_display_affinity(int gtid, const char *format);
3678
3679extern void __kmp_cleanup_hierarchy();
3680extern void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar);
3681
3682#if KMP_USE_FUTEX(1 && (0 || 1 || KMP_ARCH_ARM || 0))
3683
3684extern int __kmp_futex_determine_capable(void);
3685
3686#endif // KMP_USE_FUTEX
3687
3688extern void __kmp_gtid_set_specific(int gtid);
3689extern int __kmp_gtid_get_specific(void);
3690
3691extern double __kmp_read_cpu_time(void);
3692
3693extern int __kmp_read_system_info(struct kmp_sys_info *info);
3694
3695#if KMP_USE_MONITOR
3696extern void __kmp_create_monitor(kmp_info_t *th);
3697#endif
3698
3699extern void *__kmp_launch_thread(kmp_info_t *thr);
3700
3701extern void __kmp_create_worker(int gtid, kmp_info_t *th, size_t stack_size);
3702
3703#if KMP_OS_WINDOWS0
3704extern int __kmp_still_running(kmp_info_t *th);
3705extern int __kmp_is_thread_alive(kmp_info_t *th, DWORD *exit_val);
3706extern void __kmp_free_handle(kmp_thread_t tHandle);
3707#endif
3708
3709#if KMP_USE_MONITOR
3710extern void __kmp_reap_monitor(kmp_info_t *th);
3711#endif
3712extern void __kmp_reap_worker(kmp_info_t *th);
3713extern void __kmp_terminate_thread(int gtid);
3714
3715extern int __kmp_try_suspend_mx(kmp_info_t *th);
3716extern void __kmp_lock_suspend_mx(kmp_info_t *th);
3717extern void __kmp_unlock_suspend_mx(kmp_info_t *th);
3718
3719extern void __kmp_elapsed(double *);
3720extern void __kmp_elapsed_tick(double *);
3721
3722extern void __kmp_enable(int old_state);
3723extern void __kmp_disable(int *old_state);
3724
3725extern void __kmp_thread_sleep(int millis);
3726
3727extern void __kmp_common_initialize(void);
3728extern void __kmp_common_destroy(void);
3729extern void __kmp_common_destroy_gtid(int gtid);
3730
3731#if KMP_OS_UNIX1
3732extern void __kmp_register_atfork(void);
3733#endif
3734extern void __kmp_suspend_initialize(void);
3735extern void __kmp_suspend_initialize_thread(kmp_info_t *th);
3736extern void __kmp_suspend_uninitialize_thread(kmp_info_t *th);
3737
3738extern kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
3739 int tid);
3740extern kmp_team_t *
3741__kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
3742#if OMPT_SUPPORT1
3743 ompt_data_t ompt_parallel_data,
3744#endif
3745 kmp_proc_bind_t proc_bind, kmp_internal_control_t *new_icvs,
3746 int argc USE_NESTED_HOT_ARG(kmp_info_t *thr));
3747extern void __kmp_free_thread(kmp_info_t *);
3748extern void __kmp_free_team(kmp_root_t *,
3749 kmp_team_t *USE_NESTED_HOT_ARG(kmp_info_t *));
3750extern kmp_team_t *__kmp_reap_team(kmp_team_t *);
3751
3752/* ------------------------------------------------------------------------ */
3753
3754extern void __kmp_initialize_bget(kmp_info_t *th);
3755extern void __kmp_finalize_bget(kmp_info_t *th);
3756
3757KMP_EXPORTextern void *kmpc_malloc(size_t size);
3758KMP_EXPORTextern void *kmpc_aligned_malloc(size_t size, size_t alignment);
3759KMP_EXPORTextern void *kmpc_calloc(size_t nelem, size_t elsize);
3760KMP_EXPORTextern void *kmpc_realloc(void *ptr, size_t size);
3761KMP_EXPORTextern void kmpc_free(void *ptr);
3762
3763/* declarations for internal use */
3764
3765extern int __kmp_barrier(enum barrier_type bt, int gtid, int is_split,
3766 size_t reduce_size, void *reduce_data,
3767 void (*reduce)(void *, void *));
3768extern void __kmp_end_split_barrier(enum barrier_type bt, int gtid);
3769extern int __kmp_barrier_gomp_cancel(int gtid);
3770
3771/*!
3772 * Tell the fork call which compiler generated the fork call, and therefore how
3773 * to deal with the call.
3774 */
3775enum fork_context_e {
3776 fork_context_gnu, /**< Called from GNU generated code, so must not invoke the
3777 microtask internally. */
3778 fork_context_intel, /**< Called from Intel generated code. */
3779 fork_context_last
3780};
3781extern int __kmp_fork_call(ident_t *loc, int gtid,
3782 enum fork_context_e fork_context, kmp_int32 argc,
3783 microtask_t microtask, launch_t invoker,
3784 kmp_va_list ap);
3785
3786extern void __kmp_join_call(ident_t *loc, int gtid
3787#if OMPT_SUPPORT1
3788 ,
3789 enum fork_context_e fork_context
3790#endif
3791 ,
3792 int exit_teams = 0);
3793
3794extern void __kmp_serialized_parallel(ident_t *id, kmp_int32 gtid);
3795extern void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team);
3796extern void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team);
3797extern int __kmp_invoke_task_func(int gtid);
3798extern void __kmp_run_before_invoked_task(int gtid, int tid,
3799 kmp_info_t *this_thr,
3800 kmp_team_t *team);
3801extern void __kmp_run_after_invoked_task(int gtid, int tid,
3802 kmp_info_t *this_thr,
3803 kmp_team_t *team);
3804
3805// should never have been exported
3806KMP_EXPORTextern int __kmpc_invoke_task_func(int gtid);
3807extern int __kmp_invoke_teams_master(int gtid);
3808extern void __kmp_teams_master(int gtid);
3809extern int __kmp_aux_get_team_num();
3810extern int __kmp_aux_get_num_teams();
3811extern void __kmp_save_internal_controls(kmp_info_t *thread);
3812extern void __kmp_user_set_library(enum library_type arg);
3813extern void __kmp_aux_set_library(enum library_type arg);
3814extern void __kmp_aux_set_stacksize(size_t arg);
3815extern void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid);
3816extern void __kmp_aux_set_defaults(char const *str, size_t len);
3817
3818/* Functions called from __kmp_aux_env_initialize() in kmp_settings.cpp */
3819void kmpc_set_blocktime(int arg);
3820void ompc_set_nested(int flag);
3821void ompc_set_dynamic(int flag);
3822void ompc_set_num_threads(int arg);
3823
3824extern void __kmp_push_current_task_to_thread(kmp_info_t *this_thr,
3825 kmp_team_t *team, int tid);
3826extern void __kmp_pop_current_task_from_thread(kmp_info_t *this_thr);
3827extern kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
3828 kmp_tasking_flags_t *flags,
3829 size_t sizeof_kmp_task_t,
3830 size_t sizeof_shareds,
3831 kmp_routine_entry_t task_entry);
3832extern void __kmp_init_implicit_task(ident_t *loc_ref, kmp_info_t *this_thr,
3833 kmp_team_t *team, int tid,
3834 int set_curr_task);
3835extern void __kmp_finish_implicit_task(kmp_info_t *this_thr);
3836extern void __kmp_free_implicit_task(kmp_info_t *this_thr);
3837
3838extern kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
3839 int gtid,
3840 kmp_task_t *task);
3841extern void __kmp_fulfill_event(kmp_event_t *event);
3842
3843extern void __kmp_free_task_team(kmp_info_t *thread,
3844 kmp_task_team_t *task_team);
3845extern void __kmp_reap_task_teams(void);
3846extern void __kmp_wait_to_unref_task_teams(void);
3847extern void __kmp_task_team_setup(kmp_info_t *this_thr, kmp_team_t *team,
3848 int always);
3849extern void __kmp_task_team_sync(kmp_info_t *this_thr, kmp_team_t *team);
3850extern void __kmp_task_team_wait(kmp_info_t *this_thr, kmp_team_t *team
3851#if USE_ITT_BUILD1
3852 ,
3853 void *itt_sync_obj
3854#endif /* USE_ITT_BUILD */
3855 ,
3856 int wait = 1);
3857extern void __kmp_tasking_barrier(kmp_team_t *team, kmp_info_t *thread,
3858 int gtid);
3859
3860extern int __kmp_is_address_mapped(void *addr);
3861extern kmp_uint64 __kmp_hardware_timestamp(void);
3862
3863#if KMP_OS_UNIX1
3864extern int __kmp_read_from_file(char const *path, char const *format, ...);
3865#endif
3866
3867/* ------------------------------------------------------------------------ */
3868//
3869// Assembly routines that have no compiler intrinsic replacement
3870//
3871
3872extern int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int npr, int argc,
3873 void *argv[]
3874#if OMPT_SUPPORT1
3875 ,
3876 void **exit_frame_ptr
3877#endif
3878);
3879
3880/* ------------------------------------------------------------------------ */
3881
3882KMP_EXPORTextern void __kmpc_begin(ident_t *, kmp_int32 flags);
3883KMP_EXPORTextern void __kmpc_end(ident_t *);
3884
3885KMP_EXPORTextern void __kmpc_threadprivate_register_vec(ident_t *, void *data,
3886 kmpc_ctor_vec ctor,
3887 kmpc_cctor_vec cctor,
3888 kmpc_dtor_vec dtor,
3889 size_t vector_length);
3890KMP_EXPORTextern void __kmpc_threadprivate_register(ident_t *, void *data,
3891 kmpc_ctor ctor, kmpc_cctor cctor,
3892 kmpc_dtor dtor);
3893KMP_EXPORTextern void *__kmpc_threadprivate(ident_t *, kmp_int32 global_tid,
3894 void *data, size_t size);
3895
3896KMP_EXPORTextern kmp_int32 __kmpc_global_thread_num(ident_t *);
3897KMP_EXPORTextern kmp_int32 __kmpc_global_num_threads(ident_t *);
3898KMP_EXPORTextern kmp_int32 __kmpc_bound_thread_num(ident_t *);
3899KMP_EXPORTextern kmp_int32 __kmpc_bound_num_threads(ident_t *);
3900
3901KMP_EXPORTextern kmp_int32 __kmpc_ok_to_fork(ident_t *);
3902KMP_EXPORTextern void __kmpc_fork_call(ident_t *, kmp_int32 nargs,
3903 kmpc_micro microtask, ...);
3904
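For context, __kmpc_fork_call is the entry point behind a parallel construct: the compiler outlines the region body into a kmpc_micro function and forwards the shared variables as trailing arguments. The snippet below is ordinary user-level OpenMP (compile with -fopenmp); the lowering noted in the comments is the conventional scheme rather than something this header spells out.

#include <omp.h>
#include <cstdio>

int main() {
  int x = 42; // forwarded to the outlined microtask via __kmpc_fork_call
#pragma omp parallel
  {
    // The region body becomes an outlined kmpc_micro routine that receives the
    // global and bound tid pointers first, then the shared arguments.
    std::printf("thread %d sees x=%d\n", omp_get_thread_num(), x);
  }
  return 0;
}
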
3905KMP_EXPORTextern void __kmpc_serialized_parallel(ident_t *, kmp_int32 global_tid);
3906KMP_EXPORTextern void __kmpc_end_serialized_parallel(ident_t *, kmp_int32 global_tid);
3907
3908KMP_EXPORTextern void __kmpc_flush(ident_t *);
3909KMP_EXPORTextern void __kmpc_barrier(ident_t *, kmp_int32 global_tid);
3910KMP_EXPORTextern kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
3911KMP_EXPORTextern void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
3912KMP_EXPORTextern kmp_int32 __kmpc_masked(ident_t *, kmp_int32 global_tid,
3913 kmp_int32 filter);
3914KMP_EXPORTextern void __kmpc_end_masked(ident_t *, kmp_int32 global_tid);
3915KMP_EXPORTextern void __kmpc_ordered(ident_t *, kmp_int32 global_tid);
3916KMP_EXPORTextern void __kmpc_end_ordered(ident_t *, kmp_int32 global_tid);
3917KMP_EXPORTextern void __kmpc_critical(ident_t *, kmp_int32 global_tid,
3918 kmp_critical_name *);
3919KMP_EXPORTextern void __kmpc_end_critical(ident_t *, kmp_int32 global_tid,
3920 kmp_critical_name *);
3921KMP_EXPORTextern void __kmpc_critical_with_hint(ident_t *, kmp_int32 global_tid,
3922 kmp_critical_name *, uint32_t hint);
3923
3924KMP_EXPORTextern kmp_int32 __kmpc_barrier_master(ident_t *, kmp_int32 global_tid);
3925KMP_EXPORTextern void __kmpc_end_barrier_master(ident_t *, kmp_int32 global_tid);
3926
3927KMP_EXPORTextern kmp_int32 __kmpc_barrier_master_nowait(ident_t *,
3928 kmp_int32 global_tid);
3929
3930KMP_EXPORTextern kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
3931KMP_EXPORTextern void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
3932
3933KMP_EXPORTextern kmp_int32 __kmpc_sections_init(ident_t *loc, kmp_int32 global_tid);
3934KMP_EXPORTextern kmp_int32 __kmpc_next_section(ident_t *loc, kmp_int32 global_tid,
3935 kmp_int32 numberOfSections);
3936KMP_EXPORTextern void __kmpc_end_sections(ident_t *loc, kmp_int32 global_tid);
3937
3938KMP_EXPORTextern void KMPC_FOR_STATIC_INIT(ident_t *loc, kmp_int32 global_tid,
3939 kmp_int32 schedtype, kmp_int32 *plastiter,
3940 kmp_int *plower, kmp_int *pupper,
3941 kmp_int *pstride, kmp_int incr,
3942 kmp_int chunk);
3943
3944KMP_EXPORTextern void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
3945
3946KMP_EXPORTextern void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
3947 size_t cpy_size, void *cpy_data,
3948 void (*cpy_func)(void *, void *),
3949 kmp_int32 didit);
3950
3951KMP_EXPORTextern void *__kmpc_copyprivate_light(ident_t *loc, kmp_int32 gtid,
3952 void *cpy_data);
3953
3954extern void KMPC_SET_NUM_THREADS(int arg);
3955extern void KMPC_SET_DYNAMIC(int flag);
3956extern void KMPC_SET_NESTED(int flag);
3957
3958/* OMP 3.0 tasking interface routines */
3959KMP_EXPORTextern kmp_int32 __kmpc_omp_task(ident_t *loc_ref, kmp_int32 gtid,
3960 kmp_task_t *new_task);
3961KMP_EXPORTextern kmp_task_t *__kmpc_omp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
3962 kmp_int32 flags,
3963 size_t sizeof_kmp_task_t,
3964 size_t sizeof_shareds,
3965 kmp_routine_entry_t task_entry);
3966KMP_EXPORTextern kmp_task_t *__kmpc_omp_target_task_alloc(
3967 ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t,
3968 size_t sizeof_shareds, kmp_routine_entry_t task_entry, kmp_int64 device_id);
3969KMP_EXPORTextern void __kmpc_omp_task_begin_if0(ident_t *loc_ref, kmp_int32 gtid,
3970 kmp_task_t *task);
3971KMP_EXPORTextern void __kmpc_omp_task_complete_if0(ident_t *loc_ref, kmp_int32 gtid,
3972 kmp_task_t *task);
3973KMP_EXPORTextern kmp_int32 __kmpc_omp_task_parts(ident_t *loc_ref, kmp_int32 gtid,
3974 kmp_task_t *new_task);
3975KMP_EXPORTextern kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid);
3976/* __kmpc_omp_taskwait_51 : Function for OpenMP 5.1 nowait clause.
3977 * Placeholder for taskwait with nowait clause.*/
3978KMP_EXPORTextern kmp_int32 __kmpc_omp_taskwait_51(ident_t *loc_ref, kmp_int32 gtid,
3979 kmp_int32 has_no_wait);
3980
3981KMP_EXPORTextern kmp_int32 __kmpc_omp_taskyield(ident_t *loc_ref, kmp_int32 gtid,
3982 int end_part);
3983
3984#if TASK_UNUSED
3985void __kmpc_omp_task_begin(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task);
3986void __kmpc_omp_task_complete(ident_t *loc_ref, kmp_int32 gtid,
3987 kmp_task_t *task);
3988#endif // TASK_UNUSED
3989
3990/* ------------------------------------------------------------------------ */
3991
3992KMP_EXPORTextern void __kmpc_taskgroup(ident_t *loc, int gtid);
3993KMP_EXPORTextern void __kmpc_end_taskgroup(ident_t *loc, int gtid);
3994
3995KMP_EXPORTextern kmp_int32 __kmpc_omp_task_with_deps(
3996 ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 ndeps,
3997 kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
3998 kmp_depend_info_t *noalias_dep_list);
3999KMP_EXPORTextern void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid,
4000 kmp_int32 ndeps,
4001 kmp_depend_info_t *dep_list,
4002 kmp_int32 ndeps_noalias,
4003 kmp_depend_info_t *noalias_dep_list);
4004/* __kmpc_omp_taskwait_deps_51 : Function for OpenMP 5.1 nowait clause.
4005 * Placeholder for taskwait with nowait clause.*/
4006KMP_EXPORTextern void __kmpc_omp_taskwait_deps_51(ident_t *loc_ref, kmp_int32 gtid,
4007 kmp_int32 ndeps,
4008 kmp_depend_info_t *dep_list,
4009 kmp_int32 ndeps_noalias,
4010 kmp_depend_info_t *noalias_dep_list,
4011 kmp_int32 has_no_wait);
4012
4013extern kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task,
4014 bool serialize_immediate);
4015
4016KMP_EXPORTextern kmp_int32 __kmpc_cancel(ident_t *loc_ref, kmp_int32 gtid,
4017 kmp_int32 cncl_kind);
4018KMP_EXPORTextern kmp_int32 __kmpc_cancellationpoint(ident_t *loc_ref, kmp_int32 gtid,
4019 kmp_int32 cncl_kind);
4020KMP_EXPORTextern kmp_int32 __kmpc_cancel_barrier(ident_t *loc_ref, kmp_int32 gtid);
4021KMP_EXPORTextern int __kmp_get_cancellation_status(int cancel_kind);
4022
4023KMP_EXPORTextern void __kmpc_proxy_task_completed(kmp_int32 gtid, kmp_task_t *ptask);
4024KMP_EXPORTextern void __kmpc_proxy_task_completed_ooo(kmp_task_t *ptask);
4025KMP_EXPORTextern void __kmpc_taskloop(ident_t *loc, kmp_int32 gtid, kmp_task_t *task,
4026 kmp_int32 if_val, kmp_uint64 *lb,
4027 kmp_uint64 *ub, kmp_int64 st, kmp_int32 nogroup,
4028 kmp_int32 sched, kmp_uint64 grainsize,
4029 void *task_dup);
4030KMP_EXPORTextern void __kmpc_taskloop_5(ident_t *loc, kmp_int32 gtid,
4031 kmp_task_t *task, kmp_int32 if_val,
4032 kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
4033 kmp_int32 nogroup, kmp_int32 sched,
4034 kmp_uint64 grainsize, kmp_int32 modifier,
4035 void *task_dup);
4036KMP_EXPORTextern void *__kmpc_task_reduction_init(int gtid, int num_data, void *data);
4037KMP_EXPORTextern void *__kmpc_taskred_init(int gtid, int num_data, void *data);
4038KMP_EXPORTextern void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void *d);
4039KMP_EXPORTextern void *__kmpc_task_reduction_modifier_init(ident_t *loc, int gtid,
4040 int is_ws, int num,
4041 void *data);
4042KMP_EXPORTextern void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int is_ws,
4043 int num, void *data);
4044KMP_EXPORTextern void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid,
4045 int is_ws);
4046KMP_EXPORTextern kmp_int32 __kmpc_omp_reg_task_with_affinity(
4047 ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 naffins,
4048 kmp_task_affinity_info_t *affin_list);
4049KMP_EXPORTextern void __kmp_set_num_teams(int num_teams);
4050KMP_EXPORTextern int __kmp_get_max_teams(void);
4051KMP_EXPORTextern void __kmp_set_teams_thread_limit(int limit);
4052KMP_EXPORTextern int __kmp_get_teams_thread_limit(void);
4053
4054/* Lock interface routines (fast versions with gtid passed in) */
4055KMP_EXPORTextern void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid,
4056 void **user_lock);
4057KMP_EXPORTextern void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid,
4058 void **user_lock);
4059KMP_EXPORTextern void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid,
4060 void **user_lock);
4061KMP_EXPORTextern void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid,
4062 void **user_lock);
4063KMP_EXPORTextern void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock);
4064KMP_EXPORTextern void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid,
4065 void **user_lock);
4066KMP_EXPORTextern void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid,
4067 void **user_lock);
4068KMP_EXPORTextern void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid,
4069 void **user_lock);
4070KMP_EXPORTextern int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock);
4071KMP_EXPORTextern int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid,
4072 void **user_lock);
4073
4074KMP_EXPORTextern void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid,
4075 void **user_lock, uintptr_t hint);
4076KMP_EXPORTextern void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
4077 void **user_lock,
4078 uintptr_t hint);
4079
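These *_with_hint entry points back the public omp_init_lock_with_hint and omp_init_nest_lock_with_hint API. A small user-level sketch; the hint is only a request, and the runtime may fall back to an ordinary lock:

#include <omp.h>

static omp_lock_t example_lock;

void lock_hint_example(int *counter) {
  omp_init_lock_with_hint(&example_lock, omp_lock_hint_speculative);
  omp_set_lock(&example_lock);
  ++*counter; // short critical section, a reasonable candidate for speculation
  omp_unset_lock(&example_lock);
  omp_destroy_lock(&example_lock);
}
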
4080/* Interface to fast scalable reduce methods routines */
4081
4082KMP_EXPORTextern kmp_int32 __kmpc_reduce_nowait(
4083 ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
4084 void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
4085 kmp_critical_name *lck);
4086KMP_EXPORTextern void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
4087 kmp_critical_name *lck);
4088KMP_EXPORTextern kmp_int32 __kmpc_reduce(
4089 ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
4090 void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
4091 kmp_critical_name *lck);
4092KMP_EXPORTextern void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
4093 kmp_critical_name *lck);
4094
4095/* Internal fast reduction routines */
4096
4097extern PACKED_REDUCTION_METHOD_T __kmp_determine_reduction_method(
4098 ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
4099 void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
4100 kmp_critical_name *lck);
4101
4102// this function is for testing set/get/determine reduce method
4103KMP_EXPORTextern kmp_int32 __kmp_get_reduce_method(void);
4104
4105KMP_EXPORTextern kmp_uint64 __kmpc_get_taskid();
4106KMP_EXPORTextern kmp_uint64 __kmpc_get_parent_taskid();
4107
4108// C++ port
4109// missing 'extern "C"' declarations
4110
4111KMP_EXPORTextern kmp_int32 __kmpc_in_parallel(ident_t *loc);
4112KMP_EXPORTextern void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid);
4113KMP_EXPORTextern void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
4114 kmp_int32 num_threads);
4115
4116KMP_EXPORTextern void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
4117 int proc_bind);
4118KMP_EXPORTextern void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
4119 kmp_int32 num_teams,
4120 kmp_int32 num_threads);
4121/* Function for OpenMP 5.1 num_teams clause */
4122KMP_EXPORTextern void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid,
4123 kmp_int32 num_teams_lb,
4124 kmp_int32 num_teams_ub,
4125 kmp_int32 num_threads);
4126KMP_EXPORTextern void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc,
4127 kmpc_micro microtask, ...);
4128struct kmp_dim { // loop bounds info casted to kmp_int64
4129 kmp_int64 lo; // lower
4130 kmp_int64 up; // upper
4131 kmp_int64 st; // stride
4132};
4133KMP_EXPORTextern void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
4134 kmp_int32 num_dims,
4135 const struct kmp_dim *dims);
4136KMP_EXPORTextern void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid,
4137 const kmp_int64 *vec);
4138KMP_EXPORTextern void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid,
4139 const kmp_int64 *vec);
4140KMP_EXPORTextern void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
4141
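The doacross entry points implement cross-iteration ordering for ordered(n) loops: depend(sink : ...) in user code lowers to __kmpc_doacross_wait and depend(source) to __kmpc_doacross_post. A user-level loop that exercises them:

#include <omp.h>

void doacross_example(int n, double *a) {
#pragma omp parallel for ordered(1)
  for (int i = 1; i < n; ++i) {
#pragma omp ordered depend(sink : i - 1) // waits on iteration i-1
    a[i] += a[i - 1];
#pragma omp ordered depend(source) // publishes completion of iteration i
  }
}
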
4142KMP_EXPORTextern void *__kmpc_threadprivate_cached(ident_t *loc, kmp_int32 global_tid,
4143 void *data, size_t size,
4144 void ***cache);
4145
4146// Symbols for MS mutual detection.
4147extern int _You_must_link_with_exactly_one_OpenMP_library;
4148extern int _You_must_link_with_Intel_OpenMP_library;
4149#if KMP_OS_WINDOWS0 && (KMP_VERSION_MAJOR5 > 4)
4150extern int _You_must_link_with_Microsoft_OpenMP_library;
4151#endif
4152
4153// The routines below are not exported.
4154// Consider making them 'static' in corresponding source files.
4155void kmp_threadprivate_insert_private_data(int gtid, void *pc_addr,
4156 void *data_addr, size_t pc_size);
4157struct private_common *kmp_threadprivate_insert(int gtid, void *pc_addr,
4158 void *data_addr,
4159 size_t pc_size);
4160void __kmp_threadprivate_resize_cache(int newCapacity);
4161void __kmp_cleanup_threadprivate_caches();
4162
4163// ompc_, kmpc_ entries moved from omp.h.
4164#if KMP_OS_WINDOWS0
4165#define KMPC_CONVENTION __cdecl
4166#else
4167#define KMPC_CONVENTION
4168#endif
4169
4170#ifndef __OMP_H
4171typedef enum omp_sched_t {
4172 omp_sched_static = 1,
4173 omp_sched_dynamic = 2,
4174 omp_sched_guided = 3,
4175 omp_sched_auto = 4
4176} omp_sched_t;
4177typedef void *kmp_affinity_mask_t;
4178#endif
4179
4180KMP_EXPORTextern void KMPC_CONVENTION ompc_set_max_active_levels(int);
4181KMP_EXPORTextern void KMPC_CONVENTION ompc_set_schedule(omp_sched_t, int);
4182KMP_EXPORTextern int KMPC_CONVENTION ompc_get_ancestor_thread_num(int);
4183KMP_EXPORTextern int KMPC_CONVENTION ompc_get_team_size(int);
4184KMP_EXPORTextern int KMPC_CONVENTION
4185kmpc_set_affinity_mask_proc(int, kmp_affinity_mask_t *);
4186KMP_EXPORTextern int KMPC_CONVENTION
4187kmpc_unset_affinity_mask_proc(int, kmp_affinity_mask_t *);
4188KMP_EXPORTextern int KMPC_CONVENTION
4189kmpc_get_affinity_mask_proc(int, kmp_affinity_mask_t *);
4190
4191KMP_EXPORTextern void KMPC_CONVENTION kmpc_set_stacksize(int);
4192KMP_EXPORTextern void KMPC_CONVENTION kmpc_set_stacksize_s(size_t);
4193KMP_EXPORTextern void KMPC_CONVENTION kmpc_set_library(int);
4194KMP_EXPORTextern void KMPC_CONVENTION kmpc_set_defaults(char const *);
4195KMP_EXPORTextern void KMPC_CONVENTION kmpc_set_disp_num_buffers(int);
4196void KMP_EXPAND_NAME(ompc_set_affinity_format)(char const *format);
4197size_t KMP_EXPAND_NAME(ompc_get_affinity_format)(char *buffer, size_t size);
4198void KMP_EXPAND_NAME(ompc_display_affinity)(char const *format);
4199size_t KMP_EXPAND_NAME(ompc_capture_affinity)(char *buffer, size_t buf_size,
4200 char const *format);
4201
4202enum kmp_target_offload_kind {
4203 tgt_disabled = 0,
4204 tgt_default = 1,
4205 tgt_mandatory = 2
4206};
4207typedef enum kmp_target_offload_kind kmp_target_offload_kind_t;
4208// Set via OMP_TARGET_OFFLOAD if specified, defaults to tgt_default otherwise
4209extern kmp_target_offload_kind_t __kmp_target_offload;
4210extern int __kmpc_get_target_offload();
4211
4212// Constants used in libomptarget
4213#define KMP_DEVICE_DEFAULT -1 // This is libomptarget's default device.
4214#define KMP_DEVICE_ALL -11 // This is libomptarget's "all devices".
4215
4216// OMP Pause Resource
4217
4218// The following enum is used both to set the status in __kmp_pause_status, and
4219// as the internal equivalent of the externally-visible omp_pause_resource_t.
4220typedef enum kmp_pause_status_t {
4221 kmp_not_paused = 0, // status is not paused, or, requesting resume
4222 kmp_soft_paused = 1, // status is soft-paused, or, requesting soft pause
4223 kmp_hard_paused = 2 // status is hard-paused, or, requesting hard pause
4224} kmp_pause_status_t;
4225
4226// This stores the pause state of the runtime
4227extern kmp_pause_status_t __kmp_pause_status;
4228extern int __kmpc_pause_resource(kmp_pause_status_t level);
4229extern int __kmp_pause_resource(kmp_pause_status_t level);
4230// Soft resume sets __kmp_pause_status, and wakes up all threads.
4231extern void __kmp_resume_if_soft_paused();
4232// Hard resume simply resets the status to not paused. Library will appear to
4233// be uninitialized after hard pause. Let OMP constructs trigger required
4234// initializations.
4235static inline void __kmp_resume_if_hard_paused() {
4236 if (__kmp_pause_status == kmp_hard_paused) {
4237 __kmp_pause_status = kmp_not_paused;
4238 }
4239}
4240
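kmp_pause_status_t mirrors the public omp_pause_resource_t; users drive the transitions through omp_pause_resource_all, and the next OpenMP construct re-initializes whatever a hard pause released. A user-level sketch:

#include <omp.h>

int pause_example() {
#pragma omp parallel
  { /* first phase */ }
  // Soft pause trims runtime resources but keeps the library initialized
  // (internally reflected by __kmp_pause_status == kmp_soft_paused).
  int rc = omp_pause_resource_all(omp_pause_soft);
#pragma omp parallel
  { /* the runtime resumes for the next construct */ }
  return rc;
}
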
4241extern void __kmp_omp_display_env(int verbose);
4242
4243// 1: it is initializing hidden helper team
4244extern volatile int __kmp_init_hidden_helper;
4245// 1: the hidden helper team is done
4246extern volatile int __kmp_hidden_helper_team_done;
4247// 1: enable hidden helper task
4248extern kmp_int32 __kmp_enable_hidden_helper;
4249// Main thread of hidden helper team
4250extern kmp_info_t *__kmp_hidden_helper_main_thread;
4251// Descriptors for the hidden helper threads
4252extern kmp_info_t **__kmp_hidden_helper_threads;
4253// Number of hidden helper threads
4254extern kmp_int32 __kmp_hidden_helper_threads_num;
4255// Number of hidden helper tasks that have not been executed yet
4256extern std::atomic<kmp_int32> __kmp_unexecuted_hidden_helper_tasks;
4257
4258extern void __kmp_hidden_helper_initialize();
4259extern void __kmp_hidden_helper_threads_initz_routine();
4260extern void __kmp_do_initialize_hidden_helper_threads();
4261extern void __kmp_hidden_helper_threads_initz_wait();
4262extern void __kmp_hidden_helper_initz_release();
4263extern void __kmp_hidden_helper_threads_deinitz_wait();
4264extern void __kmp_hidden_helper_threads_deinitz_release();
4265extern void __kmp_hidden_helper_main_thread_wait();
4266extern void __kmp_hidden_helper_worker_thread_wait();
4267extern void __kmp_hidden_helper_worker_thread_signal();
4268extern void __kmp_hidden_helper_main_thread_release();
4269
4270// Check whether a given thread is a hidden helper thread
4271#define KMP_HIDDEN_HELPER_THREAD(gtid) \
4272 ((gtid) >= 1 && (gtid) <= __kmp_hidden_helper_threads_num)
4273
4274#define KMP_HIDDEN_HELPER_WORKER_THREAD(gtid) \
4275 ((gtid) > 1 && (gtid) <= __kmp_hidden_helper_threads_num)
4276
4277#define KMP_HIDDEN_HELPER_MAIN_THREAD(gtid) \
4278 ((gtid) == 1 && (gtid) <= __kmp_hidden_helper_threads_num)
4279
4280#define KMP_HIDDEN_HELPER_TEAM(team) \
4281 (team->t.t_threads[0] == __kmp_hidden_helper_main_thread)
4282
4283// Map a gtid to a hidden helper thread. The first hidden helper thread, a.k.a
4284// main thread, is skipped.
4285#define KMP_GTID_TO_SHADOW_GTID(gtid) \
4286 ((gtid) % (__kmp_hidden_helper_threads_num - 1) + 2)
4287
4288// Return the adjusted gtid value by subtracting from gtid the number
4289// of hidden helper threads. This adjusted value is the gtid the thread would
4290// have received if there were no hidden helper threads.
4291static inline int __kmp_adjust_gtid_for_hidden_helpers(int gtid) {
4292 int adjusted_gtid = gtid;
4293 if (__kmp_hidden_helper_threads_num > 0 && gtid > 0 &&
4294 gtid - __kmp_hidden_helper_threads_num >= 0) {
4295 adjusted_gtid -= __kmp_hidden_helper_threads_num;
4296 }
4297 return adjusted_gtid;
4298}
4299
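A stand-alone check of the two mappings above, assuming a hidden helper team of 8 threads (the team size is configurable, so 8 is an assumption here); only the arithmetic mirrors the macro and the function:

#include <cstdio>

int main() {
  const int hidden = 8; // stands in for __kmp_hidden_helper_threads_num
  const int gtids[] = {1, 5, 9, 12};
  for (int gtid : gtids) {
    int shadow = gtid % (hidden - 1) + 2; // KMP_GTID_TO_SHADOW_GTID
    int adjusted = (gtid > 0 && gtid - hidden >= 0) ? gtid - hidden : gtid;
    std::printf("gtid %2d -> shadow gtid %d, adjusted gtid %d\n", gtid, shadow,
                adjusted);
  }
  return 0;
}
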
4300// Support for error directive
4301typedef enum kmp_severity_t {
4302 severity_warning = 1,
4303 severity_fatal = 2
4304} kmp_severity_t;
4305extern void __kmpc_error(ident_t *loc, int severity, const char *message);
4306
4307// Support for scope directive
4308KMP_EXPORTextern void __kmpc_scope(ident_t *loc, kmp_int32 gtid, void *reserved);
4309KMP_EXPORTextern void __kmpc_end_scope(ident_t *loc, kmp_int32 gtid, void *reserved);
4310
4311#ifdef __cplusplus201703L
4312}
4313#endif
4314
4315template <bool C, bool S>
4316extern void __kmp_suspend_32(int th_gtid, kmp_flag_32<C, S> *flag);
4317template <bool C, bool S>
4318extern void __kmp_suspend_64(int th_gtid, kmp_flag_64<C, S> *flag);
4319template <bool C, bool S>
4320extern void __kmp_atomic_suspend_64(int th_gtid,
4321 kmp_atomic_flag_64<C, S> *flag);
4322extern void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag);
4323#if KMP_HAVE_MWAIT((0 || 1) && (1 || 0) && !0) || KMP_HAVE_UMWAIT((0 || 1) && (1 || 0) && !0)
4324template <bool C, bool S>
4325extern void __kmp_mwait_32(int th_gtid, kmp_flag_32<C, S> *flag);
4326template <bool C, bool S>
4327extern void __kmp_mwait_64(int th_gtid, kmp_flag_64<C, S> *flag);
4328template <bool C, bool S>
4329extern void __kmp_atomic_mwait_64(int th_gtid, kmp_atomic_flag_64<C, S> *flag);
4330extern void __kmp_mwait_oncore(int th_gtid, kmp_flag_oncore *flag);
4331#endif
4332template <bool C, bool S>
4333extern void __kmp_resume_32(int target_gtid, kmp_flag_32<C, S> *flag);
4334template <bool C, bool S>
4335extern void __kmp_resume_64(int target_gtid, kmp_flag_64<C, S> *flag);
4336template <bool C, bool S>
4337extern void __kmp_atomic_resume_64(int target_gtid,
4338 kmp_atomic_flag_64<C, S> *flag);
4339extern void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag);
4340
4341template <bool C, bool S>
4342int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid,
4343 kmp_flag_32<C, S> *flag, int final_spin,
4344 int *thread_finished,
4345#if USE_ITT_BUILD1
4346 void *itt_sync_obj,
4347#endif /* USE_ITT_BUILD */
4348 kmp_int32 is_constrained);
4349template <bool C, bool S>
4350int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid,
4351 kmp_flag_64<C, S> *flag, int final_spin,
4352 int *thread_finished,
4353#if USE_ITT_BUILD1
4354 void *itt_sync_obj,
4355#endif /* USE_ITT_BUILD */
4356 kmp_int32 is_constrained);
4357template <bool C, bool S>
4358int __kmp_atomic_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid,
4359 kmp_atomic_flag_64<C, S> *flag,
4360 int final_spin, int *thread_finished,
4361#if USE_ITT_BUILD1
4362 void *itt_sync_obj,
4363#endif /* USE_ITT_BUILD */
4364 kmp_int32 is_constrained);
4365int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid,
4366 kmp_flag_oncore *flag, int final_spin,
4367 int *thread_finished,
4368#if USE_ITT_BUILD1
4369 void *itt_sync_obj,
4370#endif /* USE_ITT_BUILD */
4371 kmp_int32 is_constrained);
4372
4373extern int __kmp_nesting_mode;
4374extern int __kmp_nesting_mode_nlevels;
4375extern int *__kmp_nesting_nth_level;
4376extern void __kmp_init_nesting_mode();
4377extern void __kmp_set_nesting_mode_threads();
4378
4379/// This class safely opens and closes a C-style FILE* object using RAII
4380/// semantics. There are also methods which allow using stdout or stderr as
4381/// the underlying FILE* object. With the implicit conversion operator to
4382/// FILE*, an object with this type can be used in any function which takes
4383/// a FILE* object e.g., fprintf().
4384/// No close method is needed at use sites.
4385class kmp_safe_raii_file_t {
4386 FILE *f;
4387
4388 void close() {
4389 if (f && f != stdout && f != stderr) {
4390 fclose(f);
4391 f = nullptr;
4392 }
4393 }
4394
4395public:
4396 kmp_safe_raii_file_t() : f(nullptr) {}
4397 kmp_safe_raii_file_t(const char *filename, const char *mode,
4398 const char *env_var = nullptr)
4399 : f(nullptr) {
4400 open(filename, mode, env_var);
4401 }
4402 ~kmp_safe_raii_file_t() { close(); }
4403
4404 /// Open filename using mode. This is automatically closed in the destructor.
4405 /// The env_var parameter indicates the environment variable the filename
4406 /// came from if != nullptr.
4407 void open(const char *filename, const char *mode,
4408 const char *env_var = nullptr) {
4409 KMP_ASSERT(!f);
4410 f = fopen(filename, mode);
4411 if (!f) {
4412 int code = errno;
4413 if (env_var) {
4414 __kmp_fatal(KMP_MSG(CantOpenFileForReading, filename), KMP_ERR(code),
4415 KMP_HNT(CheckEnvVar, env_var, filename), __kmp_msg_null);
4416 } else {
4417 __kmp_fatal(KMP_MSG(CantOpenFileForReading, filename), KMP_ERR(code),
4418 __kmp_msg_null);
4419 }
4420 }
4421 }
4422 /// Instead of erroring out, return non-zero when
4423 /// unsuccessful fopen() for any reason
4424 int try_open(const char *filename, const char *mode) {
4425 KMP_ASSERT(!f);
4426 f = fopen(filename, mode);
4427 if (!f)
4428 return errno;
4429 return 0;
4430 }
4431 /// Set the FILE* object to stdout and output there
4432 /// No open call should happen before this call.
4433 void set_stdout() {
4434 KMP_ASSERT(!f);
4435 f = stdout;
4436 }
4437 /// Set the FILE* object to stderr and output there
4438 /// No open call should happen before this call.
4439 void set_stderr() {
4440 KMP_ASSERT(!f);
4441 f = stderr;
4442 }
4443 operator bool() { return bool(f); }
4444 operator FILE *() { return f; }
4445};
4446
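Usage sketch for the wrapper above (an invented call site, not taken from the runtime sources):

void dump_report_example(const char *path) {
  kmp_safe_raii_file_t out;
  if (path)
    out.open(path, "w"); // fatal runtime message if fopen() fails
  else
    out.set_stdout();    // or fall back to stdout
  fprintf(out, "affinity report goes here\n"); // implicit conversion to FILE*
} // destructor closes the file; stdout/stderr are never fclose()d
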
4447template <typename SourceType, typename TargetType,
4448 bool isSourceSmaller = (sizeof(SourceType) < sizeof(TargetType)),
4449 bool isSourceEqual = (sizeof(SourceType) == sizeof(TargetType)),
4450 bool isSourceSigned = std::is_signed<SourceType>::value,
4451 bool isTargetSigned = std::is_signed<TargetType>::value>
4452struct kmp_convert {};
4453
4454// Both types are signed; Source smaller
4455template <typename SourceType, typename TargetType>
4456struct kmp_convert<SourceType, TargetType, true, false, true, true> {
4457 static TargetType to(SourceType src) { return (TargetType)src; }
4458};
4459// Source equal
4460template <typename SourceType, typename TargetType>
4461struct kmp_convert<SourceType, TargetType, false, true, true, true> {
4462 static TargetType to(SourceType src) { return src; }
4463};
4464// Source bigger
4465template <typename SourceType, typename TargetType>
4466struct kmp_convert<SourceType, TargetType, false, false, true, true> {
4467 static TargetType to(SourceType src) {
4468 KMP_ASSERT(src <= static_cast<SourceType>(
4469 (std::numeric_limits<TargetType>::max)()));
4470 KMP_ASSERT(src >= static_cast<SourceType>(
4471 (std::numeric_limits<TargetType>::min)()));
4472 return (TargetType)src;
4473 }
4474};
4475
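For illustration only (not an API defined here): converting a wider signed value to a narrower signed type resolves, via the defaulted template parameters, to the specialization above, which range-checks before the cast.

static inline kmp_int32 narrow_example(kmp_int64 wide) {
  // Picks kmp_convert<kmp_int64, kmp_int32, false, false, true, true>; the
  // asserts shown above fire if the value does not fit in kmp_int32.
  return kmp_convert<kmp_int64, kmp_int32>::to(wide);
}
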
4476 // Source signed, Target unsigned
4477 // Source smaller
4478 template <typename SourceType, typename TargetType>
4479 struct kmp_convert<SourceType, TargetType, true, false, true, false> {
4480   static TargetType to(SourceType src) {
4481     KMP_ASSERT(src >= 0);
4482     return (TargetType)src;
4483   }
4484 };
4485 // Source equal
4486 template <typename SourceType, typename TargetType>
4487 struct kmp_convert<SourceType, TargetType, false, true, true, false> {
4488   static TargetType to(SourceType src) {
4489     KMP_ASSERT(src >= 0);
4490     return (TargetType)src;
4491   }
4492 };
4493 // Source bigger
4494 template <typename SourceType, typename TargetType>
4495 struct kmp_convert<SourceType, TargetType, false, false, true, false> {
4496   static TargetType to(SourceType src) {
4497     KMP_ASSERT(src >= 0);
4498     KMP_ASSERT(src <= static_cast<SourceType>(
4499                    (std::numeric_limits<TargetType>::max)()));
4500     return (TargetType)src;
4501   }
4502 };
4503
4504 // Source unsigned, Target signed
4505 // Source smaller
4506 template <typename SourceType, typename TargetType>
4507 struct kmp_convert<SourceType, TargetType, true, false, false, true> {
4508   static TargetType to(SourceType src) { return (TargetType)src; }
4509 };
4510 // Source equal
4511 template <typename SourceType, typename TargetType>
4512 struct kmp_convert<SourceType, TargetType, false, true, false, true> {
4513   static TargetType to(SourceType src) {
4514     KMP_ASSERT(src <= static_cast<SourceType>(
4515                    (std::numeric_limits<TargetType>::max)()));
4516     return (TargetType)src;
4517   }
4518 };
4519 // Source bigger
4520 template <typename SourceType, typename TargetType>
4521 struct kmp_convert<SourceType, TargetType, false, false, false, true> {
4522   static TargetType to(SourceType src) {
4523     KMP_ASSERT(src <= static_cast<SourceType>(
4524                    (std::numeric_limits<TargetType>::max)()));
4525     return (TargetType)src;
4526   }
4527 };
4528
4529 // Source unsigned, Target unsigned
4530 // Source smaller
4531 template <typename SourceType, typename TargetType>
4532 struct kmp_convert<SourceType, TargetType, true, false, false, false> {
4533   static TargetType to(SourceType src) { return (TargetType)src; }
4534 };
4535 // Source equal
4536 template <typename SourceType, typename TargetType>
4537 struct kmp_convert<SourceType, TargetType, false, true, false, false> {
4538   static TargetType to(SourceType src) { return src; }
4539 };
4540 // Source bigger
4541 template <typename SourceType, typename TargetType>
4542 struct kmp_convert<SourceType, TargetType, false, false, false, false> {
4543   static TargetType to(SourceType src) {
4544     KMP_ASSERT(src <= static_cast<SourceType>(
4545                    (std::numeric_limits<TargetType>::max)()));
4546     return (TargetType)src;
4547   }
4548 };
4549
4550 template <typename T1, typename T2>
4551 static inline void __kmp_type_convert(T1 src, T2 *dest) {
4552   *dest = kmp_convert<T1, T2>::to(src);
4553 }
4554
4555 #endif /* KMP_H */
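
__kmp_type_convert(), defined just above, is the convenience entry point for this machinery: both types are deduced at the call site and the value is routed through the matching kmp_convert specialization, so an out-of-range value trips a KMP_ASSERT before the narrowing cast. As a rough functional equivalent, here is an editor's sketch using C++17 if constexpr instead of partial specializations; it covers only the common narrowing checks, and the demo_type_convert name is hypothetical.

// Editor's sketch, not the runtime's implementation: roughly what the
// kmp_convert specializations verify, written with C++17 if constexpr.
#include <cassert>
#include <cstdint>
#include <limits>
#include <type_traits>

template <typename T1, typename T2>
static inline void demo_type_convert(T1 src, T2 *dest) {
  if constexpr (std::is_signed<T1>::value && !std::is_signed<T2>::value) {
    // Signed source into an unsigned target: negative values are rejected.
    assert(src >= 0);
  }
  if constexpr (sizeof(T1) > sizeof(T2)) {
    // Wider source: it must not exceed the target's maximum...
    assert(src <= static_cast<T1>((std::numeric_limits<T2>::max)()));
    if constexpr (std::is_signed<T1>::value && std::is_signed<T2>::value) {
      // ...and, for signed-to-signed narrowing, not fall below its minimum.
      assert(src >= static_cast<T1>((std::numeric_limits<T2>::min)()));
    }
  }
  *dest = static_cast<T2>(src);
}

int main() {
  int64_t big = 42;
  int32_t small = 0;
  demo_type_convert(big, &small); // value fits, so the asserts pass
  return small == 42 ? 0 : 1;
}

In builds where KMP_ASSERT compiles away, either formulation reduces to the plain cast.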