Bug Summary

File: build/source/openmp/runtime/src/kmp_runtime.cpp
Warning: line 1750, column 11
2nd function call argument is an uninitialized value
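
For context, this warning class fires when a call passes an argument that may never have been written on some execution path. A minimal illustration of the pattern (hypothetical code, not taken from kmp_runtime.cpp):

// Hypothetical example of "2nd function call argument is an uninitialized value".
void consume(int gtid, int tid);   // assumed callee, for illustration only

void example(bool ready) {
  int tid;          // no initializer
  if (ready)
    tid = 0;
  consume(1, tid);  // on the !ready path, the 2nd argument is uninitialized
}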

Annotated Source Code

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name kmp_runtime.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -resource-dir /usr/lib/llvm-17/lib/clang/17 -D _DEBUG -D _GLIBCXX_ASSERTIONS -D _GNU_SOURCE -D _LIBCPP_ENABLE_ASSERTIONS -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -D omp_EXPORTS -I projects/openmp/runtime/src -I /build/source/openmp/runtime/src -I include -I /build/source/llvm/include -I /build/source/openmp/runtime/src/i18n -I /build/source/openmp/runtime/src/include -I /build/source/openmp/runtime/src/thirdparty/ittnotify -D _FORTIFY_SOURCE=2 -D NDEBUG -D _GNU_SOURCE -D _REENTRANT -D _FORTIFY_SOURCE=2 -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-17/lib/clang/17/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fmacro-prefix-map=/build/source/= -fcoverage-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fcoverage-prefix-map=/build/source/= -source-date-epoch 1683717183 -O2 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -Wno-misleading-indentation -Wno-extra -Wno-pedantic -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-frame-address -Wno-strict-aliasing -Wno-stringop-truncation -Wno-switch -Wno-uninitialized -Wno-cast-qual -std=c++17 -fdeprecated-macro -fdebug-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/= -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fno-rtti -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2023-05-10-133810-16478-1 -x c++ 
/build/source/openmp/runtime/src/kmp_runtime.cpp
1/*
2 * kmp_runtime.cpp -- KPTS runtime support library
3 */
4
5//===----------------------------------------------------------------------===//
6//
7// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8// See https://llvm.org/LICENSE.txt for license information.
9// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10//
11//===----------------------------------------------------------------------===//
12
13#include "kmp.h"
14#include "kmp_affinity.h"
15#include "kmp_atomic.h"
16#include "kmp_environment.h"
17#include "kmp_error.h"
18#include "kmp_i18n.h"
19#include "kmp_io.h"
20#include "kmp_itt.h"
21#include "kmp_settings.h"
22#include "kmp_stats.h"
23#include "kmp_str.h"
24#include "kmp_wait_release.h"
25#include "kmp_wrapper_getpid.h"
26#include "kmp_dispatch.h"
27#if KMP_USE_HIER_SCHED
28#include "kmp_dispatch_hier.h"
29#endif
30
31#if OMPT_SUPPORT
32#include "ompt-specific.h"
33#endif
34#if OMPD_SUPPORT
35#include "ompd-specific.h"
36#endif
37
38#if OMP_PROFILING_SUPPORT
39#include "llvm/Support/TimeProfiler.h"
40static char *ProfileTraceFile = nullptr;
41#endif
42
43/* these are temporary issues to be dealt with */
44#define KMP_USE_PRCTL 0
45
46#if KMP_OS_WINDOWS
47#include <process.h>
48#endif
49
50#if KMP_OS_WINDOWS
51// windows does not need include files as it doesn't use shared memory
52#else
53#include <sys/mman.h>
54#include <sys/stat.h>
55#include <fcntl.h>
56#define SHM_SIZE 1024
57#endif
58
59#if defined(KMP_GOMP_COMPAT)
60char const __kmp_version_alt_comp[] =
61 KMP_VERSION_PREFIX "alternative compiler support: yes";
62#endif /* defined(KMP_GOMP_COMPAT) */
63
64char const __kmp_version_omp_api[] =
65 KMP_VERSION_PREFIX "API version: 5.0 (201611)";
66
67#ifdef KMP_DEBUG
68char const __kmp_version_lock[] =
69 KMP_VERSION_PREFIX "lock type: run time selectable";
70#endif /* KMP_DEBUG */
71
72#define KMP_MIN(x, y) ((x) < (y) ? (x) : (y))
73
74/* ------------------------------------------------------------------------ */
75
76#if KMP_USE_MONITOR
77kmp_info_t __kmp_monitor;
78#endif
79
80/* Forward declarations */
81
82void __kmp_cleanup(void);
83
84static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *, int tid,
85 int gtid);
86static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
87 kmp_internal_control_t *new_icvs,
88 ident_t *loc);
89#if KMP_AFFINITY_SUPPORTED
90static void __kmp_partition_places(kmp_team_t *team,
91 int update_master_only = 0);
92#endif
93static void __kmp_do_serial_initialize(void);
94void __kmp_fork_barrier(int gtid, int tid);
95void __kmp_join_barrier(int gtid);
96void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
97 kmp_internal_control_t *new_icvs, ident_t *loc);
98
99#ifdef USE_LOAD_BALANCE
100static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc);
101#endif
102
103static int __kmp_expand_threads(int nNeed);
104#if KMP_OS_WINDOWS
105static int __kmp_unregister_root_other_thread(int gtid);
106#endif
107static void __kmp_reap_thread(kmp_info_t *thread, int is_root);
108kmp_info_t *__kmp_thread_pool_insert_pt = NULL;
109
110void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
111 int new_nthreads);
112void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads);
113
114/* Calculate the identifier of the current thread */
115/* fast (and somewhat portable) way to get unique identifier of executing
116 thread. Returns KMP_GTID_DNE if we haven't been assigned a gtid. */
117int __kmp_get_global_thread_id() {
118 int i;
119 kmp_info_t **other_threads;
120 size_t stack_data;
121 char *stack_addr;
122 size_t stack_size;
123 char *stack_base;
124
125 KA_TRACE(
126 1000,
127 ("*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
128 __kmp_nth, __kmp_all_nth));
129
130 /* JPH - to handle the case where __kmpc_end(0) is called immediately prior to
131 a parallel region, made it return KMP_GTID_DNE to force serial_initialize
132 by caller. Had to handle KMP_GTID_DNE at all call-sites, or else guarantee
133 __kmp_init_gtid for this to work. */
134
135 if (!TCR_4(__kmp_init_gtid))
136 return KMP_GTID_DNE;
137
138#ifdef KMP_TDATA_GTID
139 if (TCR_4(__kmp_gtid_mode) >= 3) {
140 KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using TDATA\n"));
141 return __kmp_gtid;
142 }
143#endif
144 if (TCR_4(__kmp_gtid_mode) >= 2) {
145 KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using keyed TLS\n"));
146 return __kmp_gtid_get_specific();
147 }
148 KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using internal alg.\n"));
149
150 stack_addr = (char *)&stack_data;
151 other_threads = __kmp_threads;
152
153 /* ATT: The code below is a source of potential bugs due to unsynchronized
154 access to __kmp_threads array. For example:
155 1. Current thread loads other_threads[i] to thr and checks it, it is
156 non-NULL.
157 2. Current thread is suspended by OS.
158 3. Another thread unregisters and finishes (debug versions of free()
159 may fill memory with something like 0xEF).
160 4. Current thread is resumed.
161 5. Current thread reads junk from *thr.
162 TODO: Fix it. --ln */
163
164 for (i = 0; i < __kmp_threads_capacity; i++) {
165
166 kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
167 if (!thr)
168 continue;
169
170 stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
171 stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);
172
173 /* stack grows down -- search through all of the active threads */
174
175 if (stack_addr <= stack_base) {
176 size_t stack_diff = stack_base - stack_addr;
177
178 if (stack_diff <= stack_size) {
179 /* The only way we can be closer than the allocated */
180 /* stack size is if we are running on this thread. */
181 KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == i);
182 return i;
183 }
184 }
185 }
186
187 /* get specific to try and determine our gtid */
188 KA_TRACE(1000,
189 ("*** __kmp_get_global_thread_id: internal alg. failed to find "
190 "thread, using TLS\n"));
191 i = __kmp_gtid_get_specific();
192
193 /*fprintf( stderr, "=== %d\n", i ); */ /* GROO */
194
195 /* if we havn't been assigned a gtid, then return code */
196 if (i < 0)
197 return i;
198
199 /* dynamically updated stack window for uber threads to avoid get_specific
200 call */
201 if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)) {
202 KMP_FATAL(StackOverflow, i);
203 }
204
205 stack_base = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
206 if (stack_addr > stack_base) {
207 TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
208 TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
209 other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr -
210 stack_base);
211 } else {
212 TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
213 stack_base - stack_addr);
214 }
215
216 /* Reprint stack bounds for ubermaster since they have been refined */
217 if (__kmp_storage_map) {
218 char *stack_end = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
219 char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
220 __kmp_print_storage_map_gtid(i, stack_beg, stack_end,
221 other_threads[i]->th.th_info.ds.ds_stacksize,
222 "th_%d stack (refinement)", i);
223 }
224 return i;
225}
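
As an aside, the internal algorithm above is a stack-range membership test: the address of a local variable is compared against each registered thread's [stackbase - stacksize, stackbase] window, exploiting the fact that stacks grow downward. A minimal stand-alone sketch of that idea (hypothetical types and names, not the runtime's API):

#include <cstddef>

struct StackInfo {    // hypothetical per-thread record
  char *stack_base;   // highest address of the thread's stack
  size_t stack_size;  // extent of the stack below stack_base
};

// Return the index of the thread whose stack window contains addr, or -1.
static int find_owning_thread(const StackInfo *threads, int n, const char *addr) {
  for (int i = 0; i < n; ++i) {
    if (addr <= threads[i].stack_base &&
        (size_t)(threads[i].stack_base - addr) <= threads[i].stack_size)
      return i; // only the executing thread can be this close to its own base
  }
  return -1;
}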
226
227int __kmp_get_global_thread_id_reg() {
228 int gtid;
229
230 if (!__kmp_init_serial) {
231 gtid = KMP_GTID_DNE;
232 } else
233#ifdef KMP_TDATA_GTID
234 if (TCR_4(__kmp_gtid_mode) >= 3) {
235 KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using TDATA\n"));
236 gtid = __kmp_gtid;
237 } else
238#endif
239 if (TCR_4(__kmp_gtid_mode) >= 2) {
240 KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using keyed TLS\n"));
241 gtid = __kmp_gtid_get_specific();
242 } else {
243 KA_TRACE(1000,
244 ("*** __kmp_get_global_thread_id_reg: using internal alg.\n"));
245 gtid = __kmp_get_global_thread_id();
246 }
247
248 /* we must be a new uber master sibling thread */
249 if (gtid == KMP_GTID_DNE) {
250 KA_TRACE(10,
251 ("__kmp_get_global_thread_id_reg: Encountered new root thread. "
252 "Registering a new gtid.\n"));
253 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
254 if (!__kmp_init_serial) {
255 __kmp_do_serial_initialize();
256 gtid = __kmp_gtid_get_specific();
257 } else {
258 gtid = __kmp_register_root(FALSE);
259 }
260 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
261 /*__kmp_printf( "+++ %d\n", gtid ); */ /* GROO */
262 }
263
264 KMP_DEBUG_ASSERT(gtid >= 0);
265
266 return gtid;
267}
268
269/* caller must hold forkjoin_lock */
270void __kmp_check_stack_overlap(kmp_info_t *th) {
271 int f;
272 char *stack_beg = NULL;
273 char *stack_end = NULL;
274 int gtid;
275
276 KA_TRACE(10, ("__kmp_check_stack_overlap: called\n"));
277 if (__kmp_storage_map) {
278 stack_end = (char *)th->th.th_info.ds.ds_stackbase;
279 stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
280
281 gtid = __kmp_gtid_from_thread(th);
282
283 if (gtid == KMP_GTID_MONITOR) {
284 __kmp_print_storage_map_gtid(
285 gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
286 "th_%s stack (%s)", "mon",
287 (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
288 } else {
289 __kmp_print_storage_map_gtid(
290 gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
291 "th_%d stack (%s)", gtid,
292 (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
293 }
294 }
295
296 /* No point in checking ubermaster threads since they use refinement and
297 * cannot overlap */
298 gtid = __kmp_gtid_from_thread(th);
299 if (__kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) {
300 KA_TRACE(10,
301 ("__kmp_check_stack_overlap: performing extensive checking\n"));
302 if (stack_beg == NULL) {
303 stack_end = (char *)th->th.th_info.ds.ds_stackbase;
304 stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
305 }
306
307 for (f = 0; f < __kmp_threads_capacity; f++) {
308 kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);
309
310 if (f_th && f_th != th) {
311 char *other_stack_end =
312 (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
313 char *other_stack_beg =
314 other_stack_end - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
315 if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
316 (stack_end > other_stack_beg && stack_end < other_stack_end)) {
317
318 /* Print the other stack values before the abort */
319 if (__kmp_storage_map)
320 __kmp_print_storage_map_gtid(
321 -1, other_stack_beg, other_stack_end,
322 (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
323 "th_%d stack (overlapped)", __kmp_gtid_from_thread(f_th));
324
325 __kmp_fatal(KMP_MSG(StackOverlap), KMP_HNT(ChangeStackLimit),
326 __kmp_msg_null);
327 }
328 }
329 }
330 }
331 KA_TRACE(10, ("__kmp_check_stack_overlap: returning\n"));
332}
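
The overlap test in the loop above reduces to a strict interval check: two stacks collide if either end of one falls strictly inside the other. A hedged stand-alone restatement (sketch, not the runtime's code):

// Stacks [beg1, end1) and [beg2, end2) overlap if an endpoint of the first
// lies strictly inside the second; mirrors the condition used above.
static bool stacks_overlap(const char *beg1, const char *end1,
                           const char *beg2, const char *end2) {
  return (beg1 > beg2 && beg1 < end2) || (end1 > beg2 && end1 < end2);
}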
333
334/* ------------------------------------------------------------------------ */
335
336void __kmp_infinite_loop(void) {
337 static int done = FALSE;
338
339 while (!done) {
340 KMP_YIELD(TRUE);
341 }
342}
343
344#define MAX_MESSAGE 512
345
346void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, size_t size,
347 char const *format, ...) {
348 char buffer[MAX_MESSAGE];
349 va_list ap;
350
351 va_start(ap, format);
352 KMP_SNPRINTF(buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1,
353 p2, (unsigned long)size, format);
354 __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
355 __kmp_vprintf(kmp_err, buffer, ap);
356#if KMP_PRINT_DATA_PLACEMENT
357 int node;
358 if (gtid >= 0) {
359 if (p1 <= p2 && (char *)p2 - (char *)p1 == size) {
360 if (__kmp_storage_map_verbose) {
361 node = __kmp_get_host_node(p1);
362 if (node < 0) /* doesn't work, so don't try this next time */
363 __kmp_storage_map_verbose = FALSE;
364 else {
365 char *last;
366 int lastNode;
367 int localProc = __kmp_get_cpu_from_gtid(gtid);
368
369 const int page_size = KMP_GET_PAGE_SIZE();
370
371 p1 = (void *)((size_t)p1 & ~((size_t)page_size - 1));
372 p2 = (void *)(((size_t)p2 - 1) & ~((size_t)page_size - 1));
373 if (localProc >= 0)
374 __kmp_printf_no_lock(" GTID %d localNode %d\n", gtid,
375 localProc >> 1);
376 else
377 __kmp_printf_no_lock(" GTID %d\n", gtid);
378#if KMP_USE_PRCTL
379 /* The more elaborate format is disabled for now because of the prctl
380 * hanging bug. */
381 do {
382 last = p1;
383 lastNode = node;
384 /* This loop collates adjacent pages with the same host node. */
385 do {
386 (char *)p1 += page_size;
387 } while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
388 __kmp_printf_no_lock(" %p-%p memNode %d\n", last, (char *)p1 - 1,
389 lastNode);
390 } while (p1 <= p2);
391#else
392 __kmp_printf_no_lock(" %p-%p memNode %d\n", p1,
393 (char *)p1 + (page_size - 1),
394 __kmp_get_host_node(p1));
395 if (p1 < p2) {
396 __kmp_printf_no_lock(" %p-%p memNode %d\n", p2,
397 (char *)p2 + (page_size - 1),
398 __kmp_get_host_node(p2));
399 }
400#endif
401 }
402 }
403 } else
404 __kmp_printf_no_lock(" %s\n", KMP_I18N_STR(StorageMapWarning));
405 }
406#endif /* KMP_PRINT_DATA_PLACEMENT */
407 __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
408}
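
The page-rounding arithmetic above clears the low-order bits of an address, assuming page_size is a power of two. For example, with 4096-byte pages:

// Illustration only: round an address down to its page boundary.
const size_t page_size = 4096;
size_t addr = 0x12345;
size_t page_start = addr & ~(page_size - 1); // 0x12345 & ~0xFFF == 0x12000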
409
410void __kmp_warn(char const *format, ...) {
411 char buffer[MAX_MESSAGE];
412 va_list ap;
413
414 if (__kmp_generate_warnings == kmp_warnings_off) {
415 return;
416 }
417
418 va_start(ap, format);
419
420 KMP_SNPRINTF(buffer, sizeof(buffer), "OMP warning: %s\n", format);
421 __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
422 __kmp_vprintf(kmp_err, buffer, ap);
423 __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
424
425 va_end(ap);
426}
427
428void __kmp_abort_process() {
429 // Later threads may stall here, but that's ok because abort() will kill them.
430 __kmp_acquire_bootstrap_lock(&__kmp_exit_lock);
431
432 if (__kmp_debug_buf) {
433 __kmp_dump_debug_buffer();
434 }
435
436 if (KMP_OS_WINDOWS) {
437 // Let other threads know of abnormal termination and prevent deadlock
438 // if abort happened during library initialization or shutdown
439 __kmp_global.g.g_abort = SIGABRT;
440
441 /* On Windows* OS by default abort() causes pop-up error box, which stalls
442 nightly testing. Unfortunately, we cannot reliably suppress pop-up error
443 boxes. _set_abort_behavior() works well, but this function is not
444 available in VS7 (this is not problem for DLL, but it is a problem for
445 static OpenMP RTL). SetErrorMode (and so, timelimit utility) does not
446 help, at least in some versions of MS C RTL.
447
448 It seems following sequence is the only way to simulate abort() and
449 avoid pop-up error box. */
450 raise(SIGABRT);
451 _exit(3); // Just in case, if signal ignored, exit anyway.
452 } else {
453 __kmp_unregister_library();
454 abort();
455 }
456
457 __kmp_infinite_loop();
458 __kmp_release_bootstrap_lock(&__kmp_exit_lock);
459
460} // __kmp_abort_process
461
462void __kmp_abort_thread(void) {
463 // TODO: Eliminate g_abort global variable and this function.
464 // In case of abort just call abort(), it will kill all the threads.
465 __kmp_infinite_loop();
466} // __kmp_abort_thread
467
468/* Print out the storage map for the major kmp_info_t thread data structures
469 that are allocated together. */
470
471static void __kmp_print_thread_storage_map(kmp_info_t *thr, int gtid) {
472 __kmp_print_storage_map_gtid(gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d",
473 gtid);
474
475 __kmp_print_storage_map_gtid(gtid, &thr->th.th_info, &thr->th.th_team,
476 sizeof(kmp_desc_t), "th_%d.th_info", gtid);
477
478 __kmp_print_storage_map_gtid(gtid, &thr->th.th_local, &thr->th.th_pri_head,
479 sizeof(kmp_local_t), "th_%d.th_local", gtid);
480
481 __kmp_print_storage_map_gtid(
482 gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
483 sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid);
484
485 __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_plain_barrier],
486 &thr->th.th_bar[bs_plain_barrier + 1],
487 sizeof(kmp_balign_t), "th_%d.th_bar[plain]",
488 gtid);
489
490 __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_forkjoin_barrier],
491 &thr->th.th_bar[bs_forkjoin_barrier + 1],
492 sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]",
493 gtid);
494
495#if KMP_FAST_REDUCTION_BARRIER
496 __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_reduction_barrier],
497 &thr->th.th_bar[bs_reduction_barrier + 1],
498 sizeof(kmp_balign_t), "th_%d.th_bar[reduction]",
499 gtid);
500#endif // KMP_FAST_REDUCTION_BARRIER
501}
502
503/* Print out the storage map for the major kmp_team_t team data structures
504 that are allocated together. */
505
506static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team,
507 int team_id, int num_thr) {
508 int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
509 __kmp_print_storage_map_gtid(-1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
510 header, team_id);
511
512 __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0],
513 &team->t.t_bar[bs_last_barrier],
514 sizeof(kmp_balign_team_t) * bs_last_barrier,
515 "%s_%d.t_bar", header, team_id);
516
517 __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_plain_barrier],
518 &team->t.t_bar[bs_plain_barrier + 1],
519 sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]",
520 header, team_id);
521
522 __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_forkjoin_barrier],
523 &team->t.t_bar[bs_forkjoin_barrier + 1],
524 sizeof(kmp_balign_team_t),
525 "%s_%d.t_bar[forkjoin]", header, team_id);
526
527#if KMP_FAST_REDUCTION_BARRIER
528 __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_reduction_barrier],
529 &team->t.t_bar[bs_reduction_barrier + 1],
530 sizeof(kmp_balign_team_t),
531 "%s_%d.t_bar[reduction]", header, team_id);
532#endif // KMP_FAST_REDUCTION_BARRIER
533
534 __kmp_print_storage_map_gtid(
535 -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
536 sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id);
537
538 __kmp_print_storage_map_gtid(
539 -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
540 sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id);
541
542 __kmp_print_storage_map_gtid(-1, &team->t.t_disp_buffer[0],
543 &team->t.t_disp_buffer[num_disp_buff],
544 sizeof(dispatch_shared_info_t) * num_disp_buff,
545 "%s_%d.t_disp_buffer", header, team_id);
546}
547
548static void __kmp_init_allocator() {
549 __kmp_init_memkind();
550 __kmp_init_target_mem();
551}
552static void __kmp_fini_allocator() { __kmp_fini_memkind(); }
553
554/* ------------------------------------------------------------------------ */
555
556#if ENABLE_LIBOMPTARGET
557static void __kmp_init_omptarget() {
558 __kmp_init_target_task();
559}
560#endif
561
562/* ------------------------------------------------------------------------ */
563
564#if KMP_DYNAMIC_LIB
565#if KMP_OS_WINDOWS
566
567BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) {
568 //__kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
569
570 switch (fdwReason) {
571
572 case DLL_PROCESS_ATTACH:
573 KA_TRACE(10, ("DllMain: PROCESS_ATTACH\n"));
574
575 return TRUE;
576
577 case DLL_PROCESS_DETACH:
578 KA_TRACE(10, ("DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific()));
579
580 // According to Windows* documentation for DllMain entry point:
581 // for DLL_PROCESS_DETACH, lpReserved is used for telling the difference:
582 // lpReserved == NULL when FreeLibrary() is called,
583 // lpReserved != NULL when the process is terminated.
584 // When FreeLibrary() is called, worker threads remain alive. So the
585 // runtime's state is consistent and executing proper shutdown is OK.
586 // When the process is terminated, worker threads have exited or been
587 // forcefully terminated by the OS and only the shutdown thread remains.
588 // This can leave the runtime in an inconsistent state.
589 // Hence, only attempt proper cleanup when FreeLibrary() is called.
590 // Otherwise, rely on OS to reclaim resources.
591 if (lpReserved == NULL)
592 __kmp_internal_end_library(__kmp_gtid_get_specific());
593
594 return TRUE;
595
596 case DLL_THREAD_ATTACH:
597 KA_TRACE(10, ("DllMain: THREAD_ATTACH\n"));
598
599 /* if we want to register new siblings all the time here call
600 * __kmp_get_gtid(); */
601 return TRUE;
602
603 case DLL_THREAD_DETACH:
604 KA_TRACE(10, ("DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific()));
605
606 __kmp_internal_end_thread(__kmp_gtid_get_specific());
607 return TRUE;
608 }
609
610 return TRUE;
611}
612
613#endif /* KMP_OS_WINDOWS */
614#endif /* KMP_DYNAMIC_LIB */
615
616/* __kmp_parallel_deo -- Wait until it's our turn. */
617void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
618 int gtid = *gtid_ref;
619#ifdef BUILD_PARALLEL_ORDERED
620 kmp_team_t *team = __kmp_team_from_gtid(gtid);
621#endif /* BUILD_PARALLEL_ORDERED */
622
623 if (__kmp_env_consistency_check) {
624 if (__kmp_threads[gtid]->th.th_root->r.r_active)
625#if KMP_USE_DYNAMIC_LOCK
626 __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL, 0);
627#else
628 __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL);
629#endif
630 }
631#ifdef BUILD_PARALLEL_ORDERED
632 if (!team->t.t_serialized) {
633 KMP_MB();
634 KMP_WAIT(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid), KMP_EQ,
635 NULL);
636 KMP_MB();
637 }
638#endif /* BUILD_PARALLEL_ORDERED */
639}
640
641/* __kmp_parallel_dxo -- Signal the next task. */
642void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
643 int gtid = *gtid_ref;
644#ifdef BUILD_PARALLEL_ORDERED
645 int tid = __kmp_tid_from_gtid(gtid);
646 kmp_team_t *team = __kmp_team_from_gtid(gtid);
647#endif /* BUILD_PARALLEL_ORDERED */
648
649 if (__kmp_env_consistency_check) {
650 if (__kmp_threads[gtid]->th.th_root->r.r_active)
651 __kmp_pop_sync(gtid, ct_ordered_in_parallel, loc_ref);
652 }
653#ifdef BUILD_PARALLEL_ORDERED
654 if (!team->t.t_serialized) {
655 KMP_MB(); /* Flush all pending memory write invalidates. */
656
657 /* use the tid of the next thread in this team */
658 /* TODO replace with general release procedure */
659 team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc);
660
661 KMP_MB(); /* Flush all pending memory write invalidates. */
662 }
663#endif /* BUILD_PARALLEL_ORDERED */
664}
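
Taken together, __kmp_parallel_deo/__kmp_parallel_dxo implement a ticket-style handshake: each thread waits until the shared t_ordered value equals its own tid, runs its ordered chunk, then hands the ticket to tid + 1. A simplified stand-alone model (std::atomic instead of the runtime's KMP_WAIT machinery; illustrative only):

#include <atomic>

std::atomic<int> ticket{0}; // hypothetical stand-in for t_ordered.dt.t_value

void ordered_chunk(int tid, int nproc) {
  while (ticket.load(std::memory_order_acquire) != tid) {
    // spin; the real runtime waits with yielding/backoff via KMP_WAIT
  }
  // ... execute the ordered work for this iteration ...
  ticket.store((tid + 1) % nproc, std::memory_order_release);
}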
665
666/* ------------------------------------------------------------------------ */
667/* The BARRIER for a SINGLE process section is always explicit */
668
669int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws) {
670 int status;
671 kmp_info_t *th;
672 kmp_team_t *team;
673
674 if (!TCR_4(__kmp_init_parallel))
675 __kmp_parallel_initialize();
676 __kmp_resume_if_soft_paused();
677
678 th = __kmp_threads[gtid];
679 team = th->th.th_team;
680 status = 0;
681
682 th->th.th_ident = id_ref;
683
684 if (team->t.t_serialized) {
685 status = 1;
686 } else {
687 kmp_int32 old_this = th->th.th_local.this_construct;
688
689 ++th->th.th_local.this_construct;
690 /* try to set team count to thread count--success means thread got the
691 single block */
692 /* TODO: Should this be acquire or release? */
693 if (team->t.t_construct == old_this) {
694 status = __kmp_atomic_compare_store_acq(&team->t.t_construct, old_this,
695 th->th.th_local.this_construct);
696 }
697#if USE_ITT_BUILD
698 if (__itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
699 KMP_MASTER_GTID(gtid) && th->th.th_teams_microtask == NULL &&
700 team->t.t_active_level == 1) {
701 // Only report metadata by primary thread of active team at level 1
702 __kmp_itt_metadata_single(id_ref);
703 }
704#endif /* USE_ITT_BUILD */
705 }
706
707 if (__kmp_env_consistency_check) {
708 if (status && push_ws) {
709 __kmp_push_workshare(gtid, ct_psingle, id_ref);
710 } else {
711 __kmp_check_workshare(gtid, ct_psingle, id_ref);
712 }
713 }
714#if USE_ITT_BUILD
715 if (status) {
716 __kmp_itt_single_start(gtid);
717 }
718#endif /* USE_ITT_BUILD */
719 return status;
720}
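
The election in __kmp_enter_single works because every thread advances a private this_construct counter in lockstep with the constructs it encounters, and only one compare-and-swap on the shared team counter can succeed per construct. A simplified model of that idea (sketch, not the runtime's data structures):

#include <atomic>

std::atomic<int> team_construct{0}; // hypothetical stand-in for t_construct

// Returns true for exactly one caller per construct.
bool try_enter_single(int &my_construct) {
  int expected = my_construct++; // old value of this thread's private counter
  return team_construct.compare_exchange_strong(expected, my_construct,
                                                std::memory_order_acquire);
}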
721
722void __kmp_exit_single(int gtid) {
723#if USE_ITT_BUILD
724 __kmp_itt_single_end(gtid);
725#endif /* USE_ITT_BUILD */
726 if (__kmp_env_consistency_check)
727 __kmp_pop_workshare(gtid, ct_psingle, NULL);
728}
729
730/* determine if we can go parallel or must use a serialized parallel region and
731 * how many threads we can use
732 * set_nproc is the number of threads requested for the team
733 * returns 0 if we should serialize or only use one thread,
734 * otherwise the number of threads to use
735 * The forkjoin lock is held by the caller. */
736static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
737 int master_tid, int set_nthreads,
738 int enter_teams) {
739 int capacity;
740 int new_nthreads;
741 KMP_DEBUG_ASSERT(__kmp_init_serial);
742 KMP_DEBUG_ASSERT(root && parent_team);
743 kmp_info_t *this_thr = parent_team->t.t_threads[master_tid];
744
745 // If dyn-var is set, dynamically adjust the number of desired threads,
746 // according to the method specified by dynamic_mode.
747 new_nthreads = set_nthreads;
748 if (!get__dynamic_2(parent_team, master_tid)) {
749 ;
750 }
751#ifdef USE_LOAD_BALANCE
752 else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) {
753 new_nthreads = __kmp_load_balance_nproc(root, set_nthreads);
754 if (new_nthreads == 1) {
755 KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
756 "reservation to 1 thread\n",
757 master_tid));
758 return 1;
759 }
760 if (new_nthreads < set_nthreads) {
761 KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
762 "reservation to %d threads\n",
763 master_tid, new_nthreads));
764 }
765 }
766#endif /* USE_LOAD_BALANCE */
767 else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) {
768 new_nthreads = __kmp_avail_proc - __kmp_nth +
769 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
770 if (new_nthreads <= 1) {
771 KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
772 "reservation to 1 thread\n",
773 master_tid));
774 return 1;
775 }
776 if (new_nthreads < set_nthreads) {
777 KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
778 "reservation to %d threads\n",
779 master_tid, new_nthreads));
780 } else {
781 new_nthreads = set_nthreads;
782 }
783 } else if (__kmp_global.g.g_dynamic_mode == dynamic_random) {
784 if (set_nthreads > 2) {
785 new_nthreads = __kmp_get_random(parent_team->t.t_threads[master_tid]);
786 new_nthreads = (new_nthreads % set_nthreads) + 1;
787 if (new_nthreads == 1) {
788 KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
789 "reservation to 1 thread\n",
790 master_tid));
791 return 1;
792 }
793 if (new_nthreads < set_nthreads) {
794 KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
795 "reservation to %d threads\n",
796 master_tid, new_nthreads));
797 }
798 }
799 } else {
800 KMP_ASSERT(0);
801 }
802
803 // Respect KMP_ALL_THREADS/KMP_DEVICE_THREAD_LIMIT.
804 if (__kmp_nth + new_nthreads -
805 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
806 __kmp_max_nth) {
807 int tl_nthreads = __kmp_max_nth - __kmp_nth +
808 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
809 if (tl_nthreads <= 0) {
810 tl_nthreads = 1;
811 }
812
813 // If dyn-var is false, emit a 1-time warning.
814 if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
815 __kmp_reserve_warn = 1;
816 __kmp_msg(kmp_ms_warning,
817 KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
818 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
819 }
820 if (tl_nthreads == 1) {
821 KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "
822 "reduced reservation to 1 thread\n",
823 master_tid));
824 return 1;
825 }
826 KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "
827 "reservation to %d threads\n",
828 master_tid, tl_nthreads));
829 new_nthreads = tl_nthreads;
830 }
831
832 // Respect OMP_THREAD_LIMIT
833 int cg_nthreads = this_thr->th.th_cg_roots->cg_nthreads;
834 int max_cg_threads = this_thr->th.th_cg_roots->cg_thread_limit;
835 if (cg_nthreads + new_nthreads -
836 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
837 max_cg_threads) {
838 int tl_nthreads = max_cg_threads - cg_nthreads +
839 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
840 if (tl_nthreads <= 0) {
841 tl_nthreads = 1;
842 }
843
844 // If dyn-var is false, emit a 1-time warning.
845 if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
846 __kmp_reserve_warn = 1;
847 __kmp_msg(kmp_ms_warning,
848 KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
849 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
850 }
851 if (tl_nthreads == 1) {
852 KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "
853 "reduced reservation to 1 thread\n",
854 master_tid));
855 return 1;
856 }
857 KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "
858 "reservation to %d threads\n",
859 master_tid, tl_nthreads));
860 new_nthreads = tl_nthreads;
861 }
862
863 // Check if the threads array is large enough, or needs expanding.
864 // See comment in __kmp_register_root() about the adjustment if
865 // __kmp_threads[0] == NULL.
866 capacity = __kmp_threads_capacity;
867 if (TCR_PTR(__kmp_threads[0]) == NULL) {
868 --capacity;
869 }
870 // If it is not for initializing the hidden helper team, we need to take
871 // __kmp_hidden_helper_threads_num out of the capacity because it is included
872 // in __kmp_threads_capacity.
873 if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
874 capacity -= __kmp_hidden_helper_threads_num;
875 }
876 if (__kmp_nth + new_nthreads -
877 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
878 capacity) {
879 // Expand the threads array.
880 int slotsRequired = __kmp_nth + new_nthreads -
881 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) -
882 capacity;
883 int slotsAdded = __kmp_expand_threads(slotsRequired);
884 if (slotsAdded < slotsRequired) {
885 // The threads array was not expanded enough.
886 new_nthreads -= (slotsRequired - slotsAdded);
887 KMP_ASSERT(new_nthreads >= 1);
888
889 // If dyn-var is false, emit a 1-time warning.
890 if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
891 __kmp_reserve_warn = 1;
892 if (__kmp_tp_cached) {
893 __kmp_msg(kmp_ms_warning,
894 KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
895 KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
896 KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
897 } else {
898 __kmp_msg(kmp_ms_warning,
899 KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
900 KMP_HNT(SystemLimitOnThreads), __kmp_msg_null);
901 }
902 }
903 }
904 }
905
906#ifdef KMP_DEBUG
907 if (new_nthreads == 1) {
908 KC_TRACE(10,
909 ("__kmp_reserve_threads: T#%d serializing team after reclaiming "
910 "dead roots and rechecking; requested %d threads\n",
911 __kmp_get_gtid(), set_nthreads));
912 } else {
913 KC_TRACE(10, ("__kmp_reserve_threads: T#%d allocating %d threads; requested"
914 " %d threads\n",
915 __kmp_get_gtid(), new_nthreads, set_nthreads));
916 }
917#endif // KMP_DEBUG
918 return new_nthreads;
919}
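
As a concrete illustration of the clamping arithmetic above (numbers invented for the example): with __kmp_max_nth = 8, __kmp_nth = 6, an active root (so the primary thread's slot is credited back) and a request of set_nthreads = 4, the check 6 + 4 - 1 > 8 trips, and the reservation is trimmed to tl_nthreads = 8 - 6 + 1 = 3 before the OMP_THREAD_LIMIT and capacity checks run.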
920
921/* Allocate threads from the thread pool and assign them to the new team. We are
922 assured that there are enough threads available, because we checked on that
923 earlier within critical section forkjoin */
924static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
925 kmp_info_t *master_th, int master_gtid,
926 int fork_teams_workers) {
927 int i;
928 int use_hot_team;
929
930 KA_TRACE(10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc));
931 KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid());
932 KMP_MB();
933
934 /* first, let's setup the primary thread */
935 master_th->th.th_info.ds.ds_tid = 0;
936 master_th->th.th_team = team;
937 master_th->th.th_team_nproc = team->t.t_nproc;
938 master_th->th.th_team_master = master_th;
939 master_th->th.th_team_serialized = FALSE;
940 master_th->th.th_dispatch = &team->t.t_dispatch[0];
941
942/* make sure we are not the optimized hot team */
943#if KMP_NESTED_HOT_TEAMS
944 use_hot_team = 0;
945 kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
946 if (hot_teams) { // hot teams array is not allocated if
947 // KMP_HOT_TEAMS_MAX_LEVEL=0
948 int level = team->t.t_active_level - 1; // index in array of hot teams
949 if (master_th->th.th_teams_microtask) { // are we inside the teams?
950 if (master_th->th.th_teams_size.nteams > 1) {
951 ++level; // level was not increased in teams construct for
952 // team_of_masters
953 }
954 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
955 master_th->th.th_teams_level == team->t.t_level) {
956 ++level; // level was not increased in teams construct for
957 // team_of_workers before the parallel
958 } // team->t.t_level will be increased inside parallel
959 }
960 if (level < __kmp_hot_teams_max_level) {
961 if (hot_teams[level].hot_team) {
962 // hot team has already been allocated for given level
963 KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
964 use_hot_team = 1; // the team is ready to use
965 } else {
966 use_hot_team = 0; // AC: threads are not allocated yet
967 hot_teams[level].hot_team = team; // remember new hot team
968 hot_teams[level].hot_team_nth = team->t.t_nproc;
969 }
970 } else {
971 use_hot_team = 0;
972 }
973 }
974#else
975 use_hot_team = team == root->r.r_hot_team;
976#endif
977 if (!use_hot_team) {
978
979 /* install the primary thread */
980 team->t.t_threads[0] = master_th;
981 __kmp_initialize_info(master_th, team, 0, master_gtid);
982
983 /* now, install the worker threads */
984 for (i = 1; i < team->t.t_nproc; i++) {
985
986 /* fork or reallocate a new thread and install it in team */
987 kmp_info_t *thr = __kmp_allocate_thread(root, team, i);
988 team->t.t_threads[i] = thr;
989 KMP_DEBUG_ASSERT(thr);
990 KMP_DEBUG_ASSERT(thr->th.th_team == team);
991 /* align team and thread arrived states */
992 KA_TRACE(20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
993 "T#%d(%d:%d) join =%llu, plain=%llu\n",
994 __kmp_gtid_from_tid(0, team), team->t.t_id, 0,
995 __kmp_gtid_from_tid(i, team), team->t.t_id, i,
996 team->t.t_bar[bs_forkjoin_barrier].b_arrived,
997 team->t.t_bar[bs_plain_barrier].b_arrived));
998 thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
999 thr->th.th_teams_level = master_th->th.th_teams_level;
1000 thr->th.th_teams_size = master_th->th.th_teams_size;
1001 { // Initialize threads' barrier data.
1002 int b;
1003 kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar;
1004 for (b = 0; b < bs_last_barrier; ++b) {
1005 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
1006 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
1007#if USE_DEBUGGER
1008 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
1009#endif
1010 }
1011 }
1012 }
1013
1014#if KMP_AFFINITY_SUPPORTED
1015 // Do not partition the places list for teams construct workers who
1016 // haven't actually been forked to do real work yet. This partitioning
1017 // will take place in the parallel region nested within the teams construct.
1018 if (!fork_teams_workers) {
1019 __kmp_partition_places(team);
1020 }
1021#endif
1022
1023 if (team->t.t_nproc > 1 &&
1024 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
1025 team->t.b->update_num_threads(team->t.t_nproc);
1026 __kmp_add_threads_to_team(team, team->t.t_nproc);
1027 }
1028 }
1029
1030 if (__kmp_display_affinity && team->t.t_display_affinity != 1) {
1031 for (i = 0; i < team->t.t_nproc; i++) {
1032 kmp_info_t *thr = team->t.t_threads[i];
1033 if (thr->th.th_prev_num_threads != team->t.t_nproc ||
1034 thr->th.th_prev_level != team->t.t_level) {
1035 team->t.t_display_affinity = 1;
1036 break;
1037 }
1038 }
1039 }
1040
1041 KMP_MB();
1042}
1043
1044#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1045// Propagate any changes to the floating point control registers out to the team
1046// We try to avoid unnecessary writes to the relevant cache line in the team
1047// structure, so we don't make changes unless they are needed.
1048inline static void propagateFPControl(kmp_team_t *team) {
1049 if (__kmp_inherit_fp_control) {
1050 kmp_int16 x87_fpu_control_word;
1051 kmp_uint32 mxcsr;
1052
1053 // Get primary thread's values of FPU control flags (both X87 and vector)
1054 __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
1055 __kmp_store_mxcsr(&mxcsr);
1056 mxcsr &= KMP_X86_MXCSR_MASK;
1057
1058 // There is no point looking at t_fp_control_saved here.
1059 // If it is TRUE, we still have to update the values if they are different
1060 // from those we now have. If it is FALSE we didn't save anything yet, but
1061 // our objective is the same. We have to ensure that the values in the team
1062 // are the same as those we have.
1063 // So, this code achieves what we need whether or not t_fp_control_saved is
1064 // true. By checking whether the value needs updating we avoid unnecessary
1065 // writes that would put the cache-line into a written state, causing all
1066 // threads in the team to have to read it again.
1067 KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
1068 KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);
1069 // Although we don't use this value, other code in the runtime wants to know
1070 // whether it should restore them. So we must ensure it is correct.
1071 KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
1072 } else {
1073 // Similarly here. Don't write to this cache-line in the team structure
1074 // unless we have to.
1075 KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);
1076 }
1077}
1078
1079// Do the opposite, setting the hardware registers to the updated values from
1080// the team.
1081inline static void updateHWFPControl(kmp_team_t *team) {
1082 if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) {
1083 // Only reset the fp control regs if they have been changed in the team
1084 // during the parallel region that we are exiting.
1085 kmp_int16 x87_fpu_control_word;
1086 kmp_uint32 mxcsr;
1087 __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
1088 __kmp_store_mxcsr(&mxcsr);
1089 mxcsr &= KMP_X86_MXCSR_MASK0xffffffc0;
1090
1091 if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) {
1092 __kmp_clear_x87_fpu_status_word();
1093 __kmp_load_x87_fpu_control_word(&team->t.t_x87_fpu_control_word);
1094 }
1095
1096 if (team->t.t_mxcsr != mxcsr) {
1097 __kmp_load_mxcsr(&team->t.t_mxcsr);
1098 }
1099 }
1100}
1101#else
1102#define propagateFPControl(x) ((void)0)
1103#define updateHWFPControl(x) ((void)0)
1104#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
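The comment block inside propagateFPControl explains why each field is compared before being written: an unconditional store would put the team's cache line into a written state and force every thread in the team to read it again. Below is a minimal sketch of that check-before-write idea under a simplified team structure; the names are illustrative, not the runtime's actual types or macros.

    // Sketch only: write a shared field only when the value really changed,
    // mirroring what the expanded KMP_CHECK_UPDATE macro does in the listing.
    #include <cstdint>

    struct toy_fp_team {
      std::int16_t x87_cw;   // last published x87 control word
      std::uint32_t mxcsr;   // last published (masked) MXCSR
      int saved;             // whether the values above are valid
    };

    template <class T>
    static inline void check_update(T &dst, T val) {
      if (dst != val) // avoid dirtying the cache line when nothing changed
        dst = val;
    }

    static void toy_propagate(toy_fp_team *team, std::int16_t cw,
                              std::uint32_t mxcsr) {
      check_update(team->x87_cw, cw);
      check_update(team->mxcsr, std::uint32_t(mxcsr & 0xffffffc0u));
      check_update(team->saved, 1);
    }

The compare-then-assign shape is the whole point: readers of an unchanged team structure never see the cache line invalidated.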
1105
1106static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team,
1107 int realloc); // forward declaration
1108
1109 /* Run a parallel region that has been serialized, so it runs only in a team of
1110 the single primary thread. */
1111void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
1112 kmp_info_t *this_thr;
1113 kmp_team_t *serial_team;
1114
1115 KC_TRACE(10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmpc_serialized_parallel: called by T#%d\n"
, global_tid); }
;
1116
1117 /* Skip all this code for autopar serialized loops since it results in
1118 unacceptable overhead */
1119 if (loc != NULL__null && (loc->flags & KMP_IDENT_AUTOPAR))
1120 return;
1121
1122 if (!TCR_4(__kmp_init_parallel)(__kmp_init_parallel))
1123 __kmp_parallel_initialize();
1124 __kmp_resume_if_soft_paused();
1125
1126 this_thr = __kmp_threads[global_tid];
1127 serial_team = this_thr->th.th_serial_team;
1128
1129 /* utilize the serialized team held by this thread */
1130 KMP_DEBUG_ASSERT(serial_team)if (!(serial_team)) { __kmp_debug_assert("serial_team", "openmp/runtime/src/kmp_runtime.cpp"
, 1130); }
;
1131 KMP_MB();
1132
1133 if (__kmp_tasking_mode != tskm_immediate_exec) {
1134 KMP_DEBUG_ASSERT(if (!(this_thr->th.th_task_team == this_thr->th.th_team
->t.t_task_team[this_thr->th.th_task_state])) { __kmp_debug_assert
("this_thr->th.th_task_team == this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]"
, "openmp/runtime/src/kmp_runtime.cpp", 1136); }
1135 this_thr->th.th_task_team ==if (!(this_thr->th.th_task_team == this_thr->th.th_team
->t.t_task_team[this_thr->th.th_task_state])) { __kmp_debug_assert
("this_thr->th.th_task_team == this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]"
, "openmp/runtime/src/kmp_runtime.cpp", 1136); }
1136 this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state])if (!(this_thr->th.th_task_team == this_thr->th.th_team
->t.t_task_team[this_thr->th.th_task_state])) { __kmp_debug_assert
("this_thr->th.th_task_team == this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]"
, "openmp/runtime/src/kmp_runtime.cpp", 1136); }
;
1137 KMP_DEBUG_ASSERT(serial_team->t.t_task_team[this_thr->th.th_task_state] ==if (!(serial_team->t.t_task_team[this_thr->th.th_task_state
] == __null)) { __kmp_debug_assert("serial_team->t.t_task_team[this_thr->th.th_task_state] == __null"
, "openmp/runtime/src/kmp_runtime.cpp", 1138); }
1138 NULL)if (!(serial_team->t.t_task_team[this_thr->th.th_task_state
] == __null)) { __kmp_debug_assert("serial_team->t.t_task_team[this_thr->th.th_task_state] == __null"
, "openmp/runtime/src/kmp_runtime.cpp", 1138); }
;
1139 KA_TRACE(20, ("__kmpc_serialized_parallel: T#%d pushing task_team %p / "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmpc_serialized_parallel: T#%d pushing task_team %p / "
"team %p, new task_team = NULL\n", global_tid, this_thr->
th.th_task_team, this_thr->th.th_team); }
1140 "team %p, new task_team = NULL\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmpc_serialized_parallel: T#%d pushing task_team %p / "
"team %p, new task_team = NULL\n", global_tid, this_thr->
th.th_task_team, this_thr->th.th_team); }
1141 global_tid, this_thr->th.th_task_team, this_thr->th.th_team))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmpc_serialized_parallel: T#%d pushing task_team %p / "
"team %p, new task_team = NULL\n", global_tid, this_thr->
th.th_task_team, this_thr->th.th_team); }
;
1142 this_thr->th.th_task_team = NULL__null;
1143 }
1144
1145 kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
1146 if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
1147 proc_bind = proc_bind_false;
1148 } else if (proc_bind == proc_bind_default) {
1149 // No proc_bind clause was specified, so use the current value
1150 // of proc-bind-var for this parallel region.
1151 proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
1152 }
1153 // Reset for next parallel region
1154 this_thr->th.th_set_proc_bind = proc_bind_default;
1155
1156#if OMPT_SUPPORT1
1157 ompt_data_t ompt_parallel_data = ompt_data_none{0};
1158 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid)__ompt_load_return_address(global_tid);
1159 if (ompt_enabled.enabled &&
1160 this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
1161
1162 ompt_task_info_t *parent_task_info;
1163 parent_task_info = OMPT_CUR_TASK_INFO(this_thr)(&(this_thr->th.th_current_task->ompt_task_info));
1164
1165 parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0)__builtin_frame_address(0);
1166 if (ompt_enabled.ompt_callback_parallel_begin) {
1167 int team_size = 1;
1168
1169 ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)ompt_callback_parallel_begin_callback(
1170 &(parent_task_info->task_data), &(parent_task_info->frame),
1171 &ompt_parallel_data, team_size,
1172 ompt_parallel_invoker_program | ompt_parallel_team, codeptr);
1173 }
1174 }
1175#endif // OMPT_SUPPORT
1176
1177 if (this_thr->th.th_team != serial_team) {
1178 // Nested level will be an index in the nested nthreads array
1179 int level = this_thr->th.th_team->t.t_level;
1180
1181 if (serial_team->t.t_serialized) {
1182 /* this serial team was already used
1183 TODO: increase performance by making these locks more specific */
1184 kmp_team_t *new_team;
1185
1186 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
1187
1188 new_team =
1189 __kmp_allocate_team(this_thr->th.th_root, 1, 1,
1190#if OMPT_SUPPORT1
1191 ompt_parallel_data,
1192#endif
1193 proc_bind, &this_thr->th.th_current_task->td_icvs,
1194 0 USE_NESTED_HOT_ARG(NULL), __null);
1195 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
1196 KMP_ASSERT(new_team)if (!(new_team)) { __kmp_debug_assert("new_team", "openmp/runtime/src/kmp_runtime.cpp"
, 1196); }
;
1197
1198 /* setup new serialized team and install it */
1199 new_team->t.t_threads[0] = this_thr;
1200 new_team->t.t_parent = this_thr->th.th_team;
1201 serial_team = new_team;
1202 this_thr->th.th_serial_team = serial_team;
1203
1204 KF_TRACE(if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n"
, global_tid, serial_team); }
1205 10,if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n"
, global_tid, serial_team); }
1206 ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n"
, global_tid, serial_team); }
1207 global_tid, serial_team))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n"
, global_tid, serial_team); }
;
1208
1209 /* TODO: the above breaks the requirement that, even if we run out of
1210 resources, we can still guarantee that serialized teams are OK, since we
1211 may need to allocate a new one */
1212 } else {
1213 KF_TRACE(if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n"
, global_tid, serial_team); }
1214 10,if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n"
, global_tid, serial_team); }
1215 ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n"
, global_tid, serial_team); }
1216 global_tid, serial_team))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n"
, global_tid, serial_team); }
;
1217 }
1218
1219 /* we have to initialize this serial team */
1220 KMP_DEBUG_ASSERT(serial_team->t.t_threads)if (!(serial_team->t.t_threads)) { __kmp_debug_assert("serial_team->t.t_threads"
, "openmp/runtime/src/kmp_runtime.cpp", 1220); }
;
1221 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr)if (!(serial_team->t.t_threads[0] == this_thr)) { __kmp_debug_assert
("serial_team->t.t_threads[0] == this_thr", "openmp/runtime/src/kmp_runtime.cpp"
, 1221); }
;
1222 KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team)if (!(this_thr->th.th_team != serial_team)) { __kmp_debug_assert
("this_thr->th.th_team != serial_team", "openmp/runtime/src/kmp_runtime.cpp"
, 1222); }
;
1223 serial_team->t.t_ident = loc;
1224 serial_team->t.t_serialized = 1;
1225 serial_team->t.t_nproc = 1;
1226 serial_team->t.t_parent = this_thr->th.th_team;
1227 serial_team->t.t_sched.sched = this_thr->th.th_team->t.t_sched.sched;
1228 this_thr->th.th_team = serial_team;
1229 serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;
1230
1231 KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d curtask=%p\n", global_tid,if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmpc_serialized_parallel: T#%d curtask=%p\n"
, global_tid, this_thr->th.th_current_task); }
1232 this_thr->th.th_current_task))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmpc_serialized_parallel: T#%d curtask=%p\n"
, global_tid, this_thr->th.th_current_task); }
;
1233 KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1)if (!(this_thr->th.th_current_task->td_flags.executing ==
1)) { __kmp_debug_assert("this_thr->th.th_current_task->td_flags.executing == 1"
, "openmp/runtime/src/kmp_runtime.cpp", 1233); }
;
1234 this_thr->th.th_current_task->td_flags.executing = 0;
1235
1236 __kmp_push_current_task_to_thread(this_thr, serial_team, 0);
1237
1238 /* TODO: GEH: do ICVs work for nested serialized teams? Don't we need an
1239 implicit task for each serialized task represented by
1240 team->t.t_serialized? */
1241 copy_icvs(&this_thr->th.th_current_task->td_icvs,
1242 &this_thr->th.th_current_task->td_parent->td_icvs);
1243
1244 // Thread value exists in the nested nthreads array for the next nested
1245 // level
1246 if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
1247 this_thr->th.th_current_task->td_icvs.nproc =
1248 __kmp_nested_nth.nth[level + 1];
1249 }
1250
1251 if (__kmp_nested_proc_bind.used &&
1252 (level + 1 < __kmp_nested_proc_bind.used)) {
1253 this_thr->th.th_current_task->td_icvs.proc_bind =
1254 __kmp_nested_proc_bind.bind_types[level + 1];
1255 }
1256
1257#if USE_DEBUGGER0
1258 serial_team->t.t_pkfn = (microtask_t)(~0); // For the debugger.
1259#endif
1260 this_thr->th.th_info.ds.ds_tid = 0;
1261
1262 /* set thread cache values */
1263 this_thr->th.th_team_nproc = 1;
1264 this_thr->th.th_team_master = this_thr;
1265 this_thr->th.th_team_serialized = 1;
1266
1267 serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
1268 serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;
1269 serial_team->t.t_def_allocator = this_thr->th.th_def_allocator; // save
1270
1271 propagateFPControl(serial_team);
1272
1273 /* check if we need to allocate dispatch buffers stack */
1274 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch)if (!(serial_team->t.t_dispatch)) { __kmp_debug_assert("serial_team->t.t_dispatch"
, "openmp/runtime/src/kmp_runtime.cpp", 1274); }
;
1275 if (!serial_team->t.t_dispatch->th_disp_buffer) {
1276 serial_team->t.t_dispatch->th_disp_buffer =
1277 (dispatch_private_info_t *)__kmp_allocate(___kmp_allocate((sizeof(dispatch_private_info_t)), "openmp/runtime/src/kmp_runtime.cpp"
, 1278)
1278 sizeof(dispatch_private_info_t))___kmp_allocate((sizeof(dispatch_private_info_t)), "openmp/runtime/src/kmp_runtime.cpp"
, 1278)
;
1279 }
1280 this_thr->th.th_dispatch = serial_team->t.t_dispatch;
1281
1282 KMP_MB();
1283
1284 } else {
1285 /* this serialized team is already being used,
1286 * that's fine, just add another nested level */
1287 KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team)if (!(this_thr->th.th_team == serial_team)) { __kmp_debug_assert
("this_thr->th.th_team == serial_team", "openmp/runtime/src/kmp_runtime.cpp"
, 1287); }
;
1288 KMP_DEBUG_ASSERT(serial_team->t.t_threads)if (!(serial_team->t.t_threads)) { __kmp_debug_assert("serial_team->t.t_threads"
, "openmp/runtime/src/kmp_runtime.cpp", 1288); }
;
1289 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr)if (!(serial_team->t.t_threads[0] == this_thr)) { __kmp_debug_assert
("serial_team->t.t_threads[0] == this_thr", "openmp/runtime/src/kmp_runtime.cpp"
, 1289); }
;
1290 ++serial_team->t.t_serialized;
1291 this_thr->th.th_team_serialized = serial_team->t.t_serialized;
1292
1293 // Nested level will be an index in the nested nthreads array
1294 int level = this_thr->th.th_team->t.t_level;
1295 // Thread value exists in the nested nthreads array for the next nested
1296 // level
1297 if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
1298 this_thr->th.th_current_task->td_icvs.nproc =
1299 __kmp_nested_nth.nth[level + 1];
1300 }
1301 serial_team->t.t_level++;
1302 KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d increasing nesting level "if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmpc_serialized_parallel: T#%d increasing nesting level "
"of serial team %p to %d\n", global_tid, serial_team, serial_team
->t.t_level); }
1303 "of serial team %p to %d\n",if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmpc_serialized_parallel: T#%d increasing nesting level "
"of serial team %p to %d\n", global_tid, serial_team, serial_team
->t.t_level); }
1304 global_tid, serial_team, serial_team->t.t_level))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmpc_serialized_parallel: T#%d increasing nesting level "
"of serial team %p to %d\n", global_tid, serial_team, serial_team
->t.t_level); }
;
1305
1306 /* allocate/push dispatch buffers stack */
1307 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch)if (!(serial_team->t.t_dispatch)) { __kmp_debug_assert("serial_team->t.t_dispatch"
, "openmp/runtime/src/kmp_runtime.cpp", 1307); }
;
1308 {
1309 dispatch_private_info_t *disp_buffer =
1310 (dispatch_private_info_t *)__kmp_allocate(___kmp_allocate((sizeof(dispatch_private_info_t)), "openmp/runtime/src/kmp_runtime.cpp"
, 1311)
1311 sizeof(dispatch_private_info_t))___kmp_allocate((sizeof(dispatch_private_info_t)), "openmp/runtime/src/kmp_runtime.cpp"
, 1311)
;
1312 disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
1313 serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
1314 }
1315 this_thr->th.th_dispatch = serial_team->t.t_dispatch;
1316
1317 KMP_MB();
1318 }
1319 KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq)if ((serial_team->t.t_cancel_request) != (cancel_noreq)) (
serial_team->t.t_cancel_request) = (cancel_noreq)
;
1320
1321 // Perform the display affinity functionality for
1322 // serialized parallel regions
1323 if (__kmp_display_affinity) {
1324 if (this_thr->th.th_prev_level != serial_team->t.t_level ||
1325 this_thr->th.th_prev_num_threads != 1) {
1326 // NULL means use the affinity-format-var ICV
1327 __kmp_aux_display_affinity(global_tid, NULL__null);
1328 this_thr->th.th_prev_level = serial_team->t.t_level;
1329 this_thr->th.th_prev_num_threads = 1;
1330 }
1331 }
1332
1333 if (__kmp_env_consistency_check)
1334 __kmp_push_parallel(global_tid, NULL__null);
1335#if OMPT_SUPPORT1
1336 serial_team->t.ompt_team_info.master_return_address = codeptr;
1337 if (ompt_enabled.enabled &&
1338 this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
1339 OMPT_CUR_TASK_INFO(this_thr)(&(this_thr->th.th_current_task->ompt_task_info))->frame.exit_frame.ptr =
1340 OMPT_GET_FRAME_ADDRESS(0)__builtin_frame_address(0);
1341
1342 ompt_lw_taskteam_t lw_taskteam;
1343 __ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid,
1344 &ompt_parallel_data, codeptr);
1345
1346 __ompt_lw_taskteam_link(&lw_taskteam, this_thr, 1);
1347 // Don't use lw_taskteam after linking. Content was swapped.
1348
1349 /* OMPT implicit task begin */
1350 if (ompt_enabled.ompt_callback_implicit_task) {
1351 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)ompt_callback_implicit_task_callback(
1352 ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr)(&(this_thr->th.th_team->t.ompt_team_info.parallel_data
))
,
1353 OMPT_CUR_TASK_DATA(this_thr)(&(this_thr->th.th_current_task->ompt_task_info.task_data
))
, 1, __kmp_tid_from_gtid(global_tid),
1354 ompt_task_implicit); // TODO: Can this be ompt_task_initial?
1355 OMPT_CUR_TASK_INFO(this_thr)(&(this_thr->th.th_current_task->ompt_task_info))->thread_num =
1356 __kmp_tid_from_gtid(global_tid);
1357 }
1358
1359 /* OMPT state */
1360 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
1361 OMPT_CUR_TASK_INFO(this_thr)(&(this_thr->th.th_current_task->ompt_task_info))->frame.exit_frame.ptr =
1362 OMPT_GET_FRAME_ADDRESS(0)__builtin_frame_address(0);
1363 }
1364#endif
1365}
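The nested branch of __kmp_serialized_parallel grows the serialized team's dispatch-buffer stack by allocating a fresh buffer and pushing it onto the singly linked list headed by th_disp_buffer, one buffer per nesting level. A compact sketch of that push, together with the matching pop the runtime performs when the serialized region ends, is shown below; the type and function names are stand-ins, not the runtime's dispatch_private_info_t or its allocator.

    // Sketch only: per-nesting-level dispatch buffers kept as a linked stack.
    struct toy_disp_buffer {
      toy_disp_buffer *next; // buffer of the enclosing (outer) level
      // ... per-level dispatch state would live here ...
    };

    struct toy_dispatch {
      toy_disp_buffer *head = nullptr;
    };

    // Entering one more serialized nesting level: push a new buffer.
    static void toy_push_level(toy_dispatch &d) {
      toy_disp_buffer *buf = new toy_disp_buffer{};
      buf->next = d.head;
      d.head = buf;
    }

    // Leaving the level again: pop the buffer and free it.
    static void toy_pop_level(toy_dispatch &d) {
      toy_disp_buffer *buf = d.head;
      d.head = buf->next;
      delete buf;
    }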
1366
1367// Test if this fork is for a team closely nested in a teams construct
1368static inline bool __kmp_is_fork_in_teams(kmp_info_t *master_th,
1369 microtask_t microtask, int level,
1370 int teams_level, kmp_va_list ap) {
1371 return (master_th->th.th_teams_microtask && ap &&
1372 microtask != (microtask_t)__kmp_teams_master && level == teams_level);
1373}
1374
1375// Test if this fork is for the teams construct, i.e. to form the outer league
1376// of teams
1377static inline bool __kmp_is_entering_teams(int active_level, int level,
1378 int teams_level, kmp_va_list ap) {
1379 return ((ap == NULL__null && active_level == 0) ||
1380 (ap && teams_level > 0 && teams_level == level));
1381}
1382
1383 // AC: This is the start of a parallel region nested inside a teams construct.
1384 // The team is actual (hot); all workers are ready at the fork barrier.
1385 // No lock is needed to initialize the team a bit and then free the workers.
1386static inline int
1387__kmp_fork_in_teams(ident_t *loc, int gtid, kmp_team_t *parent_team,
1388 kmp_int32 argc, kmp_info_t *master_th, kmp_root_t *root,
1389 enum fork_context_e call_context, microtask_t microtask,
1390 launch_t invoker, int master_set_numthreads, int level,
1391#if OMPT_SUPPORT1
1392 ompt_data_t ompt_parallel_data, void *return_address,
1393#endif
1394 kmp_va_list ap) {
1395 void **argv;
1396 int i;
1397
1398 parent_team->t.t_ident = loc;
1399 __kmp_alloc_argv_entries(argc, parent_team, TRUE(!0));
1400 parent_team->t.t_argc = argc;
1401 argv = (void **)parent_team->t.t_argv;
1402 for (i = argc - 1; i >= 0; --i) {
1403 *argv++ = va_arg(kmp_va_deref(ap), void *)__builtin_va_arg((*(ap)), void *);
1404 }
1405 // Increment our nested depth levels, but do not increase the serialization
1406 if (parent_team == master_th->th.th_serial_team) {
1407 // AC: we are in serialized parallel
1408 __kmpc_serialized_parallel(loc, gtid);
1409 KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1)if (!(parent_team->t.t_serialized > 1)) { __kmp_debug_assert
("parent_team->t.t_serialized > 1", "openmp/runtime/src/kmp_runtime.cpp"
, 1409); }
;
1410
1411 if (call_context == fork_context_gnu) {
1412 // AC: need to decrement t_serialized for enquiry functions to work
1413 // correctly, will restore at join time
1414 parent_team->t.t_serialized--;
1415 return TRUE(!0);
1416 }
1417
1418#if OMPD_SUPPORT1
1419 parent_team->t.t_pkfn = microtask;
1420#endif
1421
1422#if OMPT_SUPPORT1
1423 void *dummy;
1424 void **exit_frame_p;
1425 ompt_data_t *implicit_task_data;
1426 ompt_lw_taskteam_t lw_taskteam;
1427
1428 if (ompt_enabled.enabled) {
1429 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1430 &ompt_parallel_data, return_address);
1431 exit_frame_p = &(lw_taskteam.ompt_task_info.frame.exit_frame.ptr);
1432
1433 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
1434 // Don't use lw_taskteam after linking. Content was swapped.
1435
1436 /* OMPT implicit task begin */
1437 implicit_task_data = OMPT_CUR_TASK_DATA(master_th)(&(master_th->th.th_current_task->ompt_task_info.task_data
))
;
1438 if (ompt_enabled.ompt_callback_implicit_task) {
1439 OMPT_CUR_TASK_INFO(master_th)(&(master_th->th.th_current_task->ompt_task_info))->thread_num = __kmp_tid_from_gtid(gtid);
1440 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)ompt_callback_implicit_task_callback(
1441 ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th)(&(master_th->th.th_team->t.ompt_team_info.parallel_data
))
, implicit_task_data,
1442 1, OMPT_CUR_TASK_INFO(master_th)(&(master_th->th.th_current_task->ompt_task_info))->thread_num, ompt_task_implicit);
1443 }
1444
1445 /* OMPT state */
1446 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1447 } else {
1448 exit_frame_p = &dummy;
1449 }
1450#endif
1451
1452 // AC: need to decrement t_serialized for enquiry functions to work
1453 // correctly, will restore at join time
1454 parent_team->t.t_serialized--;
1455
1456 {
1457 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel)((void)0);
1458 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK)((void)0);
1459 __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
1460#if OMPT_SUPPORT1
1461 ,
1462 exit_frame_p
1463#endif
1464 );
1465 }
1466
1467#if OMPT_SUPPORT1
1468 if (ompt_enabled.enabled) {
1469 *exit_frame_p = NULL__null;
1470 OMPT_CUR_TASK_INFO(master_th)(&(master_th->th.th_current_task->ompt_task_info))->frame.exit_frame = ompt_data_none{0};
1471 if (ompt_enabled.ompt_callback_implicit_task) {
1472 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)ompt_callback_implicit_task_callback(
1473 ompt_scope_end, NULL__null, implicit_task_data, 1,
1474 OMPT_CUR_TASK_INFO(master_th)(&(master_th->th.th_current_task->ompt_task_info))->thread_num, ompt_task_implicit);
1475 }
1476 ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th)(&(master_th->th.th_team->t.ompt_team_info.parallel_data
))
;
1477 __ompt_lw_taskteam_unlink(master_th);
1478 if (ompt_enabled.ompt_callback_parallel_end) {
1479 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)ompt_callback_parallel_end_callback(
1480 &ompt_parallel_data, OMPT_CUR_TASK_DATA(master_th)(&(master_th->th.th_current_task->ompt_task_info.task_data
))
,
1481 OMPT_INVOKER(call_context)((call_context == fork_context_gnu) ? ompt_parallel_invoker_program
: ompt_parallel_invoker_runtime)
| ompt_parallel_team, return_address);
1482 }
1483 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1484 }
1485#endif
1486 return TRUE(!0);
1487 }
1488
1489 parent_team->t.t_pkfn = microtask;
1490 parent_team->t.t_invoke = invoker;
1491 KMP_ATOMIC_INC(&root->r.r_in_parallel)(&root->r.r_in_parallel)->fetch_add(1, std::memory_order_acq_rel
)
;
1492 parent_team->t.t_active_level++;
1493 parent_team->t.t_level++;
1494 parent_team->t.t_def_allocator = master_th->th.th_def_allocator; // save
1495
1496 // If the number of threads allocated to the team is less than the thread
1497 // limit, update the thread limit here. th_teams_size.nth is specific to this
1498 // team nested in a teams construct; the team is fully created, and we're
1499 // about to do the actual fork. Best to do this here so that the subsequent
1500 // uses below and in the join have the correct value.
1501 master_th->th.th_teams_size.nth = parent_team->t.t_nproc;
1502
1503#if OMPT_SUPPORT1
1504 if (ompt_enabled.enabled) {
1505 ompt_lw_taskteam_t lw_taskteam;
1506 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, &ompt_parallel_data,
1507 return_address);
1508 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 1, true);
1509 }
1510#endif
1511
1512 /* Change number of threads in the team if requested */
1513 if (master_set_numthreads) { // The parallel has num_threads clause
1514 if (master_set_numthreads <= master_th->th.th_teams_size.nth) {
1515 // AC: we can only reduce the number of threads dynamically; we can't increase it
1516 kmp_info_t **other_threads = parent_team->t.t_threads;
1517 // NOTE: if using distributed barrier, we need to run this code block
1518 // even when the team size appears not to have changed from the max.
1519 int old_proc = master_th->th.th_teams_size.nth;
1520 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
1521 __kmp_resize_dist_barrier(parent_team, old_proc, master_set_numthreads);
1522 __kmp_add_threads_to_team(parent_team, master_set_numthreads);
1523 }
1524 parent_team->t.t_nproc = master_set_numthreads;
1525 for (i = 0; i < master_set_numthreads; ++i) {
1526 other_threads[i]->th.th_team_nproc = master_set_numthreads;
1527 }
1528 }
1529 // Keep extra threads hot in the team for possible next parallels
1530 master_th->th.th_set_nproc = 0;
1531 }
1532
1533#if USE_DEBUGGER0
1534 if (__kmp_debugging) { // Let debugger override number of threads.
1535 int nth = __kmp_omp_num_threads(loc);
1536 if (nth > 0) { // 0 means debugger doesn't want to change num threads
1537 master_set_numthreads = nth;
1538 }
1539 }
1540#endif
1541
1542 // Figure out the proc_bind policy for the nested parallel within teams
1543 kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
1544 // proc_bind_default means don't update
1545 kmp_proc_bind_t proc_bind_icv = proc_bind_default;
1546 if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
1547 proc_bind = proc_bind_false;
1548 } else {
1549 // No proc_bind clause specified; use current proc-bind-var
1550 if (proc_bind == proc_bind_default) {
1551 proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
1552 }
1553 /* else: The proc_bind policy was specified explicitly on the parallel clause.
1554 This overrides proc-bind-var for this parallel region, but does not
1555 change proc-bind-var. */
1556 // Figure the value of proc-bind-var for the child threads.
1557 if ((level + 1 < __kmp_nested_proc_bind.used) &&
1558 (__kmp_nested_proc_bind.bind_types[level + 1] !=
1559 master_th->th.th_current_task->td_icvs.proc_bind)) {
1560 proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
1561 }
1562 }
1563 KMP_CHECK_UPDATE(parent_team->t.t_proc_bind, proc_bind)if ((parent_team->t.t_proc_bind) != (proc_bind)) (parent_team
->t.t_proc_bind) = (proc_bind)
;
1564 // Need to change the bind-var ICV to correct value for each implicit task
1565 if (proc_bind_icv != proc_bind_default &&
1566 master_th->th.th_current_task->td_icvs.proc_bind != proc_bind_icv) {
1567 kmp_info_t **other_threads = parent_team->t.t_threads;
1568 for (i = 0; i < master_th->th.th_team_nproc; ++i) {
1569 other_threads[i]->th.th_current_task->td_icvs.proc_bind = proc_bind_icv;
1570 }
1571 }
1572 // Reset for next parallel region
1573 master_th->th.th_set_proc_bind = proc_bind_default;
1574
1575#if USE_ITT_BUILD1 && USE_ITT_NOTIFY1
1576 if (((__itt_frame_submit_v3_ptr__kmp_itt_frame_submit_v3_ptr__3_0 && __itt_get_timestamp_ptr__kmp_itt_get_timestamp_ptr__3_0) ||
1577 KMP_ITT_DEBUG0) &&
1578 __kmp_forkjoin_frames_mode == 3 &&
1579 parent_team->t.t_active_level == 1 // only report frames at level 1
1580 && master_th->th.th_teams_size.nteams == 1) {
1581 kmp_uint64 tmp_time = __itt_get_timestamp(!__kmp_itt_get_timestamp_ptr__3_0) ? 0 : __kmp_itt_get_timestamp_ptr__3_0();
1582 master_th->th.th_frame_time = tmp_time;
1583 parent_team->t.t_region_time = tmp_time;
1584 }
1585 if (__itt_stack_caller_create_ptr__kmp_itt_stack_caller_create_ptr__3_0) {
1586 KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL)if (!(parent_team->t.t_stack_id == __null)) { __kmp_debug_assert
("parent_team->t.t_stack_id == __null", "openmp/runtime/src/kmp_runtime.cpp"
, 1586); }
;
1587 // create new stack stitching id before entering fork barrier
1588 parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();
1589 }
1590#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1591#if KMP_AFFINITY_SUPPORTED1
1592 __kmp_partition_places(parent_team);
1593#endif
1594
1595 KF_TRACE(10, ("__kmp_fork_in_teams: before internal fork: root=%p, team=%p, "if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_fork_in_teams: before internal fork: root=%p, team=%p, "
"master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid
); }
1596 "master_th=%p, gtid=%d\n",if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_fork_in_teams: before internal fork: root=%p, team=%p, "
"master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid
); }
1597 root, parent_team, master_th, gtid))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_fork_in_teams: before internal fork: root=%p, team=%p, "
"master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid
); }
;
1598 __kmp_internal_fork(loc, gtid, parent_team);
1599 KF_TRACE(10, ("__kmp_fork_in_teams: after internal fork: root=%p, team=%p, "if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_fork_in_teams: after internal fork: root=%p, team=%p, "
"master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid
); }
1600 "master_th=%p, gtid=%d\n",if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_fork_in_teams: after internal fork: root=%p, team=%p, "
"master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid
); }
1601 root, parent_team, master_th, gtid))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_fork_in_teams: after internal fork: root=%p, team=%p, "
"master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid
); }
;
1602
1603 if (call_context == fork_context_gnu)
1604 return TRUE(!0);
1605
1606 /* Invoke microtask for PRIMARY thread */
1607 KA_TRACE(20, ("__kmp_fork_in_teams: T#%d(%d:0) invoke microtask = %p\n", gtid,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_in_teams: T#%d(%d:0) invoke microtask = %p\n"
, gtid, parent_team->t.t_id, parent_team->t.t_pkfn); }
1608 parent_team->t.t_id, parent_team->t.t_pkfn))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_in_teams: T#%d(%d:0) invoke microtask = %p\n"
, gtid, parent_team->t.t_id, parent_team->t.t_pkfn); }
;
1609
1610 if (!parent_team->t.t_invoke(gtid)) {
1611 KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread")if (!(0)) { __kmp_debug_assert(("cannot invoke microtask for PRIMARY thread"
), "openmp/runtime/src/kmp_runtime.cpp", 1611); }
;
1612 }
1613 KA_TRACE(20, ("__kmp_fork_in_teams: T#%d(%d:0) done microtask = %p\n", gtid,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_in_teams: T#%d(%d:0) done microtask = %p\n"
, gtid, parent_team->t.t_id, parent_team->t.t_pkfn); }
1614 parent_team->t.t_id, parent_team->t.t_pkfn))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_in_teams: T#%d(%d:0) done microtask = %p\n"
, gtid, parent_team->t.t_id, parent_team->t.t_pkfn); }
;
1615 KMP_MB(); /* Flush all pending memory write invalidates. */
1616
1617 KA_TRACE(20, ("__kmp_fork_in_teams: parallel exit T#%d\n", gtid))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_in_teams: parallel exit T#%d\n"
, gtid); }
;
1618
1619 return TRUE(!0);
1620}
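The proc_bind handling in __kmp_fork_in_teams resolves its inputs in a fixed order: a proc-bind-var of false disables binding outright, an explicit clause value overrides proc-bind-var for this region only, and otherwise the current proc-bind-var is used, with the child threads' bind-var possibly taken from the nested table for the next level. The sketch below restates that precedence; the enum values and the table type are hypothetical stand-ins for the runtime's globals.

    // Sketch only: precedence for resolving the proc_bind of a parallel region.
    enum toy_proc_bind { pb_default, pb_false, pb_primary, pb_close, pb_spread };

    struct toy_nested_binds {
      const toy_proc_bind *bind_types; // per-nesting-level bind-var values
      int used;                        // number of valid entries
    };

    static toy_proc_bind toy_resolve_proc_bind(toy_proc_bind clause_value,
                                               toy_proc_bind proc_bind_var) {
      if (proc_bind_var == pb_false)
        return pb_false;        // binding disabled: overrides everything
      if (clause_value == pb_default)
        return proc_bind_var;   // no clause: fall back to proc-bind-var
      return clause_value;      // explicit clause wins, for this region only
    }

    // Child threads may inherit a different bind-var from the nested table,
    // indexed by the next nesting level.
    static toy_proc_bind toy_child_bind_var(const toy_nested_binds &nb,
                                            int level,
                                            toy_proc_bind current_var) {
      if (level + 1 < nb.used && nb.bind_types[level + 1] != current_var)
        return nb.bind_types[level + 1];
      return current_var;
    }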
1621
1622// Create a serialized parallel region
1623static inline int
1624__kmp_serial_fork_call(ident_t *loc, int gtid, enum fork_context_e call_context,
1625 kmp_int32 argc, microtask_t microtask, launch_t invoker,
1626 kmp_info_t *master_th, kmp_team_t *parent_team,
1627#if OMPT_SUPPORT1
1628 ompt_data_t *ompt_parallel_data, void **return_address,
1629 ompt_data_t **parent_task_data,
1630#endif
1631 kmp_va_list ap) {
1632 kmp_team_t *team;
1633 int i;
1634 void **argv;
1635
1636/* josh todo: hypothetical question: what do we do for OS X*? */
1637#if KMP_OS_LINUX1 && \
1638 (KMP_ARCH_X860 || KMP_ARCH_X86_641 || KMP_ARCH_ARM || KMP_ARCH_AARCH640)
1639 void *args[argc];
1640#else
1641 void **args = (void **)KMP_ALLOCA(argc * sizeof(void *))__builtin_alloca (argc * sizeof(void *));
1642#endif /* KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || \
1643 KMP_ARCH_AARCH64) */
1644
1645 KA_TRACE(
1646 20, ("__kmp_serial_fork_call: T#%d serializing parallel region\n", gtid))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_serial_fork_call: T#%d serializing parallel region\n", gtid); };
26 Assuming 'kmp_a_debug' is < 20
27 Taking false branch
1647
1648 __kmpc_serialized_parallel(loc, gtid);
1649
1650#if OMPD_SUPPORT1
1651 master_th->th.th_serial_team->t.t_pkfn = microtask;
1652#endif
1653
1654 if (call_context == fork_context_intel) {
28
Assuming 'call_context' is equal to fork_context_intel
29
Taking true branch
1655 /* TODO this sucks, use the compiler itself to pass args! :) */
1656 master_th->th.th_serial_team->t.t_ident = loc;
1657 if (!ap) {
29.1 'ap' is non-null
30 Taking false branch
1658 // revert change made in __kmpc_serialized_parallel()
1659 master_th->th.th_serial_team->t.t_level--;
1660// Get args from parent team for teams construct
1661
1662#if OMPT_SUPPORT1
1663 void *dummy;
1664 void **exit_frame_p;
1665 ompt_task_info_t *task_info;
1666 ompt_lw_taskteam_t lw_taskteam;
1667
1668 if (ompt_enabled.enabled) {
1669 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1670 ompt_parallel_data, *return_address);
1671
1672 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
1673 // Don't use lw_taskteam after linking. Content was swapped.
1674 task_info = OMPT_CUR_TASK_INFO(master_th)(&(master_th->th.th_current_task->ompt_task_info));
1675 exit_frame_p = &(task_info->frame.exit_frame.ptr);
1676 if (ompt_enabled.ompt_callback_implicit_task) {
1677 OMPT_CUR_TASK_INFO(master_th)(&(master_th->th.th_current_task->ompt_task_info))->thread_num = __kmp_tid_from_gtid(gtid);
1678 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)ompt_callback_implicit_task_callback(
1679 ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th)(&(master_th->th.th_team->t.ompt_team_info.parallel_data
))
,
1680 &(task_info->task_data), 1,
1681 OMPT_CUR_TASK_INFO(master_th)(&(master_th->th.th_current_task->ompt_task_info))->thread_num, ompt_task_implicit);
1682 }
1683
1684 /* OMPT state */
1685 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1686 } else {
1687 exit_frame_p = &dummy;
1688 }
1689#endif
1690
1691 {
1692 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel)((void)0);
1693 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK)((void)0);
1694 __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
1695#if OMPT_SUPPORT1
1696 ,
1697 exit_frame_p
1698#endif
1699 );
1700 }
1701
1702#if OMPT_SUPPORT1
1703 if (ompt_enabled.enabled) {
1704 *exit_frame_p = NULL__null;
1705 if (ompt_enabled.ompt_callback_implicit_task) {
1706 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)ompt_callback_implicit_task_callback(
1707 ompt_scope_end, NULL__null, &(task_info->task_data), 1,
1708 OMPT_CUR_TASK_INFO(master_th)(&(master_th->th.th_current_task->ompt_task_info))->thread_num, ompt_task_implicit);
1709 }
1710 *ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th)(&(master_th->th.th_team->t.ompt_team_info.parallel_data
))
;
1711 __ompt_lw_taskteam_unlink(master_th);
1712 if (ompt_enabled.ompt_callback_parallel_end) {
1713 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)ompt_callback_parallel_end_callback(
1714 ompt_parallel_data, *parent_task_data,
1715 OMPT_INVOKER(call_context)((call_context == fork_context_gnu) ? ompt_parallel_invoker_program
: ompt_parallel_invoker_runtime)
| ompt_parallel_team, *return_address);
1716 }
1717 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1718 }
1719#endif
1720 } else if (microtask == (microtask_t)__kmp_teams_master) {
31
Assuming 'microtask' is equal to __kmp_teams_master
32
Taking true branch
1721 KMP_DEBUG_ASSERT(master_th->th.th_team == master_th->th.th_serial_team)if (!(master_th->th.th_team == master_th->th.th_serial_team
)) { __kmp_debug_assert("master_th->th.th_team == master_th->th.th_serial_team"
, "openmp/runtime/src/kmp_runtime.cpp", 1721); }
;
33
Assuming field 'th_team' is equal to field 'th_serial_team'
34
Taking false branch
1722 team = master_th->th.th_team;
1723 // team->t.t_pkfn = microtask;
1724 team->t.t_invoke = invoker;
1725 __kmp_alloc_argv_entries(argc, team, TRUE(!0));
1726 team->t.t_argc = argc;
1727 argv = (void **)team->t.t_argv;
1728 if (ap) {
34.1 'ap' is non-null
35 Taking true branch
1729 for (i = argc - 1; i >= 0; --i)
35.1 'i' is >= 0
36 Loop condition is true. Entering loop body
37 Assuming 'i' is < 0
38 Loop condition is false. Execution continues on line 1738
1730 *argv++ = va_arg(kmp_va_deref(ap), void *)__builtin_va_arg((*(ap)), void *);
1731 } else {
1732 for (i = 0; i < argc; ++i)
1733 // Get args from parent team for teams construct
1734 argv[i] = parent_team->t.t_argv[i];
1735 }
1736 // AC: revert change made in __kmpc_serialized_parallel()
1737 // because initial code in teams should have level=0
1738 team->t.t_level--;
1739 // AC: call special invoker for outer "parallel" of teams construct
1740 invoker(gtid);
1741#if OMPT_SUPPORT1
1742 if (ompt_enabled.enabled) {
39
Assuming field 'enabled' is not equal to 0
40
Taking true branch
1743 ompt_task_info_t *task_info = OMPT_CUR_TASK_INFO(master_th)(&(master_th->th.th_current_task->ompt_task_info));
1744 if (ompt_enabled.ompt_callback_implicit_task) {
41
Assuming field 'ompt_callback_implicit_task' is 0
42
Taking false branch
1745 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)ompt_callback_implicit_task_callback(
1746 ompt_scope_end, NULL__null, &(task_info->task_data), 0,
1747 OMPT_CUR_TASK_INFO(master_th)(&(master_th->th.th_current_task->ompt_task_info))->thread_num, ompt_task_initial);
1748 }
1749 if (ompt_enabled.ompt_callback_parallel_end) {
43
Assuming field 'ompt_callback_parallel_end' is not equal to 0
44
Taking true branch
1750 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)ompt_callback_parallel_end_callback(
1751 ompt_parallel_data, *parent_task_data,
46 2nd function call argument is an uninitialized value
1752 OMPT_INVOKER(call_context)((call_context == fork_context_gnu) ? ompt_parallel_invoker_program : ompt_parallel_invoker_runtime) | ompt_parallel_league,
45 '?' condition is false
1753 *return_address);
1754 }
1755 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1756 }
1757#endif
1758 } else {
1759 argv = args;
1760 for (i = argc - 1; i >= 0; --i)
1761 *argv++ = va_arg(kmp_va_deref(ap), void *)__builtin_va_arg((*(ap)), void *);
1762 KMP_MB();
1763
1764#if OMPT_SUPPORT1
1765 void *dummy;
1766 void **exit_frame_p;
1767 ompt_task_info_t *task_info;
1768 ompt_lw_taskteam_t lw_taskteam;
1769 ompt_data_t *implicit_task_data;
1770
1771 if (ompt_enabled.enabled) {
1772 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1773 ompt_parallel_data, *return_address);
1774 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
1775 // Don't use lw_taskteam after linking. Content was swapped.
1776 task_info = OMPT_CUR_TASK_INFO(master_th)(&(master_th->th.th_current_task->ompt_task_info));
1777 exit_frame_p = &(task_info->frame.exit_frame.ptr);
1778
1779 /* OMPT implicit task begin */
1780 implicit_task_data = OMPT_CUR_TASK_DATA(master_th)(&(master_th->th.th_current_task->ompt_task_info.task_data
))
;
1781 if (ompt_enabled.ompt_callback_implicit_task) {
1782 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)ompt_callback_implicit_task_callback(
1783 ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th)(&(master_th->th.th_team->t.ompt_team_info.parallel_data
))
,
1784 implicit_task_data, 1, __kmp_tid_from_gtid(gtid),
1785 ompt_task_implicit);
1786 OMPT_CUR_TASK_INFO(master_th)(&(master_th->th.th_current_task->ompt_task_info))->thread_num = __kmp_tid_from_gtid(gtid);
1787 }
1788
1789 /* OMPT state */
1790 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1791 } else {
1792 exit_frame_p = &dummy;
1793 }
1794#endif
1795
1796 {
1797 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel)((void)0);
1798 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK)((void)0);
1799 __kmp_invoke_microtask(microtask, gtid, 0, argc, args
1800#if OMPT_SUPPORT1
1801 ,
1802 exit_frame_p
1803#endif
1804 );
1805 }
1806
1807#if OMPT_SUPPORT1
1808 if (ompt_enabled.enabled) {
1809 *exit_frame_p = NULL__null;
1810 if (ompt_enabled.ompt_callback_implicit_task) {
1811 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)ompt_callback_implicit_task_callback(
1812 ompt_scope_end, NULL__null, &(task_info->task_data), 1,
1813 OMPT_CUR_TASK_INFO(master_th)(&(master_th->th.th_current_task->ompt_task_info))->thread_num, ompt_task_implicit);
1814 }
1815
1816 *ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th)(&(master_th->th.th_team->t.ompt_team_info.parallel_data
))
;
1817 __ompt_lw_taskteam_unlink(master_th);
1818 if (ompt_enabled.ompt_callback_parallel_end) {
1819 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)ompt_callback_parallel_end_callback(
1820 ompt_parallel_data, *parent_task_data,
1821 OMPT_INVOKER(call_context)((call_context == fork_context_gnu) ? ompt_parallel_invoker_program
: ompt_parallel_invoker_runtime)
| ompt_parallel_team, *return_address);
1822 }
1823 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1824 }
1825#endif
1826 }
1827 } else if (call_context == fork_context_gnu) {
1828#if OMPT_SUPPORT1
1829 if (ompt_enabled.enabled) {
1830 ompt_lw_taskteam_t lwt;
1831 __ompt_lw_taskteam_init(&lwt, master_th, gtid, ompt_parallel_data,
1832 *return_address);
1833
1834 lwt.ompt_task_info.frame.exit_frame = ompt_data_none{0};
1835 __ompt_lw_taskteam_link(&lwt, master_th, 1);
1836 }
1837 // Don't use lw_taskteam after linking. Content was swapped.
1838#endif
1839
1840 // we were called from GNU native code
1841 KA_TRACE(20, ("__kmp_serial_fork_call: T#%d serial exit\n", gtid))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_serial_fork_call: T#%d serial exit\n"
, gtid); }
;
1842 return FALSE0;
1843 } else {
1844 KMP_ASSERT2(call_context < fork_context_last,if (!(call_context < fork_context_last)) { __kmp_debug_assert
(("__kmp_serial_fork_call: unknown fork_context parameter"), "openmp/runtime/src/kmp_runtime.cpp"
, 1845); }
1845 "__kmp_serial_fork_call: unknown fork_context parameter")if (!(call_context < fork_context_last)) { __kmp_debug_assert
(("__kmp_serial_fork_call: unknown fork_context parameter"), "openmp/runtime/src/kmp_runtime.cpp"
, 1845); }
;
1846 }
1847
1848 KA_TRACE(20, ("__kmp_serial_fork_call: T#%d serial exit\n", gtid))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_serial_fork_call: T#%d serial exit\n"
, gtid); }
;
1849 KMP_MB();
1850 return FALSE0;
1851}
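The path notes above show how the report arises: parent_task_data is written in __kmp_fork_call only when ompt_enabled.enabled is observed as nonzero, and on this path it is observed as 0 (steps 7 and 8), so the write is skipped; __kmp_serial_fork_call then re-reads ompt_enabled.enabled, the analyzer assumes it is now nonzero (step 39), and *parent_task_data reaches the parallel-end callback while still uninitialized (step 46, source line 1750). The stripped-down example below reproduces that shape; every name in it is hypothetical, and whether the flag can actually change between the two checks in the real runtime is a separate question.

    // Sketch only: an out-parameter initialized under one check of a mutable
    // flag but consumed under a second, independent check of the same flag.
    #include <cstdio>

    struct toy_ompt { int enabled; } toy_state; // plays the role of ompt_enabled

    static void toy_parallel_end(int task_data) { // plays the role of the callback
      std::printf("task_data = %d\n", task_data);
    }

    static void toy_serial_fork(int *parent_task_data) {
      if (toy_state.enabled)                 // second check: may now be true...
        toy_parallel_end(*parent_task_data); // ...so this reads an uninitialized value
    }

    static void toy_fork() {
      int parent_task_data;                  // only written if enabled at the first check
      if (toy_state.enabled)
        parent_task_data = 42;
      toy_serial_fork(&parent_task_data);    // the flagged path: 0 here, nonzero later
    }

    int main() { toy_fork(); }

In this toy version, initializing the variable unconditionally, or passing the result of the first check down to the callee, removes the uninitialized read; whether either change is appropriate for the runtime is a judgment for its maintainers.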
1852
1853/* most of the work for a fork */
1854/* return true if we really went parallel, false if serialized */
1855int __kmp_fork_call(ident_t *loc, int gtid,
1856 enum fork_context_e call_context, // Intel, GNU, ...
1857 kmp_int32 argc, microtask_t microtask, launch_t invoker,
1858 kmp_va_list ap) {
1859 void **argv;
1860 int i;
1861 int master_tid;
1862 int master_this_cons;
1863 kmp_team_t *team;
1864 kmp_team_t *parent_team;
1865 kmp_info_t *master_th;
1866 kmp_root_t *root;
1867 int nthreads;
1868 int master_active;
1869 int master_set_numthreads;
1870 int level;
1871 int active_level;
1872 int teams_level;
1873#if KMP_NESTED_HOT_TEAMS1
1874 kmp_hot_team_ptr_t **p_hot_teams;
1875#endif
1876 { // KMP_TIME_BLOCK
1877 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call)((void)0);
1878 KMP_COUNT_VALUE(OMP_PARALLEL_args, argc)((void)0);
1879
1880 KA_TRACE(20, ("__kmp_fork_call: enter T#%d\n", gtid))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_call: enter T#%d\n"
, gtid); }
;
1
Assuming 'kmp_a_debug' is < 20
1881 if (__kmp_stkpadding > 0 && __kmp_root[gtid] != NULL__null) {
2
Assuming '__kmp_stkpadding' is <= 0
1882 /* Some systems prefer the stack for the root thread(s) to start with */
1883 /* some gap from the parent stack to prevent false sharing. */
1884 void *dummy = KMP_ALLOCA(__kmp_stkpadding)__builtin_alloca (__kmp_stkpadding);
1885 /* These 2 lines below are so this does not get optimized out */
1886 if (__kmp_stkpadding > KMP_MAX_STKPADDING(2 * 1024 * 1024))
1887 __kmp_stkpadding += (short)((kmp_int64)dummy);
1888 }
1889
1890 /* initialize if needed */
1891 KMP_DEBUG_ASSERT(
1892 __kmp_init_serial)if (!(__kmp_init_serial)) { __kmp_debug_assert("__kmp_init_serial", "openmp/runtime/src/kmp_runtime.cpp", 1892); }; // AC: potentially unsafe, not in sync with shutdown
3 Assuming '__kmp_init_serial' is not equal to 0
4 Taking false branch
1893 if (!TCR_4(__kmp_init_parallel)(__kmp_init_parallel))
5
Assuming '__kmp_init_parallel' is not equal to 0
6
Taking false branch
1894 __kmp_parallel_initialize();
1895 __kmp_resume_if_soft_paused();
1896
1897 /* setup current data */
1898 // AC: potentially unsafe, not in sync with library shutdown,
1899 // __kmp_threads can be freed
1900 master_th = __kmp_threads[gtid];
1901
1902 parent_team = master_th->th.th_team;
1903 master_tid = master_th->th.th_info.ds.ds_tid;
1904 master_this_cons = master_th->th.th_local.this_construct;
1905 root = master_th->th.th_root;
1906 master_active = root->r.r_active;
1907 master_set_numthreads = master_th->th.th_set_nproc;
1908
1909#if OMPT_SUPPORT1
1910 ompt_data_t ompt_parallel_data = ompt_data_none{0};
1911 ompt_data_t *parent_task_data;
1912 ompt_frame_t *ompt_frame;
1913 void *return_address = NULL__null;
1914
1915 if (ompt_enabled.enabled) {
7
Assuming field 'enabled' is 0
8
Taking false branch
1916 __ompt_get_task_info_internal(0, NULL__null, &parent_task_data, &ompt_frame,
1917 NULL__null, NULL__null);
1918 return_address = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
1919 }
1920#endif
1921
1922 // Assign affinity to root thread if it hasn't happened yet
1923 __kmp_assign_root_init_mask();
1924
1925 // Nested level will be an index in the nested nthreads array
1926 level = parent_team->t.t_level;
1927 // used to launch non-serial teams even if nested is not allowed
1928 active_level = parent_team->t.t_active_level;
1929 // needed to check nesting inside the teams
1930 teams_level = master_th->th.th_teams_level;
1931#if KMP_NESTED_HOT_TEAMS1
1932 p_hot_teams = &master_th->th.th_hot_teams;
1933 if (*p_hot_teams == NULL__null && __kmp_hot_teams_max_level > 0) {
9
Assuming the condition is false
1934 *p_hot_teams = (kmp_hot_team_ptr_t *)__kmp_allocate(___kmp_allocate((sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level
), "openmp/runtime/src/kmp_runtime.cpp", 1935)
1935 sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level)___kmp_allocate((sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level
), "openmp/runtime/src/kmp_runtime.cpp", 1935)
;
1936 (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
1937 // it is either actual or not needed (when active_level > 0)
1938 (*p_hot_teams)[0].hot_team_nth = 1;
1939 }
1940#endif
1941
1942#if OMPT_SUPPORT1
1943 if (ompt_enabled.enabled) {
10
Assuming field 'enabled' is 0
11
Taking false branch
1944 if (ompt_enabled.ompt_callback_parallel_begin) {
1945 int team_size = master_set_numthreads
1946 ? master_set_numthreads
1947 : get__nproc_2(parent_team, master_tid)((parent_team)->t.t_threads[(master_tid)]->th.th_current_task
->td_icvs.nproc)
;
1948 int flags = OMPT_INVOKER(call_context)((call_context == fork_context_gnu) ? ompt_parallel_invoker_program
: ompt_parallel_invoker_runtime)
|
1949 ((microtask == (microtask_t)__kmp_teams_master)
1950 ? ompt_parallel_league
1951 : ompt_parallel_team);
1952 ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)ompt_callback_parallel_begin_callback(
1953 parent_task_data, ompt_frame, &ompt_parallel_data, team_size, flags,
1954 return_address);
1955 }
1956 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1957 }
1958#endif
1959
1960 master_th->th.th_ident = loc;
1961
1962 // Parallel closely nested in teams construct:
1963 if (__kmp_is_fork_in_teams(master_th, microtask, level, teams_level, ap)) {
12
Taking false branch
1964 return __kmp_fork_in_teams(loc, gtid, parent_team, argc, master_th, root,
1965 call_context, microtask, invoker,
1966 master_set_numthreads, level,
1967#if OMPT_SUPPORT1
1968 ompt_parallel_data, return_address,
1969#endif
1970 ap);
1971 } // End parallel closely nested in teams construct
1972
1973#if KMP_DEBUG1
1974 if (__kmp_tasking_mode != tskm_immediate_exec) {
13
Assuming '__kmp_tasking_mode' is equal to tskm_immediate_exec
14
Taking false branch
1975 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==if (!(master_th->th.th_task_team == parent_team->t.t_task_team
[master_th->th.th_task_state])) { __kmp_debug_assert("master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]"
, "openmp/runtime/src/kmp_runtime.cpp", 1976); }
1976 parent_team->t.t_task_team[master_th->th.th_task_state])if (!(master_th->th.th_task_team == parent_team->t.t_task_team
[master_th->th.th_task_state])) { __kmp_debug_assert("master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]"
, "openmp/runtime/src/kmp_runtime.cpp", 1976); }
;
1977 }
1978#endif
1979
1980 // Need this to happen before we determine the number of threads, not while
1981 // we are allocating the team
1982 //__kmp_push_current_task_to_thread(master_th, parent_team, 0);
1983
1984 // Determine the number of threads
1985 int enter_teams =
1986 __kmp_is_entering_teams(active_level, level, teams_level, ap);
1987 if ((!enter_teams &&
1988 (parent_team->t.t_active_level >=
1989 master_th->th.th_current_task->td_icvs.max_active_levels)) ||
1990 (__kmp_library == library_serial)) {
14.1 'enter_teams' is 0
15 Assuming field 't_active_level' is < field 'max_active_levels'
16 Assuming '__kmp_library' is not equal to library_serial
1991 KC_TRACE(10, ("__kmp_fork_call: T#%d serializing team\n", gtid))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_fork_call: T#%d serializing team\n"
, gtid); }
;
1992 nthreads = 1;
1993 } else {
17 Taking false branch
1994 nthreads = master_set_numthreads
1995 ? master_set_numthreads
1996 // TODO: get nproc directly from current task
1997 : get__nproc_2(parent_team, master_tid)((parent_team)->t.t_threads[(master_tid)]->th.th_current_task->td_icvs.nproc);
18 Assuming 'master_set_numthreads' is 0
19 '?' condition is false
1998 // Check whether we need to take the forkjoin lock (there is no need for a
1999 // serialized parallel region outside of a teams construct).
2000 if (nthreads > 1) {
20
Assuming 'nthreads' is <= 1
21
Taking false branch
2001 /* determine how many new threads we can use */
2002 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2003 /* AC: If we execute teams from a parallel region (on the host), then the
2004 teams should be created, but each can have only 1 thread if nesting is
2005 disabled. If teams is called from a serial region, then the teams and their
2006 threads should be created regardless of the nesting setting. */
2007 nthreads = __kmp_reserve_threads(root, parent_team, master_tid,
2008 nthreads, enter_teams);
2009 if (nthreads == 1) {
2010 // Free lock for single thread execution here; for multi-thread
2011 // execution it will be freed later after team of threads created
2012 // and initialized
2013 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2014 }
2015 }
2016 }
2017 KMP_DEBUG_ASSERT(nthreads > 0)if (!(nthreads > 0)) { __kmp_debug_assert("nthreads > 0"
, "openmp/runtime/src/kmp_runtime.cpp", 2017); }
;
22
Assuming 'nthreads' is > 0
23
Taking false branch
2018
2019 // If we temporarily changed the set number of threads then restore it now
2020 master_th->th.th_set_nproc = 0;
2021
2022 if (nthreads == 1) {
23.1 'nthreads' is equal to 1
24 Taking true branch
2023 return __kmp_serial_fork_call(loc, gtid, call_context, argc, microtask,
25 Calling '__kmp_serial_fork_call'
2024 invoker, master_th, parent_team,
2025#if OMPT_SUPPORT1
2026 &ompt_parallel_data, &return_address,
2027 &parent_task_data,
2028#endif
2029 ap);
2030 } // if (nthreads == 1)
2031
2032 // GEH: only modify the executing flag in the case when not serialized
2033 // serialized case is handled in kmpc_serialized_parallel
2034 KF_TRACE(10, ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
"curtask=%p, curtask_max_aclevel=%d\n", parent_team->t.t_active_level
, master_th, master_th->th.th_current_task, master_th->
th.th_current_task->td_icvs.max_active_levels); }
2035 "curtask=%p, curtask_max_aclevel=%d\n",if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
"curtask=%p, curtask_max_aclevel=%d\n", parent_team->t.t_active_level
, master_th, master_th->th.th_current_task, master_th->
th.th_current_task->td_icvs.max_active_levels); }
2036 parent_team->t.t_active_level, master_th,if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
"curtask=%p, curtask_max_aclevel=%d\n", parent_team->t.t_active_level
, master_th, master_th->th.th_current_task, master_th->
th.th_current_task->td_icvs.max_active_levels); }
2037 master_th->th.th_current_task,if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
"curtask=%p, curtask_max_aclevel=%d\n", parent_team->t.t_active_level
, master_th, master_th->th.th_current_task, master_th->
th.th_current_task->td_icvs.max_active_levels); }
2038 master_th->th.th_current_task->td_icvs.max_active_levels))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
"curtask=%p, curtask_max_aclevel=%d\n", parent_team->t.t_active_level
, master_th, master_th->th.th_current_task, master_th->
th.th_current_task->td_icvs.max_active_levels); }
;
2039 // TODO: GEH - cannot do this assertion because root thread not set up as
2040 // executing
2041 // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 1 );
2042 master_th->th.th_current_task->td_flags.executing = 0;
2043
2044 if (!master_th->th.th_teams_microtask || level > teams_level) {
2045 /* Increment our nested depth level */
2046 KMP_ATOMIC_INC(&root->r.r_in_parallel)(&root->r.r_in_parallel)->fetch_add(1, std::memory_order_acq_rel
)
;
2047 }
2048
2049 // See if we need to make a copy of the ICVs.
2050 int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
2051 if ((level + 1 < __kmp_nested_nth.used) &&
2052 (__kmp_nested_nth.nth[level + 1] != nthreads_icv)) {
2053 nthreads_icv = __kmp_nested_nth.nth[level + 1];
2054 } else {
2055 nthreads_icv = 0; // don't update
2056 }
2057
2058 // Figure out the proc_bind_policy for the new team.
2059 kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
2060 // proc_bind_default means don't update
2061 kmp_proc_bind_t proc_bind_icv = proc_bind_default;
2062 if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
2063 proc_bind = proc_bind_false;
2064 } else {
2065 // No proc_bind clause specified; use current proc-bind-var for this
2066 // parallel region
2067 if (proc_bind == proc_bind_default) {
2068 proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
2069 }
2070 // Have teams construct take proc_bind value from KMP_TEAMS_PROC_BIND
2071 if (master_th->th.th_teams_microtask &&
2072 microtask == (microtask_t)__kmp_teams_master) {
2073 proc_bind = __kmp_teams_proc_bind;
2074 }
2075 /* else: The proc_bind policy was specified explicitly on the parallel clause.
2076 This overrides proc-bind-var for this parallel region, but does not
2077 change proc-bind-var. */
2078 // Figure the value of proc-bind-var for the child threads.
2079 if ((level + 1 < __kmp_nested_proc_bind.used) &&
2080 (__kmp_nested_proc_bind.bind_types[level + 1] !=
2081 master_th->th.th_current_task->td_icvs.proc_bind)) {
2082 // Do not modify the proc_bind ICV for the two teams construct forks;
2083 // they just let the proc_bind ICV pass through.
2084 if (!master_th->th.th_teams_microtask ||
2085 !(microtask == (microtask_t)__kmp_teams_master || ap == NULL__null))
2086 proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
2087 }
2088 }
2089
2090 // Reset for next parallel region
2091 master_th->th.th_set_proc_bind = proc_bind_default;
2092
2093 if ((nthreads_icv > 0) || (proc_bind_icv != proc_bind_default)) {
2094 kmp_internal_control_t new_icvs;
2095 copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
2096 new_icvs.next = NULL__null;
2097 if (nthreads_icv > 0) {
2098 new_icvs.nproc = nthreads_icv;
2099 }
2100 if (proc_bind_icv != proc_bind_default) {
2101 new_icvs.proc_bind = proc_bind_icv;
2102 }
2103
2104 /* allocate a new parallel team */
2105 KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
2106 team = __kmp_allocate_team(root, nthreads, nthreads,
2107#if OMPT_SUPPORT
2108 ompt_parallel_data,
2109#endif
2110 proc_bind, &new_icvs,
2111 argc USE_NESTED_HOT_ARG(master_th));
2112 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
2113 copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs, &new_icvs);
2114 } else {
2115 /* allocate a new parallel team */
2116 KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
2117 team = __kmp_allocate_team(root, nthreads, nthreads,
2118#if OMPT_SUPPORT
2119 ompt_parallel_data,
2120#endif
2121 proc_bind,
2122 &master_th->th.th_current_task->td_icvs,
2123 argc USE_NESTED_HOT_ARG(master_th));
2124 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
2125 copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs,
2126 &master_th->th.th_current_task->td_icvs);
2127 }
2128 KF_TRACE(
2129 10, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team));
2130
2131 /* setup the new team */
2132 KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
2133 KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
2134 KMP_CHECK_UPDATE(team->t.t_ident, loc);
2135 KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
2136 KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
2137#if OMPT_SUPPORT
2138 KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.master_return_address,
2139 return_address);
2140#endif
2141 KMP_CHECK_UPDATE(team->t.t_invoke, invoker); // TODO move to root, maybe
2142 // TODO: parent_team->t.t_level == INT_MAX ???
2143 if (!master_th->th.th_teams_microtask || level > teams_level) {
2144 int new_level = parent_team->t.t_level + 1;
2145 KMP_CHECK_UPDATE(team->t.t_level, new_level);
2146 new_level = parent_team->t.t_active_level + 1;
2147 KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
2148 } else {
2149 // AC: Do not increase parallel level at start of the teams construct
2150 int new_level = parent_team->t.t_level;
2151 KMP_CHECK_UPDATE(team->t.t_level, new_level);
2152 new_level = parent_team->t.t_active_level;
2153 KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
2154 }
2155 kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);
2156 // set primary thread's schedule as new run-time schedule
2157 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);
2158
2159 KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
2160 KMP_CHECK_UPDATE(team->t.t_def_allocator, master_th->th.th_def_allocator);
2161
2162 // Update the floating point rounding in the team if required.
2163 propagateFPControl(team);
2164#if OMPD_SUPPORT
2165 if (ompd_state & OMPD_ENABLE_BP)
2166 ompd_bp_parallel_begin();
2167#endif
2168
2169 if (__kmp_tasking_mode != tskm_immediate_exec) {
2170 // Set primary thread's task team to team's task team. Unless this is hot
2171 // team, it should be NULL.
2172 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
2173 parent_team->t.t_task_team[master_th->th.th_task_state]);
2174 KA_TRACE(20, ("__kmp_fork_call: Primary T#%d pushing task_team %p / team "
2175 "%p, new task_team %p / team %p\n",
2176 __kmp_gtid_from_thread(master_th),
2177 master_th->th.th_task_team, parent_team,
2178 team->t.t_task_team[master_th->th.th_task_state], team));
2179
2180 if (active_level || master_th->th.th_task_team) {
2181 // Take a memo of primary thread's task_state
2182 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2183 if (master_th->th.th_task_state_top >=
2184 master_th->th.th_task_state_stack_sz) { // increase size
2185 kmp_uint32 new_size = 2 * master_th->th.th_task_state_stack_sz;
2186 kmp_uint8 *old_stack, *new_stack;
2187 kmp_uint32 i;
2188 new_stack = (kmp_uint8 *)__kmp_allocate(new_size);
2189 for (i = 0; i < master_th->th.th_task_state_stack_sz; ++i) {
2190 new_stack[i] = master_th->th.th_task_state_memo_stack[i];
2191 }
2192 for (i = master_th->th.th_task_state_stack_sz; i < new_size;
2193 ++i) { // zero-init rest of stack
2194 new_stack[i] = 0;
2195 }
2196 old_stack = master_th->th.th_task_state_memo_stack;
2197 master_th->th.th_task_state_memo_stack = new_stack;
2198 master_th->th.th_task_state_stack_sz = new_size;
2199 __kmp_free(old_stack);
2200 }
2201 // Store primary thread's task_state on stack
2202 master_th->th
2203 .th_task_state_memo_stack[master_th->th.th_task_state_top] =
2204 master_th->th.th_task_state;
2205 master_th->th.th_task_state_top++;
2206#if KMP_NESTED_HOT_TEAMS
2207 if (master_th->th.th_hot_teams &&
2208 active_level < __kmp_hot_teams_max_level &&
2209 team == master_th->th.th_hot_teams[active_level].hot_team) {
2210 // Restore primary thread's nested state if nested hot team
2211 master_th->th.th_task_state =
2212 master_th->th
2213 .th_task_state_memo_stack[master_th->th.th_task_state_top];
2214 } else {
2215#endif
2216 master_th->th.th_task_state = 0;
2217#if KMP_NESTED_HOT_TEAMS
2218 }
2219#endif
2220 }
2221#if !KMP_NESTED_HOT_TEAMS
2222 KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) ||
2223 (team == root->r.r_hot_team));
2224#endif
2225 }
2226
2227 KA_TRACE(
2228 20,
2229 ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
2230 gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id,
2231 team->t.t_nproc));
2232 KMP_DEBUG_ASSERT(team != root->r.r_hot_team ||
2233 (team->t.t_master_tid == 0 &&
2234 (team->t.t_parent == root->r.r_root_team ||
2235 team->t.t_parent->t.t_serialized)));
2236 KMP_MB();
2237
2238 /* now, setup the arguments */
2239 argv = (void **)team->t.t_argv;
2240 if (ap) {
2241 for (i = argc - 1; i >= 0; --i) {
2242 void *new_argv = va_arg(kmp_va_deref(ap), void *);
2243 KMP_CHECK_UPDATE(*argv, new_argv);
2244 argv++;
2245 }
2246 } else {
2247 for (i = 0; i < argc; ++i) {
2248 // Get args from parent team for teams construct
2249 KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);
2250 }
2251 }
2252
2253 /* now actually fork the threads */
2254 KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
2255 if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong
2256 root->r.r_active = TRUE;
2257
2258 __kmp_fork_team_threads(root, team, master_th, gtid, !ap);
2259 __kmp_setup_icv_copy(team, nthreads,
2260 &master_th->th.th_current_task->td_icvs, loc);
2261
2262#if OMPT_SUPPORT
2263 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
2264#endif
2265
2266 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2267
2268#if USE_ITT_BUILD
2269 if (team->t.t_active_level == 1 // only report frames at level 1
2270 && !master_th->th.th_teams_microtask) { // not in teams construct
2271#if USE_ITT_NOTIFY
2272 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
2273 (__kmp_forkjoin_frames_mode == 3 ||
2274 __kmp_forkjoin_frames_mode == 1)) {
2275 kmp_uint64 tmp_time = 0;
2276 if (__itt_get_timestamp_ptr)
2277 tmp_time = __itt_get_timestamp();
2278 // Internal fork - report frame begin
2279 master_th->th.th_frame_time = tmp_time;
2280 if (__kmp_forkjoin_frames_mode == 3)
2281 team->t.t_region_time = tmp_time;
2282 } else
2283// only one notification scheme (either "submit" or "forking/joined", not both)
2284#endif /* USE_ITT_NOTIFY */
2285 if ((__itt_frame_begin_v3_ptr || KMP_ITT_DEBUG) &&
2286 __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) {
2287 // Mark start of "parallel" region for Intel(R) VTune(TM) analyzer.
2288 __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
2289 }
2290 }
2291#endif /* USE_ITT_BUILD */
2292
2293 /* now go on and do the work */
2294 KMP_DEBUG_ASSERT(team == __kmp_threads[gtid]->th.th_team);
2295 KMP_MB();
2296 KF_TRACE(10,
2297 ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
2298 root, team, master_th, gtid));
2299
2300#if USE_ITT_BUILD
2301 if (__itt_stack_caller_create_ptr) {
2302 // create new stack stitching id before entering fork barrier
2303 if (!enter_teams) {
2304 KMP_DEBUG_ASSERT(team->t.t_stack_id == NULL);
2305 team->t.t_stack_id = __kmp_itt_stack_caller_create();
2306 } else if (parent_team->t.t_serialized) {
2307 // keep stack stitching id in the serialized parent_team;
2308 // current team will be used for parallel inside the teams;
2309 // if parent_team is active, then it already keeps stack stitching id
2310 // for the league of teams
2311 KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);
2312 parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();
2313 }
2314 }
2315#endif /* USE_ITT_BUILD */
2316
2317 // AC: skip __kmp_internal_fork at teams construct, let only primary
2318 // threads execute
2319 if (ap) {
2320 __kmp_internal_fork(loc, gtid, team);
2321 KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, "
2322 "master_th=%p, gtid=%d\n",
2323 root, team, master_th, gtid));
2324 }
2325
2326 if (call_context == fork_context_gnu) {
2327 KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
2328 return TRUE;
2329 }
2330
2331 /* Invoke microtask for PRIMARY thread */
2332 KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
2333 team->t.t_id, team->t.t_pkfn));
2334 } // END of timer KMP_fork_call block
2335
2336#if KMP_STATS_ENABLED
2337 // If beginning a teams construct, then change thread state
2338 stats_state_e previous_state = KMP_GET_THREAD_STATE();
2339 if (!ap) {
2340 KMP_SET_THREAD_STATE(stats_state_e::TEAMS_REGION);
2341 }
2342#endif
2343
2344 if (!team->t.t_invoke(gtid)) {
2345 KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread");
2346 }
2347
2348#if KMP_STATS_ENABLED
2349 // If was beginning of a teams construct, then reset thread state
2350 if (!ap) {
2351 KMP_SET_THREAD_STATE(previous_state);
2352 }
2353#endif
2354
2355 KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
2356 team->t.t_id, team->t.t_pkfn));
2357 KMP_MB(); /* Flush all pending memory write invalidates. */
2358
2359 KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
2360#if OMPT_SUPPORT
2361 if (ompt_enabled.enabled) {
2362 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2363 }
2364#endif
2365
2366 return TRUE;
2367}
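For context, a minimal user-level sketch of what reaches this fork path: the compiler outlines a parallel construct into a microtask and emits a call to __kmpc_fork_call, which funnels into __kmp_fork_call above. This is only an illustration of the public OpenMP surface, not part of the listing; it assumes a standard clang/libomp toolchain (compile with clang -fopenmp).

#include <omp.h>
#include <stdio.h>

int main(void) {
  // The body of this region becomes the microtask invoked via t_invoke above.
  #pragma omp parallel num_threads(4)
  {
    printf("hello from thread %d of %d\n", omp_get_thread_num(),
           omp_get_num_threads());
  }
  return 0;
}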
2368
2369#if OMPT_SUPPORT
2370static inline void __kmp_join_restore_state(kmp_info_t *thread,
2371 kmp_team_t *team) {
2372 // restore state outside the region
2373 thread->th.ompt_thread_info.state =
2374 ((team->t.t_serialized) ? ompt_state_work_serial
2375 : ompt_state_work_parallel);
2376}
2377
2378static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread,
2379 kmp_team_t *team, ompt_data_t *parallel_data,
2380 int flags, void *codeptr) {
2381 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2382 if (ompt_enabled.ompt_callback_parallel_end) {
2383 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
2384 parallel_data, &(task_info->task_data), flags, codeptr);
2385 }
2386
2387 task_info->frame.enter_frame = ompt_data_none;
2388 __kmp_join_restore_state(thread, team);
2389}
2390#endif
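For reference, a hedged sketch of a minimal OMPT tool that receives the ompt_callback_parallel_end event dispatched by __kmp_join_ompt above. It uses only the standard OMPT tool interface from <omp-tools.h> and assumes an OMPT-enabled libomp; it is not part of the runtime listing.

#include <omp-tools.h>
#include <stdio.h>

static void on_parallel_end(ompt_data_t *parallel_data,
                            ompt_data_t *encountering_task_data, int flags,
                            const void *codeptr_ra) {
  // Fired from __kmp_join_ompt when a parallel region ends.
  printf("parallel region ended (flags=%d)\n", flags);
}

static int tool_initialize(ompt_function_lookup_t lookup,
                           int initial_device_num, ompt_data_t *tool_data) {
  ompt_set_callback_t set_callback =
      (ompt_set_callback_t)lookup("ompt_set_callback");
  set_callback(ompt_callback_parallel_end, (ompt_callback_t)on_parallel_end);
  return 1; // non-zero keeps the tool active
}

static void tool_finalize(ompt_data_t *tool_data) {}

ompt_start_tool_result_t *ompt_start_tool(unsigned int omp_version,
                                          const char *runtime_version) {
  static ompt_start_tool_result_t result = {&tool_initialize, &tool_finalize,
                                            {0}};
  return &result;
}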
2391
2392void __kmp_join_call(ident_t *loc, int gtid
2393#if OMPT_SUPPORT
2394 ,
2395 enum fork_context_e fork_context
2396#endif
2397 ,
2398 int exit_teams) {
2399 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);
2400 kmp_team_t *team;
2401 kmp_team_t *parent_team;
2402 kmp_info_t *master_th;
2403 kmp_root_t *root;
2404 int master_active;
2405
2406 KA_TRACE(20, ("__kmp_join_call: enter T#%d\n", gtid));
2407
2408 /* setup current data */
2409 master_th = __kmp_threads[gtid];
2410 root = master_th->th.th_root;
2411 team = master_th->th.th_team;
2412 parent_team = team->t.t_parent;
2413
2414 master_th->th.th_ident = loc;
2415
2416#if OMPT_SUPPORT
2417 void *team_microtask = (void *)team->t.t_pkfn;
2418 // For GOMP interface with serialized parallel, need the
2419 // __kmpc_end_serialized_parallel to call hooks for OMPT end-implicit-task
2420 // and end-parallel events.
2421 if (ompt_enabled.enabled &&
2422 !(team->t.t_serialized && fork_context == fork_context_gnu)) {
2423 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2424 }
2425#endif
2426
2427#if KMP_DEBUG
2428 if (__kmp_tasking_mode != tskm_immediate_exec && !exit_teams) {
2429 KA_TRACE(20, ("__kmp_join_call: T#%d, old team = %p old task_team = %p, "
2430 "th_task_team = %p\n",
2431 __kmp_gtid_from_thread(master_th), team,
2432 team->t.t_task_team[master_th->th.th_task_state],
2433 master_th->th.th_task_team));
2434 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
2435 team->t.t_task_team[master_th->th.th_task_state]);
2436 }
2437#endif
2438
2439 if (team->t.t_serialized) {
2440 if (master_th->th.th_teams_microtask) {
2441 // We are in teams construct
2442 int level = team->t.t_level;
2443 int tlevel = master_th->th.th_teams_level;
2444 if (level == tlevel) {
2445 // AC: we haven't incremented it earlier at start of teams construct,
2446 // so do it here - at the end of teams construct
2447 team->t.t_level++;
2448 } else if (level == tlevel + 1) {
2449 // AC: we are exiting parallel inside teams, need to increment
2450 // serialization in order to restore it in the next call to
2451 // __kmpc_end_serialized_parallel
2452 team->t.t_serialized++;
2453 }
2454 }
2455 __kmpc_end_serialized_parallel(loc, gtid);
2456
2457#if OMPT_SUPPORT
2458 if (ompt_enabled.enabled) {
2459 if (fork_context == fork_context_gnu) {
2460 __ompt_lw_taskteam_unlink(master_th);
2461 }
2462 __kmp_join_restore_state(master_th, parent_team);
2463 }
2464#endif
2465
2466 return;
2467 }
2468
2469 master_active = team->t.t_master_active;
2470
2471 if (!exit_teams) {
2472 // AC: No barrier for internal teams at exit from teams construct.
2473 // But there is barrier for external team (league).
2474 __kmp_internal_join(loc, gtid, team);
2475#if USE_ITT_BUILD
2476 if (__itt_stack_caller_create_ptr) {
2477 KMP_DEBUG_ASSERT(team->t.t_stack_id != NULL);
2478 // destroy the stack stitching id after join barrier
2479 __kmp_itt_stack_caller_destroy((__itt_caller)team->t.t_stack_id);
2480 team->t.t_stack_id = NULL;
2481 }
2482#endif
2483 } else {
2484 master_th->th.th_task_state =
2485 0; // AC: no tasking in teams (out of any parallel)
2486#if USE_ITT_BUILD
2487 if (__itt_stack_caller_create_ptr && parent_team->t.t_serialized) {
2488 KMP_DEBUG_ASSERT(parent_team->t.t_stack_id != NULL);
2489 // destroy the stack stitching id on exit from the teams construct
2490 // if parent_team is active, then the id will be destroyed later on
2491 // by master of the league of teams
2492 __kmp_itt_stack_caller_destroy((__itt_caller)parent_team->t.t_stack_id);
2493 parent_team->t.t_stack_id = NULL;
2494 }
2495#endif
2496 }
2497
2498 KMP_MB();
2499
2500#if OMPT_SUPPORT
2501 ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data);
2502 void *codeptr = team->t.ompt_team_info.master_return_address;
2503#endif
2504
2505#if USE_ITT_BUILD
2506 // Mark end of "parallel" region for Intel(R) VTune(TM) analyzer.
2507 if (team->t.t_active_level == 1 &&
2508 (!master_th->th.th_teams_microtask || /* not in teams construct */
2509 master_th->th.th_teams_size.nteams == 1)) {
2510 master_th->th.th_ident = loc;
2511 // only one notification scheme (either "submit" or "forking/joined", not
2512 // both)
2513 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
2514 __kmp_forkjoin_frames_mode == 3)
2515 __kmp_itt_frame_submit(gtid, team->t.t_region_time,
2516 master_th->th.th_frame_time, 0, loc,
2517 master_th->th.th_team_nproc, 1);
2518 else if ((__itt_frame_end_v3_ptr || KMP_ITT_DEBUG) &&
2519 !__kmp_forkjoin_frames_mode && __kmp_forkjoin_frames)
2520 __kmp_itt_region_joined(gtid);
2521 } // active_level == 1
2522#endif /* USE_ITT_BUILD */
2523
2524#if KMP_AFFINITY_SUPPORTED
2525 if (!exit_teams) {
2526 // Restore master thread's partition.
2527 master_th->th.th_first_place = team->t.t_first_place;
2528 master_th->th.th_last_place = team->t.t_last_place;
2529 }
2530#endif // KMP_AFFINITY_SUPPORTED
2531
2532 if (master_th->th.th_teams_microtask && !exit_teams &&
2533 team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
2534 team->t.t_level == master_th->th.th_teams_level + 1) {
2535// AC: We need to leave the team structure intact at the end of parallel
2536// inside the teams construct, so that at the next parallel same (hot) team
2537// works, only adjust nesting levels
2538#if OMPT_SUPPORT
2539 ompt_data_t ompt_parallel_data = ompt_data_none;
2540 if (ompt_enabled.enabled) {
2541 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2542 if (ompt_enabled.ompt_callback_implicit_task) {
2543 int ompt_team_size = team->t.t_nproc;
2544 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
2545 ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
2546 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
2547 }
2548 task_info->frame.exit_frame = ompt_data_none;
2549 task_info->task_data = ompt_data_none;
2550 ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
2551 __ompt_lw_taskteam_unlink(master_th);
2552 }
2553#endif
2554 /* Decrement our nested depth level */
2555 team->t.t_level--;
2556 team->t.t_active_level--;
2557 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
2558
2559 // Restore number of threads in the team if needed. This code relies on
2560 // the proper adjustment of th_teams_size.nth after the fork in
2561 // __kmp_teams_master on each teams primary thread in the case that
2562 // __kmp_reserve_threads reduced it.
2563 if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) {
2564 int old_num = master_th->th.th_team_nproc;
2565 int new_num = master_th->th.th_teams_size.nth;
2566 kmp_info_t **other_threads = team->t.t_threads;
2567 team->t.t_nproc = new_num;
2568 for (int i = 0; i < old_num; ++i) {
2569 other_threads[i]->th.th_team_nproc = new_num;
2570 }
2571 // Adjust states of non-used threads of the team
2572 for (int i = old_num; i < new_num; ++i) {
2573 // Re-initialize thread's barrier data.
2574 KMP_DEBUG_ASSERT(other_threads[i]);
2575 kmp_balign_t *balign = other_threads[i]->th.th_bar;
2576 for (int b = 0; b < bs_last_barrier; ++b) {
2577 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
2578 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
2579#if USE_DEBUGGER
2580 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
2581#endif
2582 }
2583 if (__kmp_tasking_mode != tskm_immediate_exec) {
2584 // Synchronize thread's task state
2585 other_threads[i]->th.th_task_state = master_th->th.th_task_state;
2586 }
2587 }
2588 }
2589
2590#if OMPT_SUPPORT
2591 if (ompt_enabled.enabled) {
2592 __kmp_join_ompt(gtid, master_th, parent_team, &ompt_parallel_data,
2593 OMPT_INVOKER(fork_context) | ompt_parallel_team, codeptr);
2594 }
2595#endif
2596
2597 return;
2598 }
2599
2600 /* do cleanup and restore the parent team */
2601 master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
2602 master_th->th.th_local.this_construct = team->t.t_master_this_cons;
2603
2604 master_th->th.th_dispatch = &parent_team->t.t_dispatch[team->t.t_master_tid];
2605
2606 /* jc: The following lock has instructions with REL and ACQ semantics,
2607 separating the parallel user code called in this parallel region
2608 from the serial user code called after this function returns. */
2609 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2610
2611 if (!master_th->th.th_teams_microtask ||
2612 team->t.t_level > master_th->th.th_teams_level) {
2613 /* Decrement our nested depth level */
2614 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
2615 }
2616 KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0);
2617
2618#if OMPT_SUPPORT
2619 if (ompt_enabled.enabled) {
2620 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2621 if (ompt_enabled.ompt_callback_implicit_task) {
2622 int flags = (team_microtask == (void *)__kmp_teams_master)
2623 ? ompt_task_initial
2624 : ompt_task_implicit;
2625 int ompt_team_size = (flags == ompt_task_initial) ? 0 : team->t.t_nproc;
2626 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
2627 ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
2628 OMPT_CUR_TASK_INFO(master_th)->thread_num, flags);
2629 }
2630 task_info->frame.exit_frame = ompt_data_none;
2631 task_info->task_data = ompt_data_none;
2632 }
2633#endif
2634
2635 KF_TRACE(10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 0,
2636 master_th, team));
2637 __kmp_pop_current_task_from_thread(master_th);
2638
2639 master_th->th.th_def_allocator = team->t.t_def_allocator;
2640
2641#if OMPD_SUPPORT
2642 if (ompd_state & OMPD_ENABLE_BP)
2643 ompd_bp_parallel_end();
2644#endif
2645 updateHWFPControl(team);
2646
2647 if (root->r.r_active != master_active)
2648 root->r.r_active = master_active;
2649
2650 __kmp_free_team(root, team USE_NESTED_HOT_ARG(
2651 master_th)); // this will free worker threads
2652
2653 /* this race was fun to find. make sure the following is in the critical
2654 region otherwise assertions may fail occasionally since the old team may be
2655 reallocated and the hierarchy appears inconsistent. it is actually safe to
2656 run and won't cause any bugs, but will cause those assertion failures. it's
2657 only one deref&assign so might as well put this in the critical region */
2658 master_th->th.th_team = parent_team;
2659 master_th->th.th_team_nproc = parent_team->t.t_nproc;
2660 master_th->th.th_team_master = parent_team->t.t_threads[0];
2661 master_th->th.th_team_serialized = parent_team->t.t_serialized;
2662
2663 /* restore serialized team, if need be */
2664 if (parent_team->t.t_serialized &&
2665 parent_team != master_th->th.th_serial_team &&
2666 parent_team != root->r.r_root_team) {
2667 __kmp_free_team(root,
2668 master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL));
2669 master_th->th.th_serial_team = parent_team;
2670 }
2671
2672 if (__kmp_tasking_mode != tskm_immediate_exec) {
2673 if (master_th->th.th_task_state_top >
2674 0) { // Restore task state from memo stack
2675 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2676 // Remember primary thread's state if we re-use this nested hot team
2677 master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] =
2678 master_th->th.th_task_state;
2679 --master_th->th.th_task_state_top; // pop
2680 // Now restore state at this level
2681 master_th->th.th_task_state =
2682 master_th->th
2683 .th_task_state_memo_stack[master_th->th.th_task_state_top];
2684 } else if (team != root->r.r_hot_team) {
2685 // Reset the task state of primary thread if we are not hot team because
2686 // in this case all the worker threads will be free, and their task state
2687 // will be reset. If not reset the primary's, the task state will be
2688 // inconsistent.
2689 master_th->th.th_task_state = 0;
2690 }
2691 // Copy the task team from the parent team to the primary thread
2692 master_th->th.th_task_team =
2693 parent_team->t.t_task_team[master_th->th.th_task_state];
2694 KA_TRACE(20,
2695 ("__kmp_join_call: Primary T#%d restoring task_team %p, team %p\n",
2696 __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
2697 parent_team));
2698 }
2699
2700 // TODO: GEH - cannot do this assertion because root thread not set up as
2701 // executing
2702 // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 0 );
2703 master_th->th.th_current_task->td_flags.executing = 1;
2704
2705 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2706
2707#if KMP_AFFINITY_SUPPORTED
2708 if (master_th->th.th_team->t.t_level == 0 && __kmp_affinity.flags.reset) {
2709 __kmp_reset_root_init_mask(gtid);
2710 }
2711#endif
2712#if OMPT_SUPPORT
2713 int flags =
2714 OMPT_INVOKER(fork_context) |
2715 ((team_microtask == (void *)__kmp_teams_master) ? ompt_parallel_league
2716 : ompt_parallel_team);
2717 if (ompt_enabled.enabled) {
2718 __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, flags,
2719 codeptr);
2720 }
2721#endif
2722
2723 KMP_MB();
2724 KA_TRACE(20, ("__kmp_join_call: exit T#%d\n", gtid));
2725}
2726
2727/* Check whether we should push an internal control record onto the
2728 serial team stack. If so, do it. */
2729void __kmp_save_internal_controls(kmp_info_t *thread) {
2730
2731 if (thread->th.th_team != thread->th.th_serial_team) {
2732 return;
2733 }
2734 if (thread->th.th_team->t.t_serialized > 1) {
2735 int push = 0;
2736
2737 if (thread->th.th_team->t.t_control_stack_top == NULL) {
2738 push = 1;
2739 } else {
2740 if (thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
2741 thread->th.th_team->t.t_serialized) {
2742 push = 1;
2743 }
2744 }
2745 if (push) { /* push a record on the serial team's stack */
2746 kmp_internal_control_t *control =
2747 (kmp_internal_control_t *)__kmp_allocate(
2748 sizeof(kmp_internal_control_t));
2749
2750 copy_icvs(control, &thread->th.th_current_task->td_icvs);
2751
2752 control->serial_nesting_level = thread->th.th_team->t.t_serialized;
2753
2754 control->next = thread->th.th_team->t.t_control_stack_top;
2755 thread->th.th_team->t.t_control_stack_top = control;
2756 }
2757 }
2758}
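A hedged illustration of the behavior this control stack supports (not part of the listing; the exact internal path is an assumption): ICV changes made inside a serialized nested region stay scoped to that region's data environment and are restored on exit.

#include <omp.h>
#include <stdio.h>

int main(void) {
  omp_set_num_threads(4);
  #pragma omp parallel num_threads(1)    // serialized outer region
  {
    #pragma omp parallel num_threads(1)  // serialized nested region
    {
      omp_set_num_threads(2);            // scoped to this data environment
    }
    // Per OpenMP ICV rules, the enclosing region still sees 4 here.
    printf("nthreads-var here is %d\n", omp_get_max_threads());
  }
  return 0;
}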
2759
2760/* Changes set_nproc */
2761void __kmp_set_num_threads(int new_nth, int gtid) {
2762 kmp_info_t *thread;
2763 kmp_root_t *root;
2764
2765 KF_TRACE(10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth));
2766 KMP_DEBUG_ASSERT(__kmp_init_serial);
2767
2768 if (new_nth < 1)
2769 new_nth = 1;
2770 else if (new_nth > __kmp_max_nth)
2771 new_nth = __kmp_max_nth;
2772
2773 KMP_COUNT_VALUE(OMP_set_numthreads, new_nth);
2774 thread = __kmp_threads[gtid];
2775 if (thread->th.th_current_task->td_icvs.nproc == new_nth)
2776 return; // nothing to do
2777
2778 __kmp_save_internal_controls(thread);
2779
2780 set__nproc(thread, new_nth);
2781
2782 // If this omp_set_num_threads() call will cause the hot team size to be
2783 // reduced (in the absence of a num_threads clause), then reduce it now,
2784 // rather than waiting for the next parallel region.
2785 root = thread->th.th_root;
2786 if (__kmp_init_parallel && (!root->r.r_active) &&
2787 (root->r.r_hot_team->t.t_nproc > new_nth)
2788 && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
2789 && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
2790#endif
2791 ) {
2792 kmp_team_t *hot_team = root->r.r_hot_team;
2793 int f;
2794
2795 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2796
2797 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
2798 __kmp_resize_dist_barrier(hot_team, hot_team->t.t_nproc, new_nth);
2799 }
2800 // Release the extra threads we don't need any more.
2801 for (f = new_nth; f < hot_team->t.t_nproc; f++) {
2802 KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
2803 if (__kmp_tasking_mode != tskm_immediate_exec) {
2804 // When decreasing team size, threads no longer in the team should unref
2805 // task team.
2806 hot_team->t.t_threads[f]->th.th_task_team = NULL;
2807 }
2808 __kmp_free_thread(hot_team->t.t_threads[f]);
2809 hot_team->t.t_threads[f] = NULL;
2810 }
2811 hot_team->t.t_nproc = new_nth;
2812#if KMP_NESTED_HOT_TEAMS
2813 if (thread->th.th_hot_teams) {
2814 KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team);
2815 thread->th.th_hot_teams[0].hot_team_nth = new_nth;
2816 }
2817#endif
2818
2819 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
2820 hot_team->t.b->update_num_threads(new_nth);
2821 __kmp_add_threads_to_team(hot_team, new_nth);
2822 }
2823
2824 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2825
2826 // Update the t_nproc field in the threads that are still active.
2827 for (f = 0; f < new_nth; f++) {
2828 KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
2829 hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
2830 }
2831 // Special flag in case omp_set_num_threads() call
2832 hot_team->t.t_size_changed = -1;
2833 }
2834}
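A minimal sketch of the user-facing API that funnels into __kmp_set_num_threads above (standard OpenMP API only, no libomp internals): when the new value is smaller than the current hot team and no num_threads clause intervenes, the spare workers are released immediately rather than at the next parallel region.

#include <omp.h>
#include <stdio.h>

int main(void) {
  #pragma omp parallel
  { }                       // first region creates the hot team

  omp_set_num_threads(2);   // may shrink the hot team right away (see above)

  #pragma omp parallel
  {
    #pragma omp single
    printf("now running with %d threads\n", omp_get_num_threads());
  }
  return 0;
}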
2835
2836/* Changes max_active_levels */
2837void __kmp_set_max_active_levels(int gtid, int max_active_levels) {
2838 kmp_info_t *thread;
2839
2840 KF_TRACE(10, ("__kmp_set_max_active_levels: new max_active_levels for thread "
2841 "%d = (%d)\n",
2842 gtid, max_active_levels));
2843 KMP_DEBUG_ASSERT(__kmp_init_serial);
2844
2845 // validate max_active_levels
2846 if (max_active_levels < 0) {
2847 KMP_WARNING(ActiveLevelsNegative, max_active_levels);
2848 // We ignore this call if the user has specified a negative value.
2849 // The current setting won't be changed. The last valid setting will be
2850 // used. A warning will be issued (if warnings are allowed as controlled by
2851 // the KMP_WARNINGS env var).
2852 KF_TRACE(10, ("__kmp_set_max_active_levels: the call is ignored: new "
2853 "max_active_levels for thread %d = (%d)\n",
2854 gtid, max_active_levels));
2855 return;
2856 }
2857 if (max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT) {
2858 // it's OK, the max_active_levels is within the valid range: [ 0;
2859 // KMP_MAX_ACTIVE_LEVELS_LIMIT ]
2860 // We allow a zero value. (implementation defined behavior)
2861 } else {
2862 KMP_WARNING(ActiveLevelsExceedLimit, max_active_levels,
2863 KMP_MAX_ACTIVE_LEVELS_LIMIT);
2864 max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
2865 // Current upper limit is MAX_INT. (implementation defined behavior)
2866 // If the input exceeds the upper limit, we correct the input to be the
2867 // upper limit. (implementation defined behavior)
2868 // Actually, the flow should never get here until we use MAX_INT limit.
2869 }
2870 KF_TRACE(10, ("__kmp_set_max_active_levels: after validation: new "
2871 "max_active_levels for thread %d = (%d)\n",
2872 gtid, max_active_levels));
2873
2874 thread = __kmp_threads[gtid];
2875
2876 __kmp_save_internal_controls(thread);
2877
2878 set__max_active_levels(thread, max_active_levels);
2879}
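A short sketch of the corresponding public API (assumption: standard libomp behavior as coded above, where negative values are ignored with a warning and the previous setting is kept):

#include <omp.h>
#include <stdio.h>

int main(void) {
  omp_set_max_active_levels(2);   // accepted: stored in the current task's ICVs
  omp_set_max_active_levels(-1);  // ignored with a warning, per the code above
  printf("max-active-levels-var = %d\n", omp_get_max_active_levels());
  return 0;
}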
2880
2881/* Gets max_active_levels */
2882int __kmp_get_max_active_levels(int gtid) {
2883 kmp_info_t *thread;
2884
2885 KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d\n", gtid));
2886 KMP_DEBUG_ASSERT(__kmp_init_serial);
2887
2888 thread = __kmp_threads[gtid];
2889 KMP_DEBUG_ASSERT(thread->th.th_current_task);
2890 KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d, curtask=%p, "
2891 "curtask_maxaclevel=%d\n",
2892 gtid, thread->th.th_current_task,
2893 thread->th.th_current_task->td_icvs.max_active_levels));
2894 return thread->th.th_current_task->td_icvs.max_active_levels;
2895}
2896
2897// nteams-var per-device ICV
2898void __kmp_set_num_teams(int num_teams) {
2899 if (num_teams > 0)
2900 __kmp_nteams = num_teams;
2901}
2902int __kmp_get_max_teams(void) { return __kmp_nteams; }
2903// teams-thread-limit-var per-device ICV
2904void __kmp_set_teams_thread_limit(int limit) {
2905 if (limit > 0)
2906 __kmp_teams_thread_limit = limit;
2907}
2908int __kmp_get_teams_thread_limit(void) { return __kmp_teams_thread_limit; }
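A minimal sketch of the OpenMP 5.1 API that maps onto these per-device ICV setters/getters (assumption: an OpenMP 5.1-capable libomp; values <= 0 are ignored, as the code above shows):

#include <omp.h>
#include <stdio.h>

int main(void) {
  omp_set_num_teams(4);            // nteams-var
  omp_set_teams_thread_limit(8);   // teams-thread-limit-var
  printf("max teams = %d, teams thread limit = %d\n",
         omp_get_max_teams(), omp_get_teams_thread_limit());
  return 0;
}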
2909
2910KMP_BUILD_ASSERT(sizeof(kmp_sched_t) == sizeof(int));
2911KMP_BUILD_ASSERT(sizeof(enum sched_type) == sizeof(int));
2912
2913/* Changes def_sched_var ICV values (run-time schedule kind and chunk) */
2914void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) {
2915 kmp_info_t *thread;
2916 kmp_sched_t orig_kind;
2917 // kmp_team_t *team;
2918
2919 KF_TRACE(10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n",
2920 gtid, (int)kind, chunk));
2921 KMP_DEBUG_ASSERT(__kmp_init_serial);
2922
2923 // Check if the kind parameter is valid, correct if needed.
2924 // Valid parameters should fit in one of two intervals - standard or extended:
2925 // <lower>, <valid>, <upper_std>, <lower_ext>, <valid>, <upper>
2926 // 2008-01-25: 0, 1 - 4, 5, 100, 101 - 102, 103
2927 orig_kind = kind;
2928 kind = __kmp_sched_without_mods(kind);
2929
2930 if (kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
2931 (kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std)) {
2932 // TODO: Hint needs attention in case we change the default schedule.
2933 __kmp_msg(kmp_ms_warning, KMP_MSG(ScheduleKindOutOfRange, kind),
2934 KMP_HNT(DefaultScheduleKindUsed, "static, no chunk"),
2935 __kmp_msg_null);
2936 kind = kmp_sched_default;
2937 chunk = 0; // ignore chunk value in case of bad kind
2938 }
2939
2940 thread = __kmp_threads[gtid];
2941
2942 __kmp_save_internal_controls(thread);
2943
2944 if (kind < kmp_sched_upper_std) {
2945 if (kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK) {
2946 // differ static chunked vs. unchunked: chunk should be invalid to
2947 // indicate unchunked schedule (which is the default)
2948 thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
2949 } else {
2950 thread->th.th_current_task->td_icvs.sched.r_sched_type =
2951 __kmp_sch_map[kind - kmp_sched_lower - 1];
2952 }
2953 } else {
2954 // __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std -
2955 // kmp_sched_lower - 2 ];
2956 thread->th.th_current_task->td_icvs.sched.r_sched_type =
2957 __kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std -
2958 kmp_sched_lower - 2];
2959 }
2960 __kmp_sched_apply_mods_intkind(
2961 orig_kind, &(thread->th.th_current_task->td_icvs.sched.r_sched_type));
2962 if (kind == kmp_sched_auto || chunk < 1) {
2963 // ignore parameter chunk for schedule auto
2964 thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
2965 } else {
2966 thread->th.th_current_task->td_icvs.sched.chunk = chunk;
2967 }
2968}
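A small usage sketch of the public counterpart, omp_set_schedule, which is what reaches __kmp_set_schedule above (standard OpenMP API; a chunk below 1 selects the default chunk, matching the code):

#include <omp.h>
#include <stdio.h>

int main(void) {
  omp_set_schedule(omp_sched_dynamic, 4);  // sets run-sched-var

  #pragma omp parallel for schedule(runtime)
  for (int i = 0; i < 16; ++i)
    printf("i=%d on thread %d\n", i, omp_get_thread_num());
  return 0;
}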
2969
2970/* Gets def_sched_var ICV values */
2971void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk) {
2972 kmp_info_t *thread;
2973 enum sched_type th_type;
2974
2975 KF_TRACE(10, ("__kmp_get_schedule: thread %d\n", gtid));
2976 KMP_DEBUG_ASSERT(__kmp_init_serial);
2977
2978 thread = __kmp_threads[gtid];
2979
2980 th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
2981 switch (SCHEDULE_WITHOUT_MODIFIERS(th_type)) {
2982 case kmp_sch_static:
2983 case kmp_sch_static_greedy:
2984 case kmp_sch_static_balanced:
2985 *kind = kmp_sched_static;
2986 __kmp_sched_apply_mods_stdkind(kind, th_type);
2987 *chunk = 0; // chunk was not set, try to show this fact via zero value
2988 return;
2989 case kmp_sch_static_chunked:
2990 *kind = kmp_sched_static;
2991 break;
2992 case kmp_sch_dynamic_chunked:
2993 *kind = kmp_sched_dynamic;
2994 break;
2995 case kmp_sch_guided_chunked:
2996 case kmp_sch_guided_iterative_chunked:
2997 case kmp_sch_guided_analytical_chunked:
2998 *kind = kmp_sched_guided;
2999 break;
3000 case kmp_sch_auto:
3001 *kind = kmp_sched_auto;
3002 break;
3003 case kmp_sch_trapezoidal:
3004 *kind = kmp_sched_trapezoidal;
3005 break;
3006#if KMP_STATIC_STEAL_ENABLED
3007 case kmp_sch_static_steal:
3008 *kind = kmp_sched_static_steal;
3009 break;
3010#endif
3011 default:
3012 KMP_FATAL(UnknownSchedulingType, th_type);
3013 }
3014
3015 __kmp_sched_apply_mods_stdkind(kind, th_type);
3016 *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
3017}
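The matching query side, as a brief sketch (standard omp_get_schedule API; a chunk of 0 means the chunk was never set, as noted in the static cases above):

#include <omp.h>
#include <stdio.h>

int main(void) {
  omp_sched_t kind;
  int chunk;
  omp_get_schedule(&kind, &chunk);   // reads run-sched-var via the code above
  printf("run-sched-var: kind=%d chunk=%d\n", (int)kind, chunk);
  return 0;
}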
3018
3019int __kmp_get_ancestor_thread_num(int gtid, int level) {
3020
3021 int ii, dd;
3022 kmp_team_t *team;
3023 kmp_info_t *thr;
3024
3025 KF_TRACE(10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level));
3026 KMP_DEBUG_ASSERT(__kmp_init_serial);
3027
3028 // validate level
3029 if (level == 0)
3030 return 0;
3031 if (level < 0)
3032 return -1;
3033 thr = __kmp_threads[gtid];
3034 team = thr->th.th_team;
3035 ii = team->t.t_level;
3036 if (level > ii)
3037 return -1;
3038
3039 if (thr->th.th_teams_microtask) {
3040 // AC: we are in teams region where multiple nested teams have same level
3041 int tlevel = thr->th.th_teams_level; // the level of the teams construct
3042 if (level <=
3043 tlevel) { // otherwise usual algorithm works (will not touch the teams)
3044 KMP_DEBUG_ASSERT(ii >= tlevel);
3045 // AC: As we need to pass by the teams league, we need to artificially
3046 // increase ii
3047 if (ii == tlevel) {
3048 ii += 2; // three teams have same level
3049 } else {
3050 ii++; // two teams have same level
3051 }
3052 }
3053 }
3054
3055 if (ii == level)
3056 return __kmp_tid_from_gtid(gtid);
3057
3058 dd = team->t.t_serialized;
3059 level++;
3060 while (ii > level) {
3061 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
3062 }
3063 if ((team->t.t_serialized) && (!dd)) {
3064 team = team->t.t_parent;
3065 continue;
3066 }
3067 if (ii > level) {
3068 team = team->t.t_parent;
3069 dd = team->t.t_serialized;
3070 ii--;
3071 }
3072 }
3073
3074 return (dd > 1) ? (0) : (team->t.t_master_tid);
3075}
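A brief sketch of the public API this implements, omp_get_ancestor_thread_num (level 0 is the initial thread; the current region's level equals omp_get_level()). Illustration only, assuming default runtime settings:

#include <omp.h>
#include <stdio.h>

int main(void) {
  omp_set_max_active_levels(2);       // allow the inner region to be active
  #pragma omp parallel num_threads(2)
  #pragma omp parallel num_threads(2)
  {
    printf("outer tid=%d inner tid=%d\n",
           omp_get_ancestor_thread_num(1), omp_get_thread_num());
  }
  return 0;
}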
3076
3077int __kmp_get_team_size(int gtid, int level) {
3078
3079 int ii, dd;
3080 kmp_team_t *team;
3081 kmp_info_t *thr;
3082
3083 KF_TRACE(10, ("__kmp_get_team_size: thread %d %d\n", gtid, level));
3084 KMP_DEBUG_ASSERT(__kmp_init_serial);
3085
3086 // validate level
3087 if (level == 0)
3088 return 1;
3089 if (level < 0)
3090 return -1;
3091 thr = __kmp_threads[gtid];
3092 team = thr->th.th_team;
3093 ii = team->t.t_level;
3094 if (level > ii)
3095 return -1;
3096
3097 if (thr->th.th_teams_microtask) {
3098 // AC: we are in teams region where multiple nested teams have same level
3099 int tlevel = thr->th.th_teams_level; // the level of the teams construct
3100 if (level <=
3101 tlevel) { // otherwise usual algorithm works (will not touch the teams)
3102 KMP_DEBUG_ASSERT(ii >= tlevel);
3103 // AC: As we need to pass by the teams league, we need to artificially
3104 // increase ii
3105 if (ii == tlevel) {
3106 ii += 2; // three teams have same level
3107 } else {
3108 ii++; // two teams have same level
3109 }
3110 }
3111 }
3112
3113 while (ii > level) {
3114 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
3115 }
3116 if (team->t.t_serialized && (!dd)) {
3117 team = team->t.t_parent;
3118 continue;
3119 }
3120 if (ii > level) {
3121 team = team->t.t_parent;
3122 ii--;
3123 }
3124 }
3125
3126 return team->t.t_nproc;
3127}
3128
3129kmp_r_sched_t __kmp_get_schedule_global() {
3130 // This routine was created because the pairs (__kmp_sched, __kmp_chunk) and
3131 // (__kmp_static, __kmp_guided) may be changed by kmp_set_defaults
3132 // independently, so one can get the updated schedule here.
3133
3134 kmp_r_sched_t r_sched;
3135
3136 // create schedule from 4 globals: __kmp_sched, __kmp_chunk, __kmp_static,
3137 // __kmp_guided. __kmp_sched should keep original value, so that user can set
3138 // KMP_SCHEDULE multiple times, and thus have different run-time schedules in
3139 // different roots (even in OMP 2.5)
3140 enum sched_type s = SCHEDULE_WITHOUT_MODIFIERS(__kmp_sched);
3141 enum sched_type sched_modifiers = SCHEDULE_GET_MODIFIERS(__kmp_sched);
3142 if (s == kmp_sch_static) {
3143 // replace STATIC with more detailed schedule (balanced or greedy)
3144 r_sched.r_sched_type = __kmp_static;
3145 } else if (s == kmp_sch_guided_chunked) {
3146 // replace GUIDED with more detailed schedule (iterative or analytical)
3147 r_sched.r_sched_type = __kmp_guided;
3148 } else { // (STATIC_CHUNKED), or (DYNAMIC_CHUNKED), or other
3149 r_sched.r_sched_type = __kmp_sched;
3150 }
3151 SCHEDULE_SET_MODIFIERS(r_sched.r_sched_type, sched_modifiers);
3152
3153 if (__kmp_chunk < KMP_DEFAULT_CHUNK) {
3154 // __kmp_chunk may be wrong here (if it was not ever set)
3155 r_sched.chunk = KMP_DEFAULT_CHUNK;
3156 } else {
3157 r_sched.chunk = __kmp_chunk;
3158 }
3159
3160 return r_sched;
3161}
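
The routine above splits __kmp_sched into a base schedule and its monotonic/nonmonotonic modifier bits, maps the base to a more detailed schedule, then ORs the saved modifiers back in. Below is a standalone sketch of that split/recombine pattern; the enumerator values are illustrative assumptions, not the real kmp_sch_* encodings.

#include <cstdio>

enum sched_bits : unsigned {
  sketch_sch_static = 1,
  sketch_sch_static_balanced = 3,          // a more detailed "static" flavor
  sketch_modifier_monotonic = 1u << 29,    // assumed bit placement
  sketch_modifier_nonmonotonic = 1u << 30, // assumed bit placement
  sketch_modifier_mask = sketch_modifier_monotonic | sketch_modifier_nonmonotonic
};

// Strip modifiers, map the base schedule, then restore the modifiers.
static unsigned resolve_schedule(unsigned user_sched, unsigned detailed_static) {
  unsigned base = user_sched & ~sketch_modifier_mask; // "without modifiers"
  unsigned mods = user_sched & sketch_modifier_mask;  // "get modifiers"
  unsigned resolved = (base == sketch_sch_static) ? detailed_static : base;
  return resolved | mods;                             // "set modifiers"
}

int main() {
  unsigned user = sketch_sch_static | sketch_modifier_nonmonotonic;
  std::printf("resolved = %#x\n", resolve_schedule(user, sketch_sch_static_balanced));
  return 0;
}
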
3162
3163/* Allocate (realloc == FALSE) or reallocate (realloc == TRUE)
3164 at least argc *t_argv entries for the requested team. */
3165static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc) {
3166
3167 KMP_DEBUG_ASSERT(team);
3168 if (!realloc || argc > team->t.t_max_argc) {
3169
3170 KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: needed entries=%d, "
3171 "current entries=%d\n",
3172 team->t.t_id, argc, (realloc) ? team->t.t_max_argc : 0));
3173 /* if previously allocated heap space for args, free them */
3174 if (realloc && team->t.t_argv != &team->t.t_inline_argv[0])
3175 __kmp_free((void *)team->t.t_argv);
3176
3177 if (argc <= KMP_INLINE_ARGV_ENTRIES) {
3178 /* use unused space in the cache line for arguments */
3179 team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
3180 KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: inline allocate %d "
3181 "argv entries\n",
3182 team->t.t_id, team->t.t_max_argc));
3183 team->t.t_argv = &team->t.t_inline_argv[0];
3184 if (__kmp_storage_map) {
3185 __kmp_print_storage_map_gtid(
3186 -1, &team->t.t_inline_argv[0],
3187 &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
3188 (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES), "team_%d.t_inline_argv",
3189 team->t.t_id);
3190 }
3191 } else {
3192 /* allocate space for arguments in the heap */
3193 team->t.t_max_argc = (argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1))
3194 ? KMP_MIN_MALLOC_ARGV_ENTRIES
3195 : 2 * argc;
3196 KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: dynamic allocate %d "
3197 "argv entries\n",
3198 team->t.t_id, team->t.t_max_argc));
3199 team->t.t_argv =
3200 (void **)__kmp_page_allocate(sizeof(void *) * team->t.t_max_argc);
3201 if (__kmp_storage_map) {
3202 __kmp_print_storage_map_gtid(-1, &team->t.t_argv[0],
3203 &team->t.t_argv[team->t.t_max_argc],
3204 sizeof(void *) * team->t.t_max_argc,
3205 "team_%d.t_argv", team->t.t_id);
3206 }
3207 }
3208 }
3209}
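
The sizing rule above has three regimes: small argument lists reuse the inline cache-line space, mid-size requests get a fixed minimum heap block, and larger ones get twice the request. A compile-time sketch of just that policy, with assumed stand-ins for KMP_INLINE_ARGV_ENTRIES and KMP_MIN_MALLOC_ARGV_ENTRIES:

constexpr int kInlineEntries = 7;      // assumed stand-in for KMP_INLINE_ARGV_ENTRIES
constexpr int kMinMallocEntries = 100; // assumed stand-in for KMP_MIN_MALLOC_ARGV_ENTRIES

// How many entries would be reserved for a request of `argc` under the policy above.
constexpr int argv_capacity_for(int argc) {
  if (argc <= kInlineEntries)
    return kInlineEntries; // fits in the inline cache-line space, no allocation
  return (argc <= (kMinMallocEntries >> 1)) ? kMinMallocEntries : 2 * argc;
}

static_assert(argv_capacity_for(3) == 7, "small requests stay inline");
static_assert(argv_capacity_for(40) == 100, "mid-size requests get the minimum heap block");
static_assert(argv_capacity_for(80) == 160, "large requests grow to 2 * argc");

int main() { return 0; }
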
3210
3211static void __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) {
3212 int i;
3213 int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
3214 team->t.t_threads =
3215 (kmp_info_t **)__kmp_allocate(sizeof(kmp_info_t *) * max_nth);
3216 team->t.t_disp_buffer = (dispatch_shared_info_t *)__kmp_allocate(
3217 sizeof(dispatch_shared_info_t) * num_disp_buff);
3218 team->t.t_dispatch =
3219 (kmp_disp_t *)__kmp_allocate(sizeof(kmp_disp_t) * max_nth);
3220 team->t.t_implicit_task_taskdata =
3221 (kmp_taskdata_t *)__kmp_allocate(sizeof(kmp_taskdata_t) * max_nth);
3222 team->t.t_max_nproc = max_nth;
3223
3224 /* setup dispatch buffers */
3225 for (i = 0; i < num_disp_buff; ++i) {
3226 team->t.t_disp_buffer[i].buffer_index = i;
3227 team->t.t_disp_buffer[i].doacross_buf_idx = i;
3228 }
3229}
3230
3231static void __kmp_free_team_arrays(kmp_team_t *team) {
3232 /* Note: this does not free the threads in t_threads (__kmp_free_threads) */
3233 int i;
3234 for (i = 0; i < team->t.t_max_nproc; ++i) {
3235 if (team->t.t_dispatch[i].th_disp_buffer != NULL) {
3236 __kmp_free(team->t.t_dispatch[i].th_disp_buffer);
3237 team->t.t_dispatch[i].th_disp_buffer = NULL;
3238 }
3239 }
3240#if KMP_USE_HIER_SCHED
3241 __kmp_dispatch_free_hierarchies(team);
3242#endif
3243 __kmp_free(team->t.t_threads);
3244 __kmp_free(team->t.t_disp_buffer);
3245 __kmp_free(team->t.t_dispatch);
3246 __kmp_free(team->t.t_implicit_task_taskdata);
3247 team->t.t_threads = NULL;
3248 team->t.t_disp_buffer = NULL;
3249 team->t.t_dispatch = NULL;
3250 team->t.t_implicit_task_taskdata = 0;
3251}
3252
3253static void __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
3254 kmp_info_t **oldThreads = team->t.t_threads;
3255
3256 __kmp_free(team->t.t_disp_buffer);
3257 __kmp_free(team->t.t_dispatch);
3258 __kmp_free(team->t.t_implicit_task_taskdata);
3259 __kmp_allocate_team_arrays(team, max_nth);
3260
3261 KMP_MEMCPY(team->t.t_threads, oldThreads,
3262 team->t.t_nproc * sizeof(kmp_info_t *));
3263
3264 __kmp_free(oldThreads);
3265}
3266
3267static kmp_internal_control_t __kmp_get_global_icvs(void) {
3268
3269 kmp_r_sched_t r_sched =
3270 __kmp_get_schedule_global(); // get current state of scheduling globals
3271
3272 KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.used > 0);
3273
3274 kmp_internal_control_t g_icvs = {
3275 0, // int serial_nesting_level; //corresponds to value of th_team_serialized
3276 (kmp_int8)__kmp_global.g.g_dynamic, // internal control for dynamic
3277 // adjustment of threads (per thread)
3278 (kmp_int8)__kmp_env_blocktime, // int bt_set; //internal control for
3279 // whether blocktime is explicitly set
3280 __kmp_dflt_blocktime, // int blocktime; //internal control for blocktime
3281#if KMP_USE_MONITOR
3282 __kmp_bt_intervals, // int bt_intervals; //internal control for blocktime
3283// intervals
3284#endif
3285 __kmp_dflt_team_nth, // int nproc; //internal control for # of threads for
3286 // next parallel region (per thread)
3287 // (use a max ub on value if __kmp_parallel_initialize not called yet)
3288 __kmp_cg_max_nth, // int thread_limit;
3289 __kmp_dflt_max_active_levels, // int max_active_levels; //internal control
3290 // for max_active_levels
3291 r_sched, // kmp_r_sched_t sched; //internal control for runtime schedule
3292 // {sched,chunk} pair
3293 __kmp_nested_proc_bind.bind_types[0],
3294 __kmp_default_device,
3295 NULL // struct kmp_internal_control *next;
3296 };
3297
3298 return g_icvs;
3299}
3300
3301static kmp_internal_control_t __kmp_get_x_global_icvs(const kmp_team_t *team) {
3302
3303 kmp_internal_control_t gx_icvs;
3304 gx_icvs.serial_nesting_level =
3305 0; // probably =team->t.t_serial like in save_inter_controls
3306 copy_icvs(&gx_icvs, &team->t.t_threads[0]->th.th_current_task->td_icvs);
3307 gx_icvs.next = NULL;
3308
3309 return gx_icvs;
3310}
3311
3312static void __kmp_initialize_root(kmp_root_t *root) {
3313 int f;
3314 kmp_team_t *root_team;
3315 kmp_team_t *hot_team;
3316 int hot_team_max_nth;
3317 kmp_r_sched_t r_sched =
3318 __kmp_get_schedule_global(); // get current state of scheduling globals
3319 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3320 KMP_DEBUG_ASSERT(root);
3321 KMP_ASSERT(!root->r.r_begin);
3322
3323 /* setup the root state structure */
3324 __kmp_init_lock(&root->r.r_begin_lock);
3325 root->r.r_begin = FALSE;
3326 root->r.r_active = FALSE;
3327 root->r.r_in_parallel = 0;
3328 root->r.r_blocktime = __kmp_dflt_blocktime;
3329#if KMP_AFFINITY_SUPPORTED
3330 root->r.r_affinity_assigned = FALSE;
3331#endif
3332
3333 /* setup the root team for this task */
3334 /* allocate the root team structure */
3335 KF_TRACE(10, ("__kmp_initialize_root: before root_team\n"));
3336
3337 root_team =
3338 __kmp_allocate_team(root,
3339 1, // new_nproc
3340 1, // max_nproc
3341#if OMPT_SUPPORT
3342 ompt_data_none, // root parallel id
3343#endif
3344 __kmp_nested_proc_bind.bind_types[0], &r_icvs,
3345 0 // argc
3346 USE_NESTED_HOT_ARG(NULL) // primary thread is unknown
3347 );
3348#if USE_DEBUGGER
3349 // Non-NULL value should be assigned to make the debugger display the root
3350 // team.
3351 TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)(~0));
3352#endif
3353
3354 KF_TRACE(10, ("__kmp_initialize_root: after root_team = %p\n", root_team));
3355
3356 root->r.r_root_team = root_team;
3357 root_team->t.t_control_stack_top = NULL;
3358
3359 /* initialize root team */
3360 root_team->t.t_threads[0] = NULL;
3361 root_team->t.t_nproc = 1;
3362 root_team->t.t_serialized = 1;
3363 // TODO???: root_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
3364 root_team->t.t_sched.sched = r_sched.sched;
3365 KA_TRACE(
3366 20,
3367 ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
3368 root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
3369
3370 /* setup the hot team for this task */
3371 /* allocate the hot team structure */
3372 KF_TRACE(10, ("__kmp_initialize_root: before hot_team\n"));
3373
3374 hot_team =
3375 __kmp_allocate_team(root,
3376 1, // new_nproc
3377 __kmp_dflt_team_nth_ub * 2, // max_nproc
3378#if OMPT_SUPPORT
3379 ompt_data_none, // root parallel id
3380#endif
3381 __kmp_nested_proc_bind.bind_types[0], &r_icvs,
3382 0 // argc
3383 USE_NESTED_HOT_ARG(NULL) // primary thread is unknown
3384 );
3385 KF_TRACE(10, ("__kmp_initialize_root: after hot_team = %p\n", hot_team));
3386
3387 root->r.r_hot_team = hot_team;
3388 root_team->t.t_control_stack_top = NULL;
3389
3390 /* first-time initialization */
3391 hot_team->t.t_parent = root_team;
3392
3393 /* initialize hot team */
3394 hot_team_max_nth = hot_team->t.t_max_nproc;
3395 for (f = 0; f < hot_team_max_nth; ++f) {
3396 hot_team->t.t_threads[f] = NULL;
3397 }
3398 hot_team->t.t_nproc = 1;
3399 // TODO???: hot_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
3400 hot_team->t.t_sched.sched = r_sched.sched;
3401 hot_team->t.t_size_changed = 0;
3402}
3403
3404#ifdef KMP_DEBUG
3405
3406typedef struct kmp_team_list_item {
3407 kmp_team_p const *entry;
3408 struct kmp_team_list_item *next;
3409} kmp_team_list_item_t;
3410typedef kmp_team_list_item_t *kmp_team_list_t;
3411
3412static void __kmp_print_structure_team_accum( // Add team to list of teams.
3413 kmp_team_list_t list, // List of teams.
3414 kmp_team_p const *team // Team to add.
3415) {
3416
3417 // List must terminate with item where both entry and next are NULL.
3418 // Team is added to the list only once.
3419 // List is sorted in ascending order by team id.
3420 // Team id is *not* a key.
3421
3422 kmp_team_list_t l;
3423
3424 KMP_DEBUG_ASSERT(list != NULL);
3425 if (team == NULL) {
3426 return;
3427 }
3428
3429 __kmp_print_structure_team_accum(list, team->t.t_parent);
3430 __kmp_print_structure_team_accum(list, team->t.t_next_pool);
3431
3432 // Search list for the team.
3433 l = list;
3434 while (l->next != NULL && l->entry != team) {
3435 l = l->next;
3436 }
3437 if (l->next != NULL) {
3438 return; // Team has been added before, exit.
3439 }
3440
3441 // Team is not found. Search list again for insertion point.
3442 l = list;
3443 while (l->next != NULL && l->entry->t.t_id <= team->t.t_id) {
3444 l = l->next;
3445 }
3446
3447 // Insert team.
3448 {
3449 kmp_team_list_item_t *item = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
3450 sizeof(kmp_team_list_item_t));
3451 *item = *l;
3452 l->entry = team;
3453 l->next = item;
3454 }
3455}
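
The accumulator above keeps a list that always ends in a sentinel node (entry and next both null) and inserts in ascending t_id order by copying the current node into a fresh one and overwriting it in place, so no back pointer is needed. A minimal sketch of that sentinel-based insert, independent of the kmp_team_list_item_t machinery (the dedup pass is omitted):

#include <cstdlib>

struct item_t {
  int id;       // stands in for team->t.t_id
  item_t *next; // nullptr only on the sentinel
};

static void sorted_insert(item_t *list, int id) {
  item_t *l = list;
  while (l->next != nullptr && l->id <= id) // find the insertion point
    l = l->next;
  item_t *copy = (item_t *)std::malloc(sizeof(item_t));
  *copy = *l;     // push the old contents (possibly the sentinel) down one node
  l->id = id;     // and take over the current node in place
  l->next = copy;
}

int main() {
  item_t *head = (item_t *)std::malloc(sizeof(item_t));
  *head = item_t{-1, nullptr}; // empty list: just the sentinel
  sorted_insert(head, 5);
  sorted_insert(head, 2);      // list now reads 2, 5, then the sentinel
  while (head) { item_t *n = head->next; std::free(head); head = n; }
  return 0;
}
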
3456
3457static void __kmp_print_structure_team(char const *title, kmp_team_p const *team
3458
3459) {
3460 __kmp_printf("%s", title);
3461 if (team != NULL) {
3462 __kmp_printf("%2x %p\n", team->t.t_id, team);
3463 } else {
3464 __kmp_printf(" - (nil)\n");
3465 }
3466}
3467
3468static void __kmp_print_structure_thread(char const *title,
3469 kmp_info_p const *thread) {
3470 __kmp_printf("%s", title);
3471 if (thread != NULL) {
3472 __kmp_printf("%2d %p\n", thread->th.th_info.ds.ds_gtid, thread);
3473 } else {
3474 __kmp_printf(" - (nil)\n");
3475 }
3476}
3477
3478void __kmp_print_structure(void) {
3479
3480 kmp_team_list_t list;
3481
3482 // Initialize list of teams.
3483 list =
3484 (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(sizeof(kmp_team_list_item_t));
3485 list->entry = NULL;
3486 list->next = NULL;
3487
3488 __kmp_printf("\n------------------------------\nGlobal Thread "
3489 "Table\n------------------------------\n");
3490 {
3491 int gtid;
3492 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3493 __kmp_printf("%2d", gtid);
3494 if (__kmp_threads != NULL) {
3495 __kmp_printf(" %p", __kmp_threads[gtid]);
3496 }
3497 if (__kmp_root != NULL) {
3498 __kmp_printf(" %p", __kmp_root[gtid]);
3499 }
3500 __kmp_printf("\n");
3501 }
3502 }
3503
3504 // Print out __kmp_threads array.
3505 __kmp_printf("\n------------------------------\nThreads\n--------------------"
3506 "----------\n");
3507 if (__kmp_threads != NULL) {
3508 int gtid;
3509 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3510 kmp_info_t const *thread = __kmp_threads[gtid];
3511 if (thread != NULL) {
3512 __kmp_printf("GTID %2d %p:\n", gtid, thread);
3513 __kmp_printf(" Our Root: %p\n", thread->th.th_root);
3514 __kmp_print_structure_team(" Our Team: ", thread->th.th_team);
3515 __kmp_print_structure_team(" Serial Team: ",
3516 thread->th.th_serial_team);
3517 __kmp_printf(" Threads: %2d\n", thread->th.th_team_nproc);
3518 __kmp_print_structure_thread(" Primary: ",
3519 thread->th.th_team_master);
3520 __kmp_printf(" Serialized?: %2d\n", thread->th.th_team_serialized);
3521 __kmp_printf(" Set NProc: %2d\n", thread->th.th_set_nproc);
3522 __kmp_printf(" Set Proc Bind: %2d\n", thread->th.th_set_proc_bind);
3523 __kmp_print_structure_thread(" Next in pool: ",
3524 thread->th.th_next_pool);
3525 __kmp_printf("\n");
3526 __kmp_print_structure_team_accum(list, thread->th.th_team);
3527 __kmp_print_structure_team_accum(list, thread->th.th_serial_team);
3528 }
3529 }
3530 } else {
3531 __kmp_printf("Threads array is not allocated.\n");
3532 }
3533
3534 // Print out __kmp_root array.
3535 __kmp_printf("\n------------------------------\nUbers\n----------------------"
3536 "--------\n");
3537 if (__kmp_root != NULL) {
3538 int gtid;
3539 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3540 kmp_root_t const *root = __kmp_root[gtid];
3541 if (root != NULL) {
3542 __kmp_printf("GTID %2d %p:\n", gtid, root);
3543 __kmp_print_structure_team(" Root Team: ", root->r.r_root_team);
3544 __kmp_print_structure_team(" Hot Team: ", root->r.r_hot_team);
3545 __kmp_print_structure_thread(" Uber Thread: ",
3546 root->r.r_uber_thread);
3547 __kmp_printf(" Active?: %2d\n", root->r.r_active);
3548 __kmp_printf(" In Parallel: %2d\n",
3549 KMP_ATOMIC_LD_RLX(&root->r.r_in_parallel));
3550 __kmp_printf("\n");
3551 __kmp_print_structure_team_accum(list, root->r.r_root_team);
3552 __kmp_print_structure_team_accum(list, root->r.r_hot_team);
3553 }
3554 }
3555 } else {
3556 __kmp_printf("Ubers array is not allocated.\n");
3557 }
3558
3559 __kmp_printf("\n------------------------------\nTeams\n----------------------"
3560 "--------\n");
3561 while (list->next != NULL) {
3562 kmp_team_p const *team = list->entry;
3563 int i;
3564 __kmp_printf("Team %2x %p:\n", team->t.t_id, team);
3565 __kmp_print_structure_team(" Parent Team: ", team->t.t_parent);
3566 __kmp_printf(" Primary TID: %2d\n", team->t.t_master_tid);
3567 __kmp_printf(" Max threads: %2d\n", team->t.t_max_nproc);
3568 __kmp_printf(" Levels of serial: %2d\n", team->t.t_serialized);
3569 __kmp_printf(" Number threads: %2d\n", team->t.t_nproc);
3570 for (i = 0; i < team->t.t_nproc; ++i) {
3571 __kmp_printf(" Thread %2d: ", i);
3572 __kmp_print_structure_thread("", team->t.t_threads[i]);
3573 }
3574 __kmp_print_structure_team(" Next in pool: ", team->t.t_next_pool);
3575 __kmp_printf("\n");
3576 list = list->next;
3577 }
3578
3579 // Print out __kmp_thread_pool and __kmp_team_pool.
3580 __kmp_printf("\n------------------------------\nPools\n----------------------"
3581 "--------\n");
3582 __kmp_print_structure_thread("Thread pool: ",
3583 CCAST(kmp_info_t *, __kmp_thread_pool));
3584 __kmp_print_structure_team("Team pool: ",
3585 CCAST(kmp_team_t *, __kmp_team_pool));
3586 __kmp_printf("\n");
3587
3588 // Free team list.
3589 while (list != NULL) {
3590 kmp_team_list_item_t *item = list;
3591 list = list->next;
3592 KMP_INTERNAL_FREE(item);
3593 }
3594}
3595
3596#endif
3597
3598//---------------------------------------------------------------------------
3599// Stuff for per-thread fast random number generator
3600// Table of primes
3601static const unsigned __kmp_primes[] = {
3602 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, 0xba5703f5, 0xb495a877,
3603 0xe1626741, 0x79695e6b, 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
3604 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, 0xbe4d6fe9, 0x5f15e201,
3605 0x99afc3fd, 0xf3f16801, 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
3606 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, 0x085a3d61, 0x46eb5ea7,
3607 0x3d9910ed, 0x2e687b5b, 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
3608 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, 0x54581edb, 0xf2480f45,
3609 0x0bb9288f, 0xef1affc7, 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
3610 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, 0xfc411073, 0xc3749363,
3611 0xb892d829, 0x3549366b, 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
3612 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f};
3613
3614//---------------------------------------------------------------------------
3615// __kmp_get_random: Get a random number using a linear congruential method.
3616unsigned short __kmp_get_random(kmp_info_t *thread) {
3617 unsigned x = thread->th.th_x;
3618 unsigned short r = (unsigned short)(x >> 16);
3619
3620 thread->th.th_x = x * thread->th.th_a + 1;
3621
3622 KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
3623 thread->th.th_info.ds.ds_tid, r));
3624
3625 return r;
3626}
3627//--------------------------------------------------------
3628// __kmp_init_random: Initialize a random number generator
3629void __kmp_init_random(kmp_info_t *thread) {
3630 unsigned seed = thread->th.th_info.ds.ds_tid;
3631
3632 thread->th.th_a =
3633 __kmp_primes[seed % (sizeof(__kmp_primes) / sizeof(__kmp_primes[0]))];
3634 thread->th.th_x = (seed + 1) * thread->th.th_a + 1;
3635 KA_TRACE(30,
3636 ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a));
3637}
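
The generator above is a plain linear congruential step x = a*x + 1 (mod 2^32) whose multiplier is chosen from the prime table by thread id, and it returns the better-mixed high 16 bits of the state. A self-contained sketch follows; the struct is an illustrative stand-in for the th_a/th_x fields, and the two primes are copied from the table above.

#include <cstdio>

struct fast_rng {
  unsigned a; // multiplier, one of the primes
  unsigned x; // current state
};

static void rng_init(fast_rng *r, unsigned tid) {
  static const unsigned primes[] = {0x9e3779b1, 0xffe6cc59}; // subset of the table
  r->a = primes[tid % (sizeof(primes) / sizeof(primes[0]))];
  r->x = (tid + 1) * r->a + 1;
}

static unsigned short rng_next(fast_rng *r) {
  unsigned short out = (unsigned short)(r->x >> 16); // high bits are better mixed
  r->x = r->x * r->a + 1;                            // LCG step, wraps mod 2^32
  return out;
}

int main() {
  fast_rng r;
  rng_init(&r, 3);
  std::printf("%u %u %u\n", rng_next(&r), rng_next(&r), rng_next(&r));
  return 0;
}
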
3638
3639#if KMP_OS_WINDOWS
3640/* reclaim array entries for root threads that are already dead, returns number
3641 * reclaimed */
3642static int __kmp_reclaim_dead_roots(void) {
3643 int i, r = 0;
3644
3645 for (i = 0; i < __kmp_threads_capacity; ++i) {
3646 if (KMP_UBER_GTID(i) &&
3647 !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
3648 !__kmp_root[i]
3649 ->r.r_active) { // AC: reclaim only roots died in non-active state
3650 r += __kmp_unregister_root_other_thread(i);
3651 }
3652 }
3653 return r;
3654}
3655#endif
3656
3657/* This function attempts to create free entries in __kmp_threads and
3658 __kmp_root, and returns the number of free entries generated.
3659
3660 For Windows* OS static library, the first mechanism used is to reclaim array
3661 entries for root threads that are already dead.
3662
3663 On all platforms, expansion is attempted on the arrays __kmp_threads and
3664 __kmp_root, with appropriate update to __kmp_threads_capacity. Array
3665 capacity is increased by doubling with clipping to __kmp_tp_capacity, if
3666 threadprivate cache array has been created. Synchronization with
3667 __kmpc_threadprivate_cached is done using __kmp_tp_cached_lock.
3668
3669 After any dead root reclamation, if the clipping value allows array expansion
3670 to result in the generation of a total of nNeed free slots, the function does
3671 that expansion. If not, nothing is done beyond the possible initial root
3672 thread reclamation.
3673
3674 If any argument is negative, the behavior is undefined. */
3675static int __kmp_expand_threads(int nNeed) {
3676 int added = 0;
3677 int minimumRequiredCapacity;
3678 int newCapacity;
3679 kmp_info_t **newThreads;
3680 kmp_root_t **newRoot;
3681
3682 // All calls to __kmp_expand_threads should be under __kmp_forkjoin_lock, so
3683 // resizing __kmp_threads does not need additional protection if foreign
3684 // threads are present
3685
3686#if KMP_OS_WINDOWS && !KMP_DYNAMIC_LIB
3687 /* only for Windows static library */
3688 /* reclaim array entries for root threads that are already dead */
3689 added = __kmp_reclaim_dead_roots();
3690
3691 if (nNeed) {
3692 nNeed -= added;
3693 if (nNeed < 0)
3694 nNeed = 0;
3695 }
3696#endif
3697 if (nNeed <= 0)
3698 return added;
3699
3700 // Note that __kmp_threads_capacity is not bounded by __kmp_max_nth. If
3701 // __kmp_max_nth is set to some value less than __kmp_sys_max_nth by the
3702 // user via KMP_DEVICE_THREAD_LIMIT, then __kmp_threads_capacity may become
3703 // > __kmp_max_nth in one of two ways:
3704 //
3705 // 1) The initialization thread (gtid = 0) exits. __kmp_threads[0]
3706 // may not be reused by another thread, so we may need to increase
3707 // __kmp_threads_capacity to __kmp_max_nth + 1.
3708 //
3709 // 2) New foreign root(s) are encountered. We always register new foreign
3710 // roots. This may cause a smaller # of threads to be allocated at
3711 // subsequent parallel regions, but the worker threads hang around (and
3712 // eventually go to sleep) and need slots in the __kmp_threads[] array.
3713 //
3714 // Anyway, that is the reason for moving the check to see if
3715 // __kmp_max_nth was exceeded into __kmp_reserve_threads()
3716 // instead of having it performed here. -BB
3717
3718 KMP_DEBUG_ASSERT(__kmp_sys_max_nth >= __kmp_threads_capacity);
3719
3720 /* compute expansion headroom to check if we can expand */
3721 if (__kmp_sys_max_nth - __kmp_threads_capacity < nNeed) {
3722 /* possible expansion too small -- give up */
3723 return added;
3724 }
3725 minimumRequiredCapacity = __kmp_threads_capacity + nNeed;
3726
3727 newCapacity = __kmp_threads_capacity;
3728 do {
3729 newCapacity = newCapacity <= (__kmp_sys_max_nth >> 1) ? (newCapacity << 1)
3730 : __kmp_sys_max_nth;
3731 } while (newCapacity < minimumRequiredCapacity);
3732 newThreads = (kmp_info_t **)__kmp_allocate(
3733 (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * newCapacity + CACHE_LINE);
3734 newRoot =
3735 (kmp_root_t **)((char *)newThreads + sizeof(kmp_info_t *) * newCapacity);
3736 KMP_MEMCPY(newThreads, __kmp_threads,
3737 __kmp_threads_capacity * sizeof(kmp_info_t *));
3738 KMP_MEMCPY(newRoot, __kmp_root,
3739 __kmp_threads_capacity * sizeof(kmp_root_t *));
3740 // Put old __kmp_threads array on a list. Any ongoing references to the old
3741 // list will be valid. This list is cleaned up at library shutdown.
3742 kmp_old_threads_list_t *node =
3743 (kmp_old_threads_list_t *)__kmp_allocate(sizeof(kmp_old_threads_list_t));
3744 node->threads = __kmp_threads;
3745 node->next = __kmp_old_threads_list;
3746 __kmp_old_threads_list = node;
3747
3748 *(kmp_info_t * *volatile *)&__kmp_threads = newThreads;
3749 *(kmp_root_t * *volatile *)&__kmp_root = newRoot;
3750 added += newCapacity - __kmp_threads_capacity;
3751 *(volatile int *)&__kmp_threads_capacity = newCapacity;
3752
3753 if (newCapacity > __kmp_tp_capacity) {
3754 __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
3755 if (__kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3756 __kmp_threadprivate_resize_cache(newCapacity);
3757 } else { // increase __kmp_tp_capacity to correspond with kmp_threads size
3758 *(volatile int *)&__kmp_tp_capacity = newCapacity;
3759 }
3760 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
3761 }
3762
3763 return added;
3764}
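
The expansion above doubles the capacity until it covers the requirement, clipping at __kmp_sys_max_nth, and gives up when even the clipped maximum leaves too little headroom. A standalone sketch of just that growth rule, with an assumed stand-in for the system maximum and assuming a nonzero starting capacity:

#include <cstdio>

constexpr int kSysMax = 1 << 15; // assumed stand-in for __kmp_sys_max_nth

// Returns the new capacity, or -1 when expansion by `need` slots is impossible.
// Assumes capacity > 0, as the runtime always starts from a nonzero capacity.
static int grow_capacity(int capacity, int need) {
  if (kSysMax - capacity < need)
    return -1; // not enough headroom even at the clipped maximum
  int required = capacity + need;
  int newCapacity = capacity;
  do {
    newCapacity = (newCapacity <= (kSysMax >> 1)) ? (newCapacity << 1) : kSysMax;
  } while (newCapacity < required);
  return newCapacity;
}

int main() {
  std::printf("%d\n", grow_capacity(64, 100));         // 256
  std::printf("%d\n", grow_capacity(kSysMax - 4, 8));  // -1, no headroom
  return 0;
}
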
3765
3766/* Register the current thread as a root thread and obtain our gtid. We must
3767 have the __kmp_initz_lock held at this point. Argument TRUE only if are the
3768 thread that calls from __kmp_do_serial_initialize() */
3769int __kmp_register_root(int initial_thread) {
3770 kmp_info_t *root_thread;
3771 kmp_root_t *root;
3772 int gtid;
3773 int capacity;
3774 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
3775 KA_TRACE(20, ("__kmp_register_root: entered\n"))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_register_root: entered\n"
); }
;
3776 KMP_MB();
3777
3778 /* 2007-03-02:
3779 If initial thread did not invoke OpenMP RTL yet, and this thread is not an
3780 initial one, "__kmp_all_nth >= __kmp_threads_capacity" condition does not
3781 work as expected -- it may return false (that means there is at least one
3782 empty slot in __kmp_threads array), but it is possible the only free slot
3783 is #0, which is reserved for the initial thread and so cannot be used for
3784 this one. The following code works around this bug.
3785
3786 However, the right solution seems to be not to reserve slot #0 for the
3787 initial thread, because:
3788 (1) there is no magic in slot #0,
3789 (2) we cannot detect the initial thread reliably (the first thread that
3790 does serial initialization may not be the real initial thread).
3791 */
3792 capacity = __kmp_threads_capacity;
3793 if (!initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3794 --capacity;
3795 }
3796
3797 // If it is not for initializing the hidden helper team, we need to take
3798 // __kmp_hidden_helper_threads_num out of the capacity because it is included
3799 // in __kmp_threads_capacity.
3800 if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
3801 capacity -= __kmp_hidden_helper_threads_num;
3802 }
3803
3804 /* see if there are too many threads */
3805 if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1)) {
3806 if (__kmp_tp_cached) {
3807 __kmp_fatal(KMP_MSG(CantRegisterNewThread),
3808 KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
3809 KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
3810 } else {
3811 __kmp_fatal(KMP_MSG(CantRegisterNewThread), KMP_HNT(SystemLimitOnThreads),
3812 __kmp_msg_null);
3813 }
3814 }
3815
3816 // When hidden helper task is enabled, __kmp_threads is organized as follows:
3817 // 0: initial thread, also a regular OpenMP thread.
3818 // [1, __kmp_hidden_helper_threads_num]: slots for hidden helper threads.
3819 // [__kmp_hidden_helper_threads_num + 1, __kmp_threads_capacity): slots for
3820 // regular OpenMP threads.
3821 if (TCR_4(__kmp_init_hidden_helper_threads)) {
3822 // Find an available thread slot for hidden helper thread. Slots for hidden
3823 // helper threads start from 1 to __kmp_hidden_helper_threads_num.
3824 for (gtid = 1; TCR_PTR(__kmp_threads[gtid]) != NULL &&
3825 gtid <= __kmp_hidden_helper_threads_num;
3826 gtid++)
3827 ;
3828 KMP_ASSERT(gtid <= __kmp_hidden_helper_threads_num);
3829 KA_TRACE(1, ("__kmp_register_root: found slot in threads array for "
3830 "hidden helper thread: T#%d\n",
3831 gtid));
3832 } else {
3833 /* find an available thread slot */
3834 // Don't reassign the zero slot since we need that to only be used by
3835 // initial thread. Slots for hidden helper threads should also be skipped.
3836 if (initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3837 gtid = 0;
3838 } else {
3839 for (gtid = __kmp_hidden_helper_threads_num + 1;
3840 TCR_PTR(__kmp_threads[gtid]) != NULL; gtid++)
3841 ;
3842 }
3843 KA_TRACE(
3844 1, ("__kmp_register_root: found slot in threads array: T#%d\n", gtid));
3845 KMP_ASSERT(gtid < __kmp_threads_capacity);
3846 }
3847
3848 /* update global accounting */
3849 __kmp_all_nth++;
3850 TCW_4(__kmp_nth, __kmp_nth + 1);
3851
3852 // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) for low
3853 // numbers of procs, and method #2 (keyed API call) for higher numbers.
3854 if (__kmp_adjust_gtid_mode) {
3855 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
3856 if (TCR_4(__kmp_gtid_mode) != 2) {
3857 TCW_4(__kmp_gtid_mode, 2);
3858 }
3859 } else {
3860 if (TCR_4(__kmp_gtid_mode) != 1) {
3861 TCW_4(__kmp_gtid_mode, 1);
3862 }
3863 }
3864 }
3865
3866#ifdef KMP_ADJUST_BLOCKTIME
3867 /* Adjust blocktime to zero if necessary */
3868 /* Middle initialization might not have occurred yet */
3869 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
3870 if (__kmp_nth > __kmp_avail_proc) {
3871 __kmp_zero_bt = TRUE;
3872 }
3873 }
3874#endif /* KMP_ADJUST_BLOCKTIME */
3875
3876 /* setup this new hierarchy */
3877 if (!(root = __kmp_root[gtid])) {
3878 root = __kmp_root[gtid] = (kmp_root_t *)__kmp_allocate(sizeof(kmp_root_t))___kmp_allocate((sizeof(kmp_root_t)), "openmp/runtime/src/kmp_runtime.cpp"
, 3878)
;
3879 KMP_DEBUG_ASSERT(!root->r.r_root_team)if (!(!root->r.r_root_team)) { __kmp_debug_assert("!root->r.r_root_team"
, "openmp/runtime/src/kmp_runtime.cpp", 3879); }
;
3880 }
3881
3882#if KMP_STATS_ENABLED0
3883 // Initialize stats as soon as possible (right after gtid assignment).
3884 __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
3885 __kmp_stats_thread_ptr->startLife();
3886 KMP_SET_THREAD_STATE(SERIAL_REGION)((void)0);
3887 KMP_INIT_PARTITIONED_TIMERS(OMP_serial)((void)0);
3888#endif
3889 __kmp_initialize_root(root);
3890
3891 /* setup new root thread structure */
3892 if (root->r.r_uber_thread) {
3893 root_thread = root->r.r_uber_thread;
3894 } else {
3895 root_thread = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t))___kmp_allocate((sizeof(kmp_info_t)), "openmp/runtime/src/kmp_runtime.cpp"
, 3895)
;
3896 if (__kmp_storage_map) {
3897 __kmp_print_thread_storage_map(root_thread, gtid);
3898 }
3899 root_thread->th.th_info.ds.ds_gtid = gtid;
3900#if OMPT_SUPPORT1
3901 root_thread->th.ompt_thread_info.thread_data = ompt_data_none{0};
3902#endif
3903 root_thread->th.th_root = root;
3904 if (__kmp_env_consistency_check) {
3905 root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid);
3906 }
3907#if USE_FAST_MEMORY3
3908 __kmp_initialize_fast_memory(root_thread);
3909#endif /* USE_FAST_MEMORY */
3910
3911#if KMP_USE_BGET1
3912 KMP_DEBUG_ASSERT(root_thread->th.th_local.bget_data == NULL)if (!(root_thread->th.th_local.bget_data == __null)) { __kmp_debug_assert
("root_thread->th.th_local.bget_data == __null", "openmp/runtime/src/kmp_runtime.cpp"
, 3912); }
;
3913 __kmp_initialize_bget(root_thread);
3914#endif
3915 __kmp_init_random(root_thread); // Initialize random number generator
3916 }
3917
3918 /* setup the serial team held in reserve by the root thread */
3919 if (!root_thread->th.th_serial_team) {
3920 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3921 KF_TRACE(10, ("__kmp_register_root: before serial_team\n"))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_register_root: before serial_team\n"
); }
;
3922 root_thread->th.th_serial_team = __kmp_allocate_team(
3923 root, 1, 1,
3924#if OMPT_SUPPORT1
3925 ompt_data_none{0}, // root parallel id
3926#endif
3927 proc_bind_default, &r_icvs, 0 USE_NESTED_HOT_ARG(NULL), __null);
3928 }
3929 KMP_ASSERT(root_thread->th.th_serial_team)if (!(root_thread->th.th_serial_team)) { __kmp_debug_assert
("root_thread->th.th_serial_team", "openmp/runtime/src/kmp_runtime.cpp"
, 3929); }
;
3930 KF_TRACE(10, ("__kmp_register_root: after serial_team = %p\n",if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_register_root: after serial_team = %p\n"
, root_thread->th.th_serial_team); }
3931 root_thread->th.th_serial_team))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_register_root: after serial_team = %p\n"
, root_thread->th.th_serial_team); }
;
3932
3933 /* drop root_thread into place */
3934 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread)((__kmp_threads[gtid])) = ((root_thread));
3935
3936 root->r.r_root_team->t.t_threads[0] = root_thread;
3937 root->r.r_hot_team->t.t_threads[0] = root_thread;
3938 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
3939 // AC: the team is created in reserve, not for execution (it is unused for now).
3940 root_thread->th.th_serial_team->t.t_serialized = 0;
3941 root->r.r_uber_thread = root_thread;
3942
3943 /* initialize the thread, get it ready to go */
3944 __kmp_initialize_info(root_thread, root->r.r_root_team, 0, gtid);
3945 TCW_4(__kmp_init_gtid, TRUE)(__kmp_init_gtid) = ((!0));
3946
3947 /* prepare the primary thread for get_gtid() */
3948 __kmp_gtid_set_specific(gtid);
3949
3950#if USE_ITT_BUILD1
3951 __kmp_itt_thread_name(gtid);
3952#endif /* USE_ITT_BUILD */
3953
3954#ifdef KMP_TDATA_GTID1
3955 __kmp_gtid = gtid;
3956#endif
3957 __kmp_create_worker(gtid, root_thread, __kmp_stksize);
3958 KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == gtid)if (!(__kmp_gtid_get_specific() == gtid)) { __kmp_debug_assert
("__kmp_gtid_get_specific() == gtid", "openmp/runtime/src/kmp_runtime.cpp"
, 3958); }
;
3959
3960 KA_TRACE(20, ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, "
"plain=%u\n", gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team
), root->r.r_hot_team->t.t_id, 0, 0, 0); }
3961 "plain=%u\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, "
"plain=%u\n", gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team
), root->r.r_hot_team->t.t_id, 0, 0, 0); }
3962 gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team),if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, "
"plain=%u\n", gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team
), root->r.r_hot_team->t.t_id, 0, 0, 0); }
3963 root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, "
"plain=%u\n", gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team
), root->r.r_hot_team->t.t_id, 0, 0, 0); }
3964 KMP_INIT_BARRIER_STATE))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, "
"plain=%u\n", gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team
), root->r.r_hot_team->t.t_id, 0, 0, 0); }
;
3965 { // Initialize barrier data.
3966 int b;
3967 for (b = 0; b < bs_last_barrier; ++b) {
3968 root_thread->th.th_bar[b].bb.b_arrived = KMP_INIT_BARRIER_STATE0;
3969#if USE_DEBUGGER0
3970 root_thread->th.th_bar[b].bb.b_worker_arrived = 0;
3971#endif
3972 }
3973 }
3974 KMP_DEBUG_ASSERT(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived ==if (!(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].
b_arrived == 0)) { __kmp_debug_assert("root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived == 0"
, "openmp/runtime/src/kmp_runtime.cpp", 3975); }
3975 KMP_INIT_BARRIER_STATE)if (!(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].
b_arrived == 0)) { __kmp_debug_assert("root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived == 0"
, "openmp/runtime/src/kmp_runtime.cpp", 3975); }
;
3976
3977#if KMP_AFFINITY_SUPPORTED1
3978 root_thread->th.th_current_place = KMP_PLACE_UNDEFINED(-2);
3979 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED(-2);
3980 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED(-2);
3981 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED(-2);
3982#endif /* KMP_AFFINITY_SUPPORTED */
3983 root_thread->th.th_def_allocator = __kmp_def_allocator;
3984 root_thread->th.th_prev_level = 0;
3985 root_thread->th.th_prev_num_threads = 1;
3986
3987 kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(sizeof(kmp_cg_root_t))___kmp_allocate((sizeof(kmp_cg_root_t)), "openmp/runtime/src/kmp_runtime.cpp"
, 3987)
;
3988 tmp->cg_root = root_thread;
3989 tmp->cg_thread_limit = __kmp_cg_max_nth;
3990 tmp->cg_nthreads = 1;
3991 KA_TRACE(100, ("__kmp_register_root: Thread %p created node %p with"if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_register_root: Thread %p created node %p with"
" cg_nthreads init to 1\n", root_thread, tmp); }
3992 " cg_nthreads init to 1\n",if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_register_root: Thread %p created node %p with"
" cg_nthreads init to 1\n", root_thread, tmp); }
3993 root_thread, tmp))if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_register_root: Thread %p created node %p with"
" cg_nthreads init to 1\n", root_thread, tmp); }
;
3994 tmp->up = NULL__null;
3995 root_thread->th.th_cg_roots = tmp;
3996
3997 __kmp_root_counter++;
3998
3999#if OMPT_SUPPORT1
4000 if (!initial_thread && ompt_enabled.enabled) {
4001
4002 kmp_info_t *root_thread = ompt_get_thread();
4003
4004 ompt_set_thread_state(root_thread, ompt_state_overhead);
4005
4006 if (ompt_enabled.ompt_callback_thread_begin) {
4007 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)ompt_callback_thread_begin_callback(
4008 ompt_thread_initial, __ompt_get_thread_data_internal());
4009 }
4010 ompt_data_t *task_data;
4011 ompt_data_t *parallel_data;
4012 __ompt_get_task_info_internal(0, NULL__null, &task_data, NULL__null, &parallel_data,
4013 NULL__null);
4014 if (ompt_enabled.ompt_callback_implicit_task) {
4015 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)ompt_callback_implicit_task_callback(
4016 ompt_scope_begin, parallel_data, task_data, 1, 1, ompt_task_initial);
4017 }
4018
4019 ompt_set_thread_state(root_thread, ompt_state_work_serial);
4020 }
4021#endif
4022#if OMPD_SUPPORT1
4023 if (ompd_state & OMPD_ENABLE_BP0x1)
4024 ompd_bp_thread_begin();
4025#endif
4026
4027 KMP_MB();
4028 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4029
4030 return gtid;
4031}
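
Slot selection in __kmp_register_root above follows the layout described in its comments: gtid 0 is reserved for the initial thread, gtids 1..__kmp_hidden_helper_threads_num are reserved for hidden helper threads, and regular roots scan upward from there. A small sketch of that rule over a plain array; the array and constants are assumptions for illustration, not the real __kmp_threads.

constexpr int kCapacity = 16;
constexpr int kNumHidden = 4; // stands in for __kmp_hidden_helper_threads_num

// Returns the chosen gtid, or -1 if no slot is free in the allowed range.
static int pick_slot(void *slots[], bool hidden_helper, bool initial) {
  if (hidden_helper) {
    for (int gtid = 1; gtid <= kNumHidden; ++gtid) // hidden helpers use [1, kNumHidden]
      if (slots[gtid] == nullptr)
        return gtid;
    return -1;
  }
  if (initial && slots[0] == nullptr)
    return 0; // slot 0 is reserved for the initial thread
  for (int gtid = kNumHidden + 1; gtid < kCapacity; ++gtid) // regular roots start here
    if (slots[gtid] == nullptr)
      return gtid;
  return -1;
}

int main() {
  void *slots[kCapacity] = {};     // all free
  slots[0] = &slots;               // initial thread already registered
  int gtid = pick_slot(slots, /*hidden_helper=*/false, /*initial=*/false);
  return gtid == kNumHidden + 1 ? 0 : 1; // first slot past the hidden-helper range
}
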
4032
4033#if KMP_NESTED_HOT_TEAMS
4034static int __kmp_free_hot_teams(kmp_root_t *root, kmp_info_t *thr, int level,
4035 const int max_level) {
4036 int i, n, nth;
4037 kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
4038 if (!hot_teams || !hot_teams[level].hot_team) {
4039 return 0;
4040 }
4041 KMP_DEBUG_ASSERT(level < max_level)if (!(level < max_level)) { __kmp_debug_assert("level < max_level"
, "openmp/runtime/src/kmp_runtime.cpp", 4041); }
;
4042 kmp_team_t *team = hot_teams[level].hot_team;
4043 nth = hot_teams[level].hot_team_nth;
4044 n = nth - 1; // primary thread is not freed
4045 if (level < max_level - 1) {
4046 for (i = 0; i < nth; ++i) {
4047 kmp_info_t *th = team->t.t_threads[i];
4048 n += __kmp_free_hot_teams(root, th, level + 1, max_level);
4049 if (i > 0 && th->th.th_hot_teams) {
4050 __kmp_free(th->th.th_hot_teams)___kmp_free((th->th.th_hot_teams), "openmp/runtime/src/kmp_runtime.cpp"
, 4050)
;
4051 th->th.th_hot_teams = NULL__null;
4052 }
4053 }
4054 }
4055 __kmp_free_team(root, team, NULL__null);
4056 return n;
4057}
4058#endif
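
__kmp_free_hot_teams above recurses over the nested hot-team levels, freeing children before their parent and counting every released slot except the primary thread at each level. A toy sketch of the same recursion shape; the node type is invented for illustration and is not the runtime's kmp_hot_team_ptr_t.

#include <vector>

struct toy_team {
  int nth = 0;                      // threads held at this level
  std::vector<toy_team *> children; // nested hot teams, one per worker
};

// Returns the number of thread slots released at and below this team,
// excluding the primary thread at each level (mirrors "n = nth - 1" above).
static int free_toy_hot_team(toy_team *team) {
  int n = team->nth - 1;
  for (toy_team *child : team->children) {
    n += free_toy_hot_team(child); // free children first
    delete child;
  }
  team->children.clear();
  return n;
}

int main() {
  toy_team root{4, {new toy_team{2, {}}, new toy_team{3, {}}}};
  int freed = free_toy_hot_team(&root); // (4-1) + (2-1) + (3-1) = 6
  return freed == 6 ? 0 : 1;
}
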
4059
4060 // Resets a root thread and clears its root and hot teams.
4061// Returns the number of __kmp_threads entries directly and indirectly freed.
4062static int __kmp_reset_root(int gtid, kmp_root_t *root) {
4063 kmp_team_t *root_team = root->r.r_root_team;
4064 kmp_team_t *hot_team = root->r.r_hot_team;
4065 int n = hot_team->t.t_nproc;
4066 int i;
4067
4068 KMP_DEBUG_ASSERT(!root->r.r_active)if (!(!root->r.r_active)) { __kmp_debug_assert("!root->r.r_active"
, "openmp/runtime/src/kmp_runtime.cpp", 4068); }
;
4069
4070 root->r.r_root_team = NULL;
4071 root->r.r_hot_team = NULL;
4072 // __kmp_free_team() does not free hot teams, so we have to clear r_hot_team
4073 // before call to __kmp_free_team().
4074 __kmp_free_team(root, root_team USE_NESTED_HOT_ARG(NULL));
4075#if KMP_NESTED_HOT_TEAMS
4076 if (__kmp_hot_teams_max_level >
4077 0) { // need to free nested hot teams and their threads if any
4078 for (i = 0; i < hot_team->t.t_nproc; ++i) {
4079 kmp_info_t *th = hot_team->t.t_threads[i];
4080 if (__kmp_hot_teams_max_level > 1) {
4081 n += __kmp_free_hot_teams(root, th, 1, __kmp_hot_teams_max_level);
4082 }
4083 if (th->th.th_hot_teams) {
4084 __kmp_free(th->th.th_hot_teams)___kmp_free((th->th.th_hot_teams), "openmp/runtime/src/kmp_runtime.cpp"
, 4084)
;
4085 th->th.th_hot_teams = NULL__null;
4086 }
4087 }
4088 }
4089#endif
4090 __kmp_free_team(root, hot_team USE_NESTED_HOT_ARG(NULL));
4091
4092 // Before we can reap the thread, we need to make certain that all other
4093 // threads in the teams that had this root as ancestor have stopped trying to
4094 // steal tasks.
4095 if (__kmp_tasking_mode != tskm_immediate_exec) {
4096 __kmp_wait_to_unref_task_teams();
4097 }
4098
4099#if KMP_OS_WINDOWS0
4100 /* Close Handle of root duplicated in __kmp_create_worker (tr #62919) */
4101 KA_TRACE(if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_reset_root: free handle, th = %p, handle = %"
"lu" "\n", (LPVOID) & (root->r.r_uber_thread->th),
root->r.r_uber_thread->th.th_info.ds.ds_thread); }
4102 10, ("__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPECif (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_reset_root: free handle, th = %p, handle = %"
"lu" "\n", (LPVOID) & (root->r.r_uber_thread->th),
root->r.r_uber_thread->th.th_info.ds.ds_thread); }
4103 "\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_reset_root: free handle, th = %p, handle = %"
"lu" "\n", (LPVOID) & (root->r.r_uber_thread->th),
root->r.r_uber_thread->th.th_info.ds.ds_thread); }
4104 (LPVOID) & (root->r.r_uber_thread->th),if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_reset_root: free handle, th = %p, handle = %"
"lu" "\n", (LPVOID) & (root->r.r_uber_thread->th),
root->r.r_uber_thread->th.th_info.ds.ds_thread); }
4105 root->r.r_uber_thread->th.th_info.ds.ds_thread))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_reset_root: free handle, th = %p, handle = %"
"lu" "\n", (LPVOID) & (root->r.r_uber_thread->th),
root->r.r_uber_thread->th.th_info.ds.ds_thread); }
;
4106 __kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread);
4107#endif /* KMP_OS_WINDOWS */
4108
4109#if OMPD_SUPPORT1
4110 if (ompd_state & OMPD_ENABLE_BP0x1)
4111 ompd_bp_thread_end();
4112#endif
4113
4114#if OMPT_SUPPORT1
4115 ompt_data_t *task_data;
4116 ompt_data_t *parallel_data;
4117 __ompt_get_task_info_internal(0, NULL__null, &task_data, NULL__null, &parallel_data,
4118 NULL__null);
4119 if (ompt_enabled.ompt_callback_implicit_task) {
4120 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)ompt_callback_implicit_task_callback(
4121 ompt_scope_end, parallel_data, task_data, 0, 1, ompt_task_initial);
4122 }
4123 if (ompt_enabled.ompt_callback_thread_end) {
4124 ompt_callbacks.ompt_callback(ompt_callback_thread_end)ompt_callback_thread_end_callback(
4125 &(root->r.r_uber_thread->th.ompt_thread_info.thread_data));
4126 }
4127#endif
4128
4129 TCW_4(__kmp_nth,(__kmp_nth) = (__kmp_nth - 1)
4130 __kmp_nth - 1)(__kmp_nth) = (__kmp_nth - 1); // __kmp_reap_thread will decrement __kmp_all_nth.
4131 i = root->r.r_uber_thread->th.th_cg_roots->cg_nthreads--;
4132 KA_TRACE(100, ("__kmp_reset_root: Thread %p decrement cg_nthreads on node %p"if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_reset_root: Thread %p decrement cg_nthreads on node %p"
" to %d\n", root->r.r_uber_thread, root->r.r_uber_thread
->th.th_cg_roots, root->r.r_uber_thread->th.th_cg_roots
->cg_nthreads); }
4133 " to %d\n",if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_reset_root: Thread %p decrement cg_nthreads on node %p"
" to %d\n", root->r.r_uber_thread, root->r.r_uber_thread
->th.th_cg_roots, root->r.r_uber_thread->th.th_cg_roots
->cg_nthreads); }
4134 root->r.r_uber_thread, root->r.r_uber_thread->th.th_cg_roots,if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_reset_root: Thread %p decrement cg_nthreads on node %p"
" to %d\n", root->r.r_uber_thread, root->r.r_uber_thread
->th.th_cg_roots, root->r.r_uber_thread->th.th_cg_roots
->cg_nthreads); }
4135 root->r.r_uber_thread->th.th_cg_roots->cg_nthreads))if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_reset_root: Thread %p decrement cg_nthreads on node %p"
" to %d\n", root->r.r_uber_thread, root->r.r_uber_thread
->th.th_cg_roots, root->r.r_uber_thread->th.th_cg_roots
->cg_nthreads); }
;
4136 if (i == 1) {
4137 // need to free contention group structure
4138 KMP_DEBUG_ASSERT(root->r.r_uber_thread ==if (!(root->r.r_uber_thread == root->r.r_uber_thread->
th.th_cg_roots->cg_root)) { __kmp_debug_assert("root->r.r_uber_thread == root->r.r_uber_thread->th.th_cg_roots->cg_root"
, "openmp/runtime/src/kmp_runtime.cpp", 4139); }
4139 root->r.r_uber_thread->th.th_cg_roots->cg_root)if (!(root->r.r_uber_thread == root->r.r_uber_thread->
th.th_cg_roots->cg_root)) { __kmp_debug_assert("root->r.r_uber_thread == root->r.r_uber_thread->th.th_cg_roots->cg_root"
, "openmp/runtime/src/kmp_runtime.cpp", 4139); }
;
4140 KMP_DEBUG_ASSERT(root->r.r_uber_thread->th.th_cg_roots->up == NULL)if (!(root->r.r_uber_thread->th.th_cg_roots->up == __null
)) { __kmp_debug_assert("root->r.r_uber_thread->th.th_cg_roots->up == __null"
, "openmp/runtime/src/kmp_runtime.cpp", 4140); }
;
4141 __kmp_free(root->r.r_uber_thread->th.th_cg_roots)___kmp_free((root->r.r_uber_thread->th.th_cg_roots), "openmp/runtime/src/kmp_runtime.cpp"
, 4141)
;
4142 root->r.r_uber_thread->th.th_cg_roots = NULL__null;
4143 }
4144 __kmp_reap_thread(root->r.r_uber_thread, 1);
4145
4146 // We cannot put the root thread into __kmp_thread_pool, so we have to reap
4147 // it instead of freeing it.
4148 root->r.r_uber_thread = NULL;
4149 /* mark root as no longer in use */
4150 root->r.r_begin = FALSE;
4151
4152 return n;
4153}
4154
4155void __kmp_unregister_root_current_thread(int gtid) {
4156 KA_TRACE(1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid))if (kmp_a_debug >= 1) { __kmp_debug_printf ("__kmp_unregister_root_current_thread: enter T#%d\n"
, gtid); }
;
4157 /* this lock should be ok, since unregister_root_current_thread is never
4158 called during an abort, only during a normal close. furthermore, if you
4159 have the forkjoin lock, you should never try to get the initz lock */
4160 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
4161 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
4162 KC_TRACE(10, ("__kmp_unregister_root_current_thread: already finished, "if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_unregister_root_current_thread: already finished, "
"exiting T#%d\n", gtid); }
4163 "exiting T#%d\n",if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_unregister_root_current_thread: already finished, "
"exiting T#%d\n", gtid); }
4164 gtid))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_unregister_root_current_thread: already finished, "
"exiting T#%d\n", gtid); }
;
4165 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4166 return;
4167 }
4168 kmp_root_t *root = __kmp_root[gtid];
4169
4170 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid])if (!(__kmp_threads && __kmp_threads[gtid])) { __kmp_debug_assert
("__kmp_threads && __kmp_threads[gtid]", "openmp/runtime/src/kmp_runtime.cpp"
, 4170); }
;
4171 KMP_ASSERT(KMP_UBER_GTID(gtid))if (!(KMP_UBER_GTID(gtid))) { __kmp_debug_assert("KMP_UBER_GTID(gtid)"
, "openmp/runtime/src/kmp_runtime.cpp", 4171); }
;
4172 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root)if (!(root == __kmp_threads[gtid]->th.th_root)) { __kmp_debug_assert
("root == __kmp_threads[gtid]->th.th_root", "openmp/runtime/src/kmp_runtime.cpp"
, 4172); }
;
4173 KMP_ASSERT(root->r.r_active == FALSE)if (!(root->r.r_active == 0)) { __kmp_debug_assert("root->r.r_active == FALSE"
, "openmp/runtime/src/kmp_runtime.cpp", 4173); }
;
4174
4175 KMP_MB();
4176
4177 kmp_info_t *thread = __kmp_threads[gtid];
4178 kmp_team_t *team = thread->th.th_team;
4179 kmp_task_team_t *task_team = thread->th.th_task_team;
4180
4181 // we need to wait for the proxy tasks before finishing the thread
4182 if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||
4183 task_team->tt.tt_hidden_helper_task_encountered)) {
4184#if OMPT_SUPPORT1
4185 // the runtime is shutting down so we won't report any events
4186 thread->th.ompt_thread_info.state = ompt_state_undefined;
4187#endif
4188 __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL), __null);
4189 }
4190
4191 __kmp_reset_root(gtid, root);
4192
4193 KMP_MB();
4194 KC_TRACE(10,if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_unregister_root_current_thread: T#%d unregistered\n"
, gtid); }
4195 ("__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_unregister_root_current_thread: T#%d unregistered\n"
, gtid); }
;
4196
4197 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4198}
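
The unregister path above follows a simple discipline: take the fork/join bootstrap lock, bail out early if the runtime has already shut down (releasing the lock on that path too), otherwise finish the teardown and release the lock at the end. A minimal standalone sketch of that pattern, using std::mutex in place of the runtime's bootstrap lock (all names below are illustrative, not part of the OpenMP runtime):

  #include <cstdio>
  #include <mutex>

  static std::mutex forkjoin_lock;   // stand-in for __kmp_forkjoin_lock
  static bool runtime_done = false;  // stand-in for TCR_4(__kmp_global.g.g_done)

  static void unregister_root(int gtid) {
    forkjoin_lock.lock();
    if (runtime_done) {              // runtime already finished: nothing to tear down
      forkjoin_lock.unlock();        // the early-exit path must still drop the lock
      return;
    }
    std::printf("tearing down root T#%d\n", gtid);
    // ... wait for outstanding tasks, reset the root, etc. ...
    forkjoin_lock.unlock();
  }

  int main() { unregister_root(0); }
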
4199
4200#if KMP_OS_WINDOWS0
4201/* __kmp_forkjoin_lock must be already held
4202 Unregisters a root thread that is not the current thread. Returns the number
4203 of __kmp_threads entries freed as a result. */
4204static int __kmp_unregister_root_other_thread(int gtid) {
4205 kmp_root_t *root = __kmp_root[gtid];
4206 int r;
4207
4208 KA_TRACE(1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid))if (kmp_a_debug >= 1) { __kmp_debug_printf ("__kmp_unregister_root_other_thread: enter T#%d\n"
, gtid); }
;
4209 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid])if (!(__kmp_threads && __kmp_threads[gtid])) { __kmp_debug_assert
("__kmp_threads && __kmp_threads[gtid]", "openmp/runtime/src/kmp_runtime.cpp"
, 4209); }
;
4210 KMP_ASSERT(KMP_UBER_GTID(gtid))if (!(KMP_UBER_GTID(gtid))) { __kmp_debug_assert("KMP_UBER_GTID(gtid)"
, "openmp/runtime/src/kmp_runtime.cpp", 4210); }
;
4211 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root)if (!(root == __kmp_threads[gtid]->th.th_root)) { __kmp_debug_assert
("root == __kmp_threads[gtid]->th.th_root", "openmp/runtime/src/kmp_runtime.cpp"
, 4211); }
;
4212 KMP_ASSERT(root->r.r_active == FALSE)if (!(root->r.r_active == 0)) { __kmp_debug_assert("root->r.r_active == FALSE"
, "openmp/runtime/src/kmp_runtime.cpp", 4212); }
;
4213
4214 r = __kmp_reset_root(gtid, root);
4215 KC_TRACE(10,if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_unregister_root_other_thread: T#%d unregistered\n"
, gtid); }
4216 ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_unregister_root_other_thread: T#%d unregistered\n"
, gtid); }
;
4217 return r;
4218}
4219#endif
4220
4221#if KMP_DEBUG1
4222void __kmp_task_info() {
4223
4224 kmp_int32 gtid = __kmp_entry_gtid()__kmp_get_global_thread_id_reg();
4225 kmp_int32 tid = __kmp_tid_from_gtid(gtid);
4226 kmp_info_t *this_thr = __kmp_threads[gtid];
4227 kmp_team_t *steam = this_thr->th.th_serial_team;
4228 kmp_team_t *team = this_thr->th.th_team;
4229
4230 __kmp_printf(
4231 "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p steam=%p curtask=%p "
4232 "ptask=%p\n",
4233 gtid, tid, this_thr, team, steam, this_thr->th.th_current_task,
4234 team->t.t_implicit_task_taskdata[tid].td_parent);
4235}
4236#endif // KMP_DEBUG
4237
4238/* TODO optimize with one big memclr, take out what isn't needed, split
4239 responsibility to workers as much as possible, and delay initialization of
4240 features as much as possible */
4241static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
4242 int tid, int gtid) {
4243 /* this_thr->th.th_info.ds.ds_gtid is setup in
4244 kmp_allocate_thread/create_worker.
4245 this_thr->th.th_serial_team is setup in __kmp_allocate_thread */
4246 KMP_DEBUG_ASSERT(this_thr != NULL)if (!(this_thr != __null)) { __kmp_debug_assert("this_thr != __null"
, "openmp/runtime/src/kmp_runtime.cpp", 4246); }
;
4247 KMP_DEBUG_ASSERT(this_thr->th.th_serial_team)if (!(this_thr->th.th_serial_team)) { __kmp_debug_assert("this_thr->th.th_serial_team"
, "openmp/runtime/src/kmp_runtime.cpp", 4247); }
;
4248 KMP_DEBUG_ASSERT(team)if (!(team)) { __kmp_debug_assert("team", "openmp/runtime/src/kmp_runtime.cpp"
, 4248); }
;
4249 KMP_DEBUG_ASSERT(team->t.t_threads)if (!(team->t.t_threads)) { __kmp_debug_assert("team->t.t_threads"
, "openmp/runtime/src/kmp_runtime.cpp", 4249); }
;
4250 KMP_DEBUG_ASSERT(team->t.t_dispatch)if (!(team->t.t_dispatch)) { __kmp_debug_assert("team->t.t_dispatch"
, "openmp/runtime/src/kmp_runtime.cpp", 4250); }
;
4251 kmp_info_t *master = team->t.t_threads[0];
4252 KMP_DEBUG_ASSERT(master)if (!(master)) { __kmp_debug_assert("master", "openmp/runtime/src/kmp_runtime.cpp"
, 4252); }
;
4253 KMP_DEBUG_ASSERT(master->th.th_root)if (!(master->th.th_root)) { __kmp_debug_assert("master->th.th_root"
, "openmp/runtime/src/kmp_runtime.cpp", 4253); }
;
4254
4255 KMP_MB();
4256
4257 TCW_SYNC_PTR(this_thr->th.th_team, team)((this_thr->th.th_team)) = ((team));
4258
4259 this_thr->th.th_info.ds.ds_tid = tid;
4260 this_thr->th.th_set_nproc = 0;
4261 if (__kmp_tasking_mode != tskm_immediate_exec)
4262 // When tasking is possible, threads are not safe to reap until they are
4263 // done tasking; this will be set when tasking code is exited in wait
4264 this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP0;
4265 else // no tasking --> always safe to reap
4266 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP1;
4267 this_thr->th.th_set_proc_bind = proc_bind_default;
4268#if KMP_AFFINITY_SUPPORTED1
4269 this_thr->th.th_new_place = this_thr->th.th_current_place;
4270#endif
4271 this_thr->th.th_root = master->th.th_root;
4272
4273 /* setup the thread's cache of the team structure */
4274 this_thr->th.th_team_nproc = team->t.t_nproc;
4275 this_thr->th.th_team_master = master;
4276 this_thr->th.th_team_serialized = team->t.t_serialized;
4277
4278 KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata)if (!(team->t.t_implicit_task_taskdata)) { __kmp_debug_assert
("team->t.t_implicit_task_taskdata", "openmp/runtime/src/kmp_runtime.cpp"
, 4278); }
;
4279
4280 KF_TRACE(10, ("__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n"
, tid, gtid, this_thr, this_thr->th.th_current_task); }
4281 tid, gtid, this_thr, this_thr->th.th_current_task))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n"
, tid, gtid, this_thr, this_thr->th.th_current_task); }
;
4282
4283 __kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr,
4284 team, tid, TRUE(!0));
4285
4286 KF_TRACE(10, ("__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n"
, tid, gtid, this_thr, this_thr->th.th_current_task); }
4287 tid, gtid, this_thr, this_thr->th.th_current_task))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n"
, tid, gtid, this_thr, this_thr->th.th_current_task); }
;
4288 // TODO: Initialize ICVs from parent; GEH - isn't that already done in
4289 // __kmp_initialize_team()?
4290
4291 /* TODO no worksharing in speculative threads */
4292 this_thr->th.th_dispatch = &team->t.t_dispatch[tid];
4293
4294 this_thr->th.th_local.this_construct = 0;
4295
4296 if (!this_thr->th.th_pri_common) {
4297 this_thr->th.th_pri_common =
4298 (struct common_table *)__kmp_allocate(sizeof(struct common_table))___kmp_allocate((sizeof(struct common_table)), "openmp/runtime/src/kmp_runtime.cpp"
, 4298)
;
4299 if (__kmp_storage_map) {
4300 __kmp_print_storage_map_gtid(
4301 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
4302 sizeof(struct common_table), "th_%d.th_pri_common\n", gtid);
4303 }
4304 this_thr->th.th_pri_head = NULL__null;
4305 }
4306
4307 if (this_thr != master && // Primary thread's CG root is initialized elsewhere
4308 this_thr->th.th_cg_roots != master->th.th_cg_roots) { // CG root not set
4309 // Make new thread's CG root same as primary thread's
4310 KMP_DEBUG_ASSERT(master->th.th_cg_roots)if (!(master->th.th_cg_roots)) { __kmp_debug_assert("master->th.th_cg_roots"
, "openmp/runtime/src/kmp_runtime.cpp", 4310); }
;
4311 kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
4312 if (tmp) {
4313 // worker changes CG, need to check if old CG should be freed
4314 int i = tmp->cg_nthreads--;
4315 KA_TRACE(100, ("__kmp_initialize_info: Thread %p decrement cg_nthreads"if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_initialize_info: Thread %p decrement cg_nthreads"
" on node %p of thread %p to %d\n", this_thr, tmp, tmp->cg_root
, tmp->cg_nthreads); }
4316 " on node %p of thread %p to %d\n",if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_initialize_info: Thread %p decrement cg_nthreads"
" on node %p of thread %p to %d\n", this_thr, tmp, tmp->cg_root
, tmp->cg_nthreads); }
4317 this_thr, tmp, tmp->cg_root, tmp->cg_nthreads))if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_initialize_info: Thread %p decrement cg_nthreads"
" on node %p of thread %p to %d\n", this_thr, tmp, tmp->cg_root
, tmp->cg_nthreads); }
;
4318 if (i == 1) {
4319 __kmp_free(tmp)___kmp_free((tmp), "openmp/runtime/src/kmp_runtime.cpp", 4319
)
; // last thread left CG --> free it
4320 }
4321 }
4322 this_thr->th.th_cg_roots = master->th.th_cg_roots;
4323 // Increment new thread's CG root's counter to add the new thread
4324 this_thr->th.th_cg_roots->cg_nthreads++;
4325 KA_TRACE(100, ("__kmp_initialize_info: Thread %p increment cg_nthreads on"if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_initialize_info: Thread %p increment cg_nthreads on"
" node %p of thread %p to %d\n", this_thr, this_thr->th.th_cg_roots
, this_thr->th.th_cg_roots->cg_root, this_thr->th.th_cg_roots
->cg_nthreads); }
4326 " node %p of thread %p to %d\n",if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_initialize_info: Thread %p increment cg_nthreads on"
" node %p of thread %p to %d\n", this_thr, this_thr->th.th_cg_roots
, this_thr->th.th_cg_roots->cg_root, this_thr->th.th_cg_roots
->cg_nthreads); }
4327 this_thr, this_thr->th.th_cg_roots,if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_initialize_info: Thread %p increment cg_nthreads on"
" node %p of thread %p to %d\n", this_thr, this_thr->th.th_cg_roots
, this_thr->th.th_cg_roots->cg_root, this_thr->th.th_cg_roots
->cg_nthreads); }
4328 this_thr->th.th_cg_roots->cg_root,if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_initialize_info: Thread %p increment cg_nthreads on"
" node %p of thread %p to %d\n", this_thr, this_thr->th.th_cg_roots
, this_thr->th.th_cg_roots->cg_root, this_thr->th.th_cg_roots
->cg_nthreads); }
4329 this_thr->th.th_cg_roots->cg_nthreads))if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_initialize_info: Thread %p increment cg_nthreads on"
" node %p of thread %p to %d\n", this_thr, this_thr->th.th_cg_roots
, this_thr->th.th_cg_roots->cg_root, this_thr->th.th_cg_roots
->cg_nthreads); }
;
4330 this_thr->th.th_current_task->td_icvs.thread_limit =
4331 this_thr->th.th_cg_roots->cg_thread_limit;
4332 }
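
The contention-group handling above is reference counting: every cg_root node records how many threads currently point at it (cg_nthreads). A worker that adopts the primary thread's group decrements the old node's count, frees the node if it was the last user, then increments the count on the new node and inherits its thread limit. A small standalone sketch of that pattern, with a simplified struct rather than the runtime's kmp_cg_root_t:

  #include <cstdio>
  #include <cstdlib>

  struct cg_root {           // simplified stand-in for kmp_cg_root_t
    int cg_nthreads;         // how many threads currently reference this node
    int cg_thread_limit;     // thread-limit ICV inherited by the group
  };

  // Move one thread from the group `*from` points at to the group `to`,
  // freeing the old node when its last member leaves.
  static void switch_group(cg_root **from, cg_root *to) {
    if (*from != nullptr) {
      if (--(*from)->cg_nthreads == 0)   // last thread left the old group
        std::free(*from);
    }
    *from = to;
    to->cg_nthreads++;                   // count ourselves into the new group
  }

  int main() {
    cg_root *old_root = (cg_root *)std::malloc(sizeof(cg_root));
    old_root->cg_nthreads = 1;           // only this worker references it
    old_root->cg_thread_limit = 4;
    cg_root master_root = {1, 8};        // the primary thread's group

    cg_root *mine = old_root;
    switch_group(&mine, &master_root);   // old node freed, master count -> 2
    std::printf("threads in primary's group: %d, thread_limit: %d\n",
                master_root.cg_nthreads, mine->cg_thread_limit);
  }
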
4333
4334 /* Initialize dynamic dispatch */
4335 {
4336 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
4337 // Use team max_nproc since this will never change for the team.
4338 size_t disp_size =
4339 sizeof(dispatch_private_info_t) *
4340 (team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers);
4341 KD_TRACE(10, ("__kmp_initialize_info: T#%d max_nproc: %d\n", gtid,if (kmp_d_debug >= 10) { __kmp_debug_printf ("__kmp_initialize_info: T#%d max_nproc: %d\n"
, gtid, team->t.t_max_nproc); }
4342 team->t.t_max_nproc))if (kmp_d_debug >= 10) { __kmp_debug_printf ("__kmp_initialize_info: T#%d max_nproc: %d\n"
, gtid, team->t.t_max_nproc); }
;
4343 KMP_ASSERT(dispatch)if (!(dispatch)) { __kmp_debug_assert("dispatch", "openmp/runtime/src/kmp_runtime.cpp"
, 4343); }
;
4344 KMP_DEBUG_ASSERT(team->t.t_dispatch)if (!(team->t.t_dispatch)) { __kmp_debug_assert("team->t.t_dispatch"
, "openmp/runtime/src/kmp_runtime.cpp", 4344); }
;
4345 KMP_DEBUG_ASSERT(dispatch == &team->t.t_dispatch[tid])if (!(dispatch == &team->t.t_dispatch[tid])) { __kmp_debug_assert
("dispatch == &team->t.t_dispatch[tid]", "openmp/runtime/src/kmp_runtime.cpp"
, 4345); }
;
4346
4347 dispatch->th_disp_index = 0;
4348 dispatch->th_doacross_buf_idx = 0;
4349 if (!dispatch->th_disp_buffer) {
4350 dispatch->th_disp_buffer =
4351 (dispatch_private_info_t *)__kmp_allocate(disp_size)___kmp_allocate((disp_size), "openmp/runtime/src/kmp_runtime.cpp"
, 4351)
;
4352
4353 if (__kmp_storage_map) {
4354 __kmp_print_storage_map_gtid(
4355 gtid, &dispatch->th_disp_buffer[0],
4356 &dispatch->th_disp_buffer[team->t.t_max_nproc == 1
4357 ? 1
4358 : __kmp_dispatch_num_buffers],
4359 disp_size,
4360 "th_%d.th_dispatch.th_disp_buffer "
4361 "(team_%d.t_dispatch[%d].th_disp_buffer)",
4362 gtid, team->t.t_id, gtid);
4363 }
4364 } else {
4365 memset(&dispatch->th_disp_buffer[0], '\0', disp_size);
4366 }
4367
4368 dispatch->th_dispatch_pr_current = 0;
4369 dispatch->th_dispatch_sh_current = 0;
4370
4371 dispatch->th_deo_fcn = 0; /* ORDERED */
4372 dispatch->th_dxo_fcn = 0; /* END ORDERED */
4373 }
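
The dispatch buffer above is sized from the team's maximum size: a serial team (t_max_nproc == 1) gets exactly one dispatch_private_info_t, while a real team gets __kmp_dispatch_num_buffers of them so loop dispatch can rotate buffers. A tiny arithmetic sketch of that sizing decision; the entry size and buffer count below are placeholder values, not the runtime's real ones:

  #include <cstdio>
  #include <cstddef>
  #include <initializer_list>

  int main() {
    const std::size_t entry_size = 192;   // placeholder for sizeof(dispatch_private_info_t)
    const int num_buffers = 7;            // placeholder for __kmp_dispatch_num_buffers
    for (int max_nproc : {1, 8}) {
      std::size_t disp_size = entry_size * (max_nproc == 1 ? 1 : num_buffers);
      std::printf("t_max_nproc=%d -> disp_size=%zu bytes\n", max_nproc, disp_size);
    }
  }
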
4374
4375 this_thr->th.th_next_pool = NULL__null;
4376
4377 if (!this_thr->th.th_task_state_memo_stack) {
4378 size_t i;
4379 this_thr->th.th_task_state_memo_stack =
4380 (kmp_uint8 *)__kmp_allocate(4 * sizeof(kmp_uint8))___kmp_allocate((4 * sizeof(kmp_uint8)), "openmp/runtime/src/kmp_runtime.cpp"
, 4380)
;
4381 this_thr->th.th_task_state_top = 0;
4382 this_thr->th.th_task_state_stack_sz = 4;
4383 for (i = 0; i < this_thr->th.th_task_state_stack_sz;
4384 ++i) // zero init the stack
4385 this_thr->th.th_task_state_memo_stack[i] = 0;
4386 }
4387
4388 KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here)if (!(!this_thr->th.th_spin_here)) { __kmp_debug_assert("!this_thr->th.th_spin_here"
, "openmp/runtime/src/kmp_runtime.cpp", 4388); }
;
4389 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0)if (!(this_thr->th.th_next_waiting == 0)) { __kmp_debug_assert
("this_thr->th.th_next_waiting == 0", "openmp/runtime/src/kmp_runtime.cpp"
, 4389); }
;
4390
4391 KMP_MB();
4392}
4393
4394/* allocate a new thread for the requesting team. this is only called from
4395 within a forkjoin critical section. we will first try to get an available
4396 thread from the thread pool. if none is available, we will fork a new one
4397 assuming we are able to create a new one. this should be assured, as the
4398 caller should check on this first. */
4399kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
4400 int new_tid) {
4401 kmp_team_t *serial_team;
4402 kmp_info_t *new_thr;
4403 int new_gtid;
4404
4405 KA_TRACE(20, ("__kmp_allocate_thread: T#%d\n", __kmp_get_gtid()))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_thread: T#%d\n"
, __kmp_get_global_thread_id()); }
;
4406 KMP_DEBUG_ASSERT(root && team)if (!(root && team)) { __kmp_debug_assert("root && team"
, "openmp/runtime/src/kmp_runtime.cpp", 4406); }
;
4407#if !KMP_NESTED_HOT_TEAMS1
4408 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(__kmp_get_gtid()))if (!((0 == __kmp_tid_from_gtid((__kmp_get_global_thread_id()
))))) { __kmp_debug_assert("(0 == __kmp_tid_from_gtid((__kmp_get_global_thread_id())))"
, "openmp/runtime/src/kmp_runtime.cpp", 4408); }
;
4409#endif
4410 KMP_MB();
4411
4412 /* first, try to get one from the thread pool */
4413 if (__kmp_thread_pool) {
4414 new_thr = CCAST(kmp_info_t *, __kmp_thread_pool)const_cast<kmp_info_t *>(__kmp_thread_pool);
4415 __kmp_thread_pool = (volatile kmp_info_t *)new_thr->th.th_next_pool;
4416 if (new_thr == __kmp_thread_pool_insert_pt) {
4417 __kmp_thread_pool_insert_pt = NULL__null;
4418 }
4419 TCW_4(new_thr->th.th_in_pool, FALSE)(new_thr->th.th_in_pool) = (0);
4420 __kmp_suspend_initialize_thread(new_thr);
4421 __kmp_lock_suspend_mx(new_thr);
4422 if (new_thr->th.th_active_in_pool == TRUE(!0)) {
4423 KMP_DEBUG_ASSERT(new_thr->th.th_active == TRUE)if (!(new_thr->th.th_active == (!0))) { __kmp_debug_assert
("new_thr->th.th_active == (!0)", "openmp/runtime/src/kmp_runtime.cpp"
, 4423); }
;
4424 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth)(&__kmp_thread_pool_active_nth)->fetch_sub(1, std::memory_order_acq_rel
)
;
4425 new_thr->th.th_active_in_pool = FALSE0;
4426 }
4427 __kmp_unlock_suspend_mx(new_thr);
4428
4429 KA_TRACE(20, ("__kmp_allocate_thread: T#%d using thread T#%d\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_thread: T#%d using thread T#%d\n"
, __kmp_get_global_thread_id(), new_thr->th.th_info.ds.ds_gtid
); }
4430 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_thread: T#%d using thread T#%d\n"
, __kmp_get_global_thread_id(), new_thr->th.th_info.ds.ds_gtid
); }
;
4431 KMP_ASSERT(!new_thr->th.th_team)if (!(!new_thr->th.th_team)) { __kmp_debug_assert("!new_thr->th.th_team"
, "openmp/runtime/src/kmp_runtime.cpp", 4431); }
;
4432 KMP_DEBUG_ASSERT(__kmp_nth < __kmp_threads_capacity)if (!(__kmp_nth < __kmp_threads_capacity)) { __kmp_debug_assert
("__kmp_nth < __kmp_threads_capacity", "openmp/runtime/src/kmp_runtime.cpp"
, 4432); }
;
4433
4434 /* setup the thread structure */
4435 __kmp_initialize_info(new_thr, team, new_tid,
4436 new_thr->th.th_info.ds.ds_gtid);
4437 KMP_DEBUG_ASSERT(new_thr->th.th_serial_team)if (!(new_thr->th.th_serial_team)) { __kmp_debug_assert("new_thr->th.th_serial_team"
, "openmp/runtime/src/kmp_runtime.cpp", 4437); }
;
4438
4439 TCW_4(__kmp_nth, __kmp_nth + 1)(__kmp_nth) = (__kmp_nth + 1);
4440
4441 new_thr->th.th_task_state = 0;
4442 new_thr->th.th_task_state_top = 0;
4443 new_thr->th.th_task_state_stack_sz = 4;
4444
4445 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
4446 // Make sure pool thread has transitioned to waiting on own thread struct
4447 KMP_DEBUG_ASSERT(new_thr->th.th_used_in_team.load() == 0)if (!(new_thr->th.th_used_in_team.load() == 0)) { __kmp_debug_assert
("new_thr->th.th_used_in_team.load() == 0", "openmp/runtime/src/kmp_runtime.cpp"
, 4447); }
;
4448 // Thread activated in __kmp_allocate_team when increasing team size
4449 }
4450
4451#ifdef KMP_ADJUST_BLOCKTIME1
4452 /* Adjust blocktime back to zero if necessary */
4453 /* Middle initialization might not have occurred yet */
4454 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4455 if (__kmp_nth > __kmp_avail_proc) {
4456 __kmp_zero_bt = TRUE(!0);
4457 }
4458 }
4459#endif /* KMP_ADJUST_BLOCKTIME */
4460
4461#if KMP_DEBUG1
4462 // If thread entered pool via __kmp_free_thread, wait_flag should !=
4463 // KMP_BARRIER_PARENT_FLAG.
4464 int b;
4465 kmp_balign_t *balign = new_thr->th.th_bar;
4466 for (b = 0; b < bs_last_barrier; ++b)
4467 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG)if (!(balign[b].bb.wait_flag != 2)) { __kmp_debug_assert("balign[b].bb.wait_flag != 2"
, "openmp/runtime/src/kmp_runtime.cpp", 4467); }
;
4468#endif
4469
4470 KF_TRACE(10, ("__kmp_allocate_thread: T#%d using thread %p T#%d\n",if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_allocate_thread: T#%d using thread %p T#%d\n"
, __kmp_get_global_thread_id(), new_thr, new_thr->th.th_info
.ds.ds_gtid); }
4471 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_allocate_thread: T#%d using thread %p T#%d\n"
, __kmp_get_global_thread_id(), new_thr, new_thr->th.th_info
.ds.ds_gtid); }
;
4472
4473 KMP_MB();
4474 return new_thr;
4475 }
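
The branch above is the reuse half of a take-or-create allocator: the thread pool is an intrusive singly linked list threaded through th_next_pool, so reusing a thread is just popping the list head, and only when the pool is empty does the code fall through and fork a brand-new worker. A standalone sketch of that shape (plain structs and new/delete, not the runtime's kmp_info_t or its bookkeeping):

  #include <cstdio>

  struct worker {             // simplified stand-in for kmp_info_t
    int gtid;
    worker *next_pool;        // intrusive free-list link (th_next_pool)
  };

  static worker *thread_pool = nullptr;   // stand-in for __kmp_thread_pool
  static int next_gtid = 1;

  static worker *allocate_worker() {
    if (thread_pool) {                    // 1) reuse: pop the head of the pool
      worker *w = thread_pool;
      thread_pool = w->next_pool;
      w->next_pool = nullptr;
      return w;
    }
    return new worker{next_gtid++, nullptr};   // 2) pool empty: create a new one
  }

  static void free_worker(worker *w) {    // push back onto the pool for reuse
    w->next_pool = thread_pool;
    thread_pool = w;
  }

  int main() {
    worker *a = allocate_worker();        // freshly created
    free_worker(a);
    worker *b = allocate_worker();        // same object, reused from the pool
    std::printf("reused: %s\n", a == b ? "yes" : "no");
    delete b;
  }
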
4476
4477  /* no, we'll fork a new one */
4478 KMP_ASSERT(__kmp_nth == __kmp_all_nth)if (!(__kmp_nth == __kmp_all_nth)) { __kmp_debug_assert("__kmp_nth == __kmp_all_nth"
, "openmp/runtime/src/kmp_runtime.cpp", 4478); }
;
4479 KMP_ASSERT(__kmp_all_nth < __kmp_threads_capacity)if (!(__kmp_all_nth < __kmp_threads_capacity)) { __kmp_debug_assert
("__kmp_all_nth < __kmp_threads_capacity", "openmp/runtime/src/kmp_runtime.cpp"
, 4479); }
;
4480
4481#if KMP_USE_MONITOR
4482 // If this is the first worker thread the RTL is creating, then also
4483 // launch the monitor thread. We try to do this as early as possible.
4484 if (!TCR_4(__kmp_init_monitor)(__kmp_init_monitor)) {
4485 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
4486 if (!TCR_4(__kmp_init_monitor)(__kmp_init_monitor)) {
4487 KF_TRACE(10, ("before __kmp_create_monitor\n"))if (kmp_f_debug >= 10) { __kmp_debug_printf ("before __kmp_create_monitor\n"
); }
;
4488 TCW_4(__kmp_init_monitor, 1)(__kmp_init_monitor) = (1);
4489 __kmp_create_monitor(&__kmp_monitor);
4490 KF_TRACE(10, ("after __kmp_create_monitor\n"))if (kmp_f_debug >= 10) { __kmp_debug_printf ("after __kmp_create_monitor\n"
); }
;
4491#if KMP_OS_WINDOWS0
4492 // AC: wait until monitor has started. This is a fix for CQ232808.
4493 // The reason is that if the library is loaded/unloaded in a loop with
4494    // small (parallel) work in between, then there is a high probability that
4495    // the monitor thread starts only after the library shutdown. At shutdown it
4496    // is too late to cope with the problem, because when the primary thread is
4497    // in DllMain (process detach) the monitor has no chance to start (it is
4498    // blocked), and the primary thread has no means to inform the monitor that
4499 // the library has gone, because all the memory which the monitor can
4500 // access is going to be released/reset.
4501 while (TCR_4(__kmp_init_monitor)(__kmp_init_monitor) < 2) {
4502 KMP_YIELD(TRUE){ __kmp_x86_pause(); if (((!0)) && (((__kmp_use_yield
== 1) || (__kmp_use_yield == 2 && (((__kmp_nth) >
(__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc))))))) __kmp_yield
(); }
;
4503 }
4504 KF_TRACE(10, ("after monitor thread has started\n"))if (kmp_f_debug >= 10) { __kmp_debug_printf ("after monitor thread has started\n"
); }
;
4505#endif
4506 }
4507 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
4508 }
4509#endif
4510
4511 KMP_MB();
4512
4513 {
4514 int new_start_gtid = TCR_4(__kmp_init_hidden_helper_threads)(__kmp_init_hidden_helper_threads)
4515 ? 1
4516 : __kmp_hidden_helper_threads_num + 1;
4517
4518 for (new_gtid = new_start_gtid; TCR_PTR(__kmp_threads[new_gtid])((void *)(__kmp_threads[new_gtid])) != NULL__null;
4519 ++new_gtid) {
4520 KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity)if (!(new_gtid < __kmp_threads_capacity)) { __kmp_debug_assert
("new_gtid < __kmp_threads_capacity", "openmp/runtime/src/kmp_runtime.cpp"
, 4520); }
;
4521 }
4522
4523 if (TCR_4(__kmp_init_hidden_helper_threads)(__kmp_init_hidden_helper_threads)) {
4524 KMP_DEBUG_ASSERT(new_gtid <= __kmp_hidden_helper_threads_num)if (!(new_gtid <= __kmp_hidden_helper_threads_num)) { __kmp_debug_assert
("new_gtid <= __kmp_hidden_helper_threads_num", "openmp/runtime/src/kmp_runtime.cpp"
, 4524); }
;
4525 }
4526 }
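
The scan above picks the new thread's gtid as the first free slot in __kmp_threads, but the starting point matters: unless the hidden helper threads themselves are being initialized, the search begins at __kmp_hidden_helper_threads_num + 1 so the low gtids stay reserved for the hidden helper team. A small sketch of that first-free-slot-from-an-offset search; the capacity and helper count are arbitrary example values:

  #include <cassert>
  #include <cstdio>

  int main() {
    const int capacity = 16;               // stand-in for __kmp_threads_capacity
    const int hidden_helpers = 4;          // stand-in for __kmp_hidden_helper_threads_num
    const bool init_hidden_helpers = false;
    void *threads[capacity] = {};          // nullptr == free slot
    threads[0] = threads[5] = threads[6] = (void *)1;   // pretend these gtids are taken

    int start = init_hidden_helpers ? 1 : hidden_helpers + 1;
    int gtid = start;
    while (threads[gtid] != nullptr) {     // find the first free slot
      ++gtid;
      assert(gtid < capacity);             // caller already ensured there is room
    }
    std::printf("new gtid = %d (search started at %d)\n", gtid, start);  // prints 7
  }
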
4527
4528 /* allocate space for it. */
4529 new_thr = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t))___kmp_allocate((sizeof(kmp_info_t)), "openmp/runtime/src/kmp_runtime.cpp"
, 4529)
;
4530
4531 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr)((__kmp_threads[new_gtid])) = ((new_thr));
4532
4533#if USE_ITT_BUILD1 && USE_ITT_NOTIFY1 && KMP_DEBUG1
4534 // suppress race conditions detection on synchronization flags in debug mode
4535 // this helps to analyze library internals eliminating false positives
4536 __itt_suppress_mark_range(!__kmp_itt_suppress_mark_range_ptr__3_0) ? (void)0 : __kmp_itt_suppress_mark_range_ptr__3_0(
4537 __itt_suppress_range, __itt_suppress_threading_errors0x000000ff,
4538 &new_thr->th.th_sleep_loc, sizeof(new_thr->th.th_sleep_loc));
4539 __itt_suppress_mark_range(!__kmp_itt_suppress_mark_range_ptr__3_0) ? (void)0 : __kmp_itt_suppress_mark_range_ptr__3_0(
4540 __itt_suppress_range, __itt_suppress_threading_errors0x000000ff,
4541 &new_thr->th.th_reap_state, sizeof(new_thr->th.th_reap_state));
4542#if KMP_OS_WINDOWS0
4543 __itt_suppress_mark_range(!__kmp_itt_suppress_mark_range_ptr__3_0) ? (void)0 : __kmp_itt_suppress_mark_range_ptr__3_0(
4544 __itt_suppress_range, __itt_suppress_threading_errors0x000000ff,
4545 &new_thr->th.th_suspend_init, sizeof(new_thr->th.th_suspend_init));
4546#else
4547 __itt_suppress_mark_range(!__kmp_itt_suppress_mark_range_ptr__3_0) ? (void)0 : __kmp_itt_suppress_mark_range_ptr__3_0(__itt_suppress_range,
4548 __itt_suppress_threading_errors0x000000ff,
4549 &new_thr->th.th_suspend_init_count,
4550 sizeof(new_thr->th.th_suspend_init_count));
4551#endif
4552 // TODO: check if we need to also suppress b_arrived flags
4553 __itt_suppress_mark_range(!__kmp_itt_suppress_mark_range_ptr__3_0) ? (void)0 : __kmp_itt_suppress_mark_range_ptr__3_0(__itt_suppress_range,
4554 __itt_suppress_threading_errors0x000000ff,
4555 CCAST(kmp_uint64 *, &new_thr->th.th_bar[0].bb.b_go)const_cast<kmp_uint64 *>(&new_thr->th.th_bar[0].
bb.b_go)
,
4556 sizeof(new_thr->th.th_bar[0].bb.b_go));
4557 __itt_suppress_mark_range(!__kmp_itt_suppress_mark_range_ptr__3_0) ? (void)0 : __kmp_itt_suppress_mark_range_ptr__3_0(__itt_suppress_range,
4558 __itt_suppress_threading_errors0x000000ff,
4559 CCAST(kmp_uint64 *, &new_thr->th.th_bar[1].bb.b_go)const_cast<kmp_uint64 *>(&new_thr->th.th_bar[1].
bb.b_go)
,
4560 sizeof(new_thr->th.th_bar[1].bb.b_go));
4561 __itt_suppress_mark_range(!__kmp_itt_suppress_mark_range_ptr__3_0) ? (void)0 : __kmp_itt_suppress_mark_range_ptr__3_0(__itt_suppress_range,
4562 __itt_suppress_threading_errors0x000000ff,
4563 CCAST(kmp_uint64 *, &new_thr->th.th_bar[2].bb.b_go)const_cast<kmp_uint64 *>(&new_thr->th.th_bar[2].
bb.b_go)
,
4564 sizeof(new_thr->th.th_bar[2].bb.b_go));
4565#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG */
4566 if (__kmp_storage_map) {
4567 __kmp_print_thread_storage_map(new_thr, new_gtid);
4568 }
4569
4570 // add the reserve serialized team, initialized from the team's primary thread
4571 {
4572 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs(team);
4573 KF_TRACE(10, ("__kmp_allocate_thread: before th_serial/serial_team\n"))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_allocate_thread: before th_serial/serial_team\n"
); }
;
4574 new_thr->th.th_serial_team = serial_team =
4575 (kmp_team_t *)__kmp_allocate_team(root, 1, 1,
4576#if OMPT_SUPPORT1
4577 ompt_data_none{0}, // root parallel id
4578#endif
4579 proc_bind_default, &r_icvs,
4580 0 USE_NESTED_HOT_ARG(NULL), __null);
4581 }
4582 KMP_ASSERT(serial_team)if (!(serial_team)) { __kmp_debug_assert("serial_team", "openmp/runtime/src/kmp_runtime.cpp"
, 4582); }
;
4583 serial_team->t.t_serialized = 0; // AC: the team created in reserve, not for
4584 // execution (it is unused for now).
4585 serial_team->t.t_threads[0] = new_thr;
4586 KF_TRACE(10,if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n"
, new_thr); }
4587 ("__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n"
, new_thr); }
4588 new_thr))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n"
, new_thr); }
;
4589
4590 /* setup the thread structures */
4591 __kmp_initialize_info(new_thr, team, new_tid, new_gtid);
4592
4593#if USE_FAST_MEMORY3
4594 __kmp_initialize_fast_memory(new_thr);
4595#endif /* USE_FAST_MEMORY */
4596
4597#if KMP_USE_BGET1
4598 KMP_DEBUG_ASSERT(new_thr->th.th_local.bget_data == NULL)if (!(new_thr->th.th_local.bget_data == __null)) { __kmp_debug_assert
("new_thr->th.th_local.bget_data == __null", "openmp/runtime/src/kmp_runtime.cpp"
, 4598); }
;
4599 __kmp_initialize_bget(new_thr);
4600#endif
4601
4602 __kmp_init_random(new_thr); // Initialize random number generator
4603
4604 /* Initialize these only once when thread is grabbed for a team allocation */
4605 KA_TRACE(20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n"
, __kmp_get_global_thread_id(), 0, 0); }
4606 ("__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n"
, __kmp_get_global_thread_id(), 0, 0); }
4607 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n"
, __kmp_get_global_thread_id(), 0, 0); }
;
4608
4609 int b;
4610 kmp_balign_t *balign = new_thr->th.th_bar;
4611 for (b = 0; b < bs_last_barrier; ++b) {
4612 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE0;
4613 balign[b].bb.team = NULL__null;
4614 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING0;
4615 balign[b].bb.use_oncore_barrier = 0;
4616 }
4617
4618 TCW_PTR(new_thr->th.th_sleep_loc, NULL)((new_thr->th.th_sleep_loc)) = ((__null));
4619 new_thr->th.th_sleep_loc_type = flag_unset;
4620
4621 new_thr->th.th_spin_here = FALSE0;
4622 new_thr->th.th_next_waiting = 0;
4623#if KMP_OS_UNIX1
4624 new_thr->th.th_blocking = false;
4625#endif
4626
4627#if KMP_AFFINITY_SUPPORTED1
4628 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED(-2);
4629 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED(-2);
4630 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED(-2);
4631 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED(-2);
4632#endif
4633 new_thr->th.th_def_allocator = __kmp_def_allocator;
4634 new_thr->th.th_prev_level = 0;
4635 new_thr->th.th_prev_num_threads = 1;
4636
4637 TCW_4(new_thr->th.th_in_pool, FALSE)(new_thr->th.th_in_pool) = (0);
4638 new_thr->th.th_active_in_pool = FALSE0;
4639 TCW_4(new_thr->th.th_active, TRUE)(new_thr->th.th_active) = ((!0));
4640
4641 /* adjust the global counters */
4642 __kmp_all_nth++;
4643 __kmp_nth++;
4644
4645 // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) for low
4646 // numbers of procs, and method #2 (keyed API call) for higher numbers.
4647 if (__kmp_adjust_gtid_mode) {
4648 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
4649 if (TCR_4(__kmp_gtid_mode)(__kmp_gtid_mode) != 2) {
4650 TCW_4(__kmp_gtid_mode, 2)(__kmp_gtid_mode) = (2);
4651 }
4652 } else {
4653 if (TCR_4(__kmp_gtid_mode)(__kmp_gtid_mode) != 1) {
4654 TCW_4(__kmp_gtid_mode, 1)(__kmp_gtid_mode) = (1);
4655 }
4656 }
4657 }
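
The block above only matters when __kmp_adjust_gtid_mode is set: once the total thread count reaches __kmp_tls_gtid_min the runtime flips from gtid method #1 (stack-address search) to method #2 (keyed/TLS lookup), and flips back if the count is below the threshold, writing __kmp_gtid_mode only when the value actually changes. A small sketch of that thresholded switch; the threshold value below is a placeholder, not the runtime's default:

  #include <cstdio>

  int main() {
    const int tls_gtid_min = 8;    // placeholder for __kmp_tls_gtid_min
    int gtid_mode = 1;             // 1 = stack-address search, 2 = TLS/keyed lookup
    for (int all_nth = 1; all_nth <= 12; ++all_nth) {
      int wanted = (all_nth >= tls_gtid_min) ? 2 : 1;
      if (gtid_mode != wanted) {   // only write when the mode actually changes
        gtid_mode = wanted;
        std::printf("thread count %d: switching gtid mode to %d\n", all_nth, gtid_mode);
      }
    }
  }
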
4658
4659#ifdef KMP_ADJUST_BLOCKTIME1
4660 /* Adjust blocktime back to zero if necessary */
4661 /* Middle initialization might not have occurred yet */
4662 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4663 if (__kmp_nth > __kmp_avail_proc) {
4664 __kmp_zero_bt = TRUE(!0);
4665 }
4666 }
4667#endif /* KMP_ADJUST_BLOCKTIME */
4668
4669 /* actually fork it and create the new worker thread */
4670 KF_TRACE(if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_allocate_thread: before __kmp_create_worker: %p\n"
, new_thr); }
4671 10, ("__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_allocate_thread: before __kmp_create_worker: %p\n"
, new_thr); }
;
4672 __kmp_create_worker(new_gtid, new_thr, __kmp_stksize);
4673 KF_TRACE(10,if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_allocate_thread: after __kmp_create_worker: %p\n"
, new_thr); }
4674 ("__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_allocate_thread: after __kmp_create_worker: %p\n"
, new_thr); }
;
4675
4676 KA_TRACE(20, ("__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(),if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_thread: T#%d forked T#%d\n"
, __kmp_get_global_thread_id(), new_gtid); }
4677 new_gtid))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_thread: T#%d forked T#%d\n"
, __kmp_get_global_thread_id(), new_gtid); }
;
4678 KMP_MB();
4679 return new_thr;
4680}
4681
4682/* Reinitialize team for reuse.
4683   The hot team code calls this at every fork barrier, so the EPCC barrier
4684   tests are extremely sensitive to changes in it, esp. writes to the team
4685 struct, which cause a cache invalidation in all threads.
4686 IF YOU TOUCH THIS ROUTINE, RUN EPCC C SYNCBENCH ON A BIG-IRON MACHINE!!! */
4687static void __kmp_reinitialize_team(kmp_team_t *team,
4688 kmp_internal_control_t *new_icvs,
4689 ident_t *loc) {
4690 KF_TRACE(10, ("__kmp_reinitialize_team: enter this_thread=%p team=%p\n",if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_reinitialize_team: enter this_thread=%p team=%p\n"
, team->t.t_threads[0], team); }
4691 team->t.t_threads[0], team))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_reinitialize_team: enter this_thread=%p team=%p\n"
, team->t.t_threads[0], team); }
;
4692 KMP_DEBUG_ASSERT(team && new_icvs)if (!(team && new_icvs)) { __kmp_debug_assert("team && new_icvs"
, "openmp/runtime/src/kmp_runtime.cpp", 4692); }
;
4693 KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc)if (!((!(__kmp_init_parallel)) || new_icvs->nproc)) { __kmp_debug_assert
("(!(__kmp_init_parallel)) || new_icvs->nproc", "openmp/runtime/src/kmp_runtime.cpp"
, 4693); }
;
4694 KMP_CHECK_UPDATE(team->t.t_ident, loc)if ((team->t.t_ident) != (loc)) (team->t.t_ident) = (loc
)
;
4695
4696 KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID())if ((team->t.t_id) != ((~0))) (team->t.t_id) = ((~0));
4697 // Copy ICVs to the primary thread's implicit taskdata
4698 __kmp_init_implicit_task(loc, team->t.t_threads[0], team, 0, FALSE0);
4699 copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);
4700
4701 KF_TRACE(10, ("__kmp_reinitialize_team: exit this_thread=%p team=%p\n",if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_reinitialize_team: exit this_thread=%p team=%p\n"
, team->t.t_threads[0], team); }
4702 team->t.t_threads[0], team))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_reinitialize_team: exit this_thread=%p team=%p\n"
, team->t.t_threads[0], team); }
;
4703}
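
The warning in the comment above is about cache traffic: the team struct is read by every worker at the fork barrier, so KMP_CHECK_UPDATE (its expansion is visible in the listing) stores a field only when the new value differs, sparing the other threads a cache-line invalidation on the hot path. A minimal sketch of that compare-before-write idiom with a generic macro of the same shape:

  #include <cstdio>

  // Same shape as the KMP_CHECK_UPDATE expansion shown above:
  // only store when the new value differs from what is already there.
  #define CHECK_UPDATE(dst, val)                                               \
    do {                                                                       \
      if ((dst) != (val))                                                      \
        (dst) = (val);                                                         \
    } while (0)

  struct team_state {     // stand-in for the shared team struct
    int ident;
    int sched;
  };

  int main() {
    team_state team = {7, 3};
    CHECK_UPDATE(team.ident, 7);   // same value: no write, cache line stays clean
    CHECK_UPDATE(team.sched, 4);   // different value: this one is stored
    std::printf("ident=%d sched=%d\n", team.ident, team.sched);
  }
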
4704
4705/* Initialize the team data structure.
4706 This assumes the t_threads and t_max_nproc are already set.
4707 Also, we don't touch the arguments */
4708static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
4709 kmp_internal_control_t *new_icvs,
4710 ident_t *loc) {
4711 KF_TRACE(10, ("__kmp_initialize_team: enter: team=%p\n", team))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_initialize_team: enter: team=%p\n"
, team); }
;
4712
4713 /* verify */
4714 KMP_DEBUG_ASSERT(team)if (!(team)) { __kmp_debug_assert("team", "openmp/runtime/src/kmp_runtime.cpp"
, 4714); }
;
4715 KMP_DEBUG_ASSERT(new_nproc <= team->t.t_max_nproc)if (!(new_nproc <= team->t.t_max_nproc)) { __kmp_debug_assert
("new_nproc <= team->t.t_max_nproc", "openmp/runtime/src/kmp_runtime.cpp"
, 4715); }
;
4716 KMP_DEBUG_ASSERT(team->t.t_threads)if (!(team->t.t_threads)) { __kmp_debug_assert("team->t.t_threads"
, "openmp/runtime/src/kmp_runtime.cpp", 4716); }
;
4717 KMP_MB();
4718
4719 team->t.t_master_tid = 0; /* not needed */
4720 /* team->t.t_master_bar; not needed */
4721 team->t.t_serialized = new_nproc > 1 ? 0 : 1;
4722 team->t.t_nproc = new_nproc;
4723
4724 /* team->t.t_parent = NULL; TODO not needed & would mess up hot team */
4725 team->t.t_next_pool = NULL__null;
4726 /* memset( team->t.t_threads, 0, sizeof(kmp_info_t*)*new_nproc ); would mess
4727 * up hot team */
4728
4729 TCW_SYNC_PTR(team->t.t_pkfn, NULL)((team->t.t_pkfn)) = ((__null)); /* not needed */
4730 team->t.t_invoke = NULL__null; /* not needed */
4731
4732 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
4733 team->t.t_sched.sched = new_icvs->sched.sched;
4734
4735#if KMP_ARCH_X860 || KMP_ARCH_X86_641
4736 team->t.t_fp_control_saved = FALSE0; /* not needed */
4737 team->t.t_x87_fpu_control_word = 0; /* not needed */
4738 team->t.t_mxcsr = 0; /* not needed */
4739#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
4740
4741 team->t.t_construct = 0;
4742
4743 team->t.t_ordered.dt.t_value = 0;
4744 team->t.t_master_active = FALSE0;
4745
4746#ifdef KMP_DEBUG1
4747 team->t.t_copypriv_data = NULL__null; /* not necessary, but nice for debugging */
4748#endif
4749#if KMP_OS_WINDOWS0
4750 team->t.t_copyin_counter = 0; /* for barrier-free copyin implementation */
4751#endif
4752
4753 team->t.t_control_stack_top = NULL__null;
4754
4755 __kmp_reinitialize_team(team, new_icvs, loc);
4756
4757 KMP_MB();
4758 KF_TRACE(10, ("__kmp_initialize_team: exit: team=%p\n", team))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_initialize_team: exit: team=%p\n"
, team); }
;
4759}
4760
4761#if (KMP_OS_LINUX1 || KMP_OS_FREEBSD0) && KMP_AFFINITY_SUPPORTED1
4762/* Sets full mask for thread and returns old mask, no changes to structures. */
4763static void
4764__kmp_set_thread_affinity_mask_full_tmp(kmp_affin_mask_t *old_mask) {
4765 if (KMP_AFFINITY_CAPABLE()(__kmp_affin_mask_size > 0)) {
4766 int status;
4767 if (old_mask != NULL__null) {
4768 status = __kmp_get_system_affinity(old_mask, TRUE)(old_mask)->get_system_affinity((!0));
4769 int error = errno(*__errno_location ());
4770 if (status != 0) {
4771 __kmp_fatal(KMP_MSG(ChangeThreadAffMaskError)__kmp_msg_format(kmp_i18n_msg_ChangeThreadAffMaskError), KMP_ERR(error)__kmp_msg_error_code(error),
4772 __kmp_msg_null);
4773 }
4774 }
4775 __kmp_set_system_affinity(__kmp_affin_fullMask, TRUE)(__kmp_affin_fullMask)->set_system_affinity((!0));
4776 }
4777}
4778#endif
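
__kmp_set_thread_affinity_mask_full_tmp saves the caller's current mask (when a place to store it was provided) and then binds the thread to the runtime's full mask so it can temporarily run anywhere; the caller restores the saved mask later. A hedged standalone sketch of the same save-then-widen pattern using plain Linux sched_getaffinity/sched_setaffinity instead of the runtime's affinity wrappers, and a naive every-CPU mask instead of __kmp_affin_fullMask:

  #ifndef _GNU_SOURCE
  #define _GNU_SOURCE 1
  #endif
  #include <sched.h>
  #include <cerrno>
  #include <cstdio>
  #include <cstring>

  // Save the current mask into *old_mask (if a slot was supplied), then
  // allow the thread to run on every CPU id the mask type can describe.
  static void set_full_affinity_tmp(cpu_set_t *old_mask) {
    if (old_mask != nullptr) {
      if (sched_getaffinity(0, sizeof(*old_mask), old_mask) != 0) {
        std::fprintf(stderr, "sched_getaffinity: %s\n", std::strerror(errno));
        return;
      }
    }
    cpu_set_t full;
    CPU_ZERO(&full);
    for (int cpu = 0; cpu < CPU_SETSIZE; ++cpu)
      CPU_SET(cpu, &full);                       // crude "full" mask for the sketch
    sched_setaffinity(0, sizeof(full), &full);   // effective set is intersected with online CPUs
  }

  int main() {
    cpu_set_t saved;
    set_full_affinity_tmp(&saved);                 // widen temporarily ...
    sched_setaffinity(0, sizeof(saved), &saved);   // ... then restore the old mask
    std::puts("affinity widened and restored");
  }
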
4779
4780#if KMP_AFFINITY_SUPPORTED1
4781
4782// __kmp_partition_places() is the heart of the OpenMP 4.0 affinity mechanism.
4783// It calculates the worker + primary thread's partition based upon the parent
4784// thread's partition, and binds each worker to a thread in their partition.
4785// The primary thread's partition should already include its current binding.
4786static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
4787 // Do not partition places for the hidden helper team
4788 if (KMP_HIDDEN_HELPER_TEAM(team)(team->t.t_threads[0] == __kmp_hidden_helper_main_thread))
4789 return;
4790 // Copy the primary thread's place partition to the team struct
4791 kmp_info_t *master_th = team->t.t_threads[0];
4792 KMP_DEBUG_ASSERT(master_th != NULL)if (!(master_th != __null)) { __kmp_debug_assert("master_th != __null"
, "openmp/runtime/src/kmp_runtime.cpp", 4792); }
;
4793 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
4794 int first_place = master_th->th.th_first_place;
4795 int last_place = master_th->th.th_last_place;
4796 int masters_place = master_th->th.th_current_place;
4797 int num_masks = __kmp_affinity.num_masks;
4798 team->t.t_first_place = first_place;
4799 team->t.t_last_place = last_place;
4800
4801 KA_TRACE(20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) "
"bound to place %d partition = [%d,%d]\n", proc_bind, __kmp_gtid_from_thread
(team->t.t_threads[0]), team->t.t_id, masters_place, first_place
, last_place); }
4802 "bound to place %d partition = [%d,%d]\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) "
"bound to place %d partition = [%d,%d]\n", proc_bind, __kmp_gtid_from_thread
(team->t.t_threads[0]), team->t.t_id, masters_place, first_place
, last_place); }
4803 proc_bind, __kmp_gtid_from_thread(team->t.t_threads[0]),if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) "
"bound to place %d partition = [%d,%d]\n", proc_bind, __kmp_gtid_from_thread
(team->t.t_threads[0]), team->t.t_id, masters_place, first_place
, last_place); }
4804 team->t.t_id, masters_place, first_place, last_place))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) "
"bound to place %d partition = [%d,%d]\n", proc_bind, __kmp_gtid_from_thread
(team->t.t_threads[0]), team->t.t_id, masters_place, first_place
, last_place); }
;
4805
4806 switch (proc_bind) {
4807
4808 case proc_bind_default:
4809 // Serial teams might have the proc_bind policy set to proc_bind_default.
4810 // Not an issue -- we don't rebind primary thread for any proc_bind policy.
4811 KMP_DEBUG_ASSERT(team->t.t_nproc == 1)if (!(team->t.t_nproc == 1)) { __kmp_debug_assert("team->t.t_nproc == 1"
, "openmp/runtime/src/kmp_runtime.cpp", 4811); }
;
4812 break;
4813
4814 case proc_bind_primary: {
4815 int f;
4816 int n_th = team->t.t_nproc;
4817 for (f = 1; f < n_th; f++) {
4818 kmp_info_t *th = team->t.t_threads[f];
4819 KMP_DEBUG_ASSERT(th != NULL)if (!(th != __null)) { __kmp_debug_assert("th != __null", "openmp/runtime/src/kmp_runtime.cpp"
, 4819); }
;
4820 th->th.th_first_place = first_place;
4821 th->th.th_last_place = last_place;
4822 th->th.th_new_place = masters_place;
4823 if (__kmp_display_affinity && masters_place != th->th.th_current_place &&
4824 team->t.t_display_affinity != 1) {
4825 team->t.t_display_affinity = 1;
4826 }
4827
4828 KA_TRACE(100, ("__kmp_partition_places: primary: T#%d(%d:%d) place %d "if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: primary: T#%d(%d:%d) place %d "
"partition = [%d,%d]\n", __kmp_gtid_from_thread(team->t.t_threads
[f]), team->t.t_id, f, masters_place, first_place, last_place
); }
4829 "partition = [%d,%d]\n",if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: primary: T#%d(%d:%d) place %d "
"partition = [%d,%d]\n", __kmp_gtid_from_thread(team->t.t_threads
[f]), team->t.t_id, f, masters_place, first_place, last_place
); }
4830 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: primary: T#%d(%d:%d) place %d "
"partition = [%d,%d]\n", __kmp_gtid_from_thread(team->t.t_threads
[f]), team->t.t_id, f, masters_place, first_place, last_place
); }
4831 f, masters_place, first_place, last_place))if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: primary: T#%d(%d:%d) place %d "
"partition = [%d,%d]\n", __kmp_gtid_from_thread(team->t.t_threads
[f]), team->t.t_id, f, masters_place, first_place, last_place
); }
;
4832 }
4833 } break;
4834
4835 case proc_bind_close: {
4836 int f;
4837 int n_th = team->t.t_nproc;
4838 int n_places;
4839 if (first_place <= last_place) {
4840 n_places = last_place - first_place + 1;
4841 } else {
4842 n_places = num_masks - first_place + last_place + 1;
4843 }
4844 if (n_th <= n_places) {
4845 int place = masters_place;
4846 for (f = 1; f < n_th; f++) {
4847 kmp_info_t *th = team->t.t_threads[f];
4848 KMP_DEBUG_ASSERT(th != NULL)if (!(th != __null)) { __kmp_debug_assert("th != __null", "openmp/runtime/src/kmp_runtime.cpp"
, 4848); }
;
4849
4850 if (place == last_place) {
4851 place = first_place;
4852 } else if (place == (num_masks - 1)) {
4853 place = 0;
4854 } else {
4855 place++;
4856 }
4857 th->th.th_first_place = first_place;
4858 th->th.th_last_place = last_place;
4859 th->th.th_new_place = place;
4860 if (__kmp_display_affinity && place != th->th.th_current_place &&
4861 team->t.t_display_affinity != 1) {
4862 team->t.t_display_affinity = 1;
4863 }
4864
4865 KA_TRACE(100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d "if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
"partition = [%d,%d]\n", __kmp_gtid_from_thread(team->t.t_threads
[f]), team->t.t_id, f, place, first_place, last_place); }
4866 "partition = [%d,%d]\n",if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
"partition = [%d,%d]\n", __kmp_gtid_from_thread(team->t.t_threads
[f]), team->t.t_id, f, place, first_place, last_place); }
4867 __kmp_gtid_from_thread(team->t.t_threads[f]),if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
"partition = [%d,%d]\n", __kmp_gtid_from_thread(team->t.t_threads
[f]), team->t.t_id, f, place, first_place, last_place); }
4868 team->t.t_id, f, place, first_place, last_place))if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
"partition = [%d,%d]\n", __kmp_gtid_from_thread(team->t.t_threads
[f]), team->t.t_id, f, place, first_place, last_place); }
;
4869 }
4870 } else {
4871 int S, rem, gap, s_count;
4872 S = n_th / n_places;
4873 s_count = 0;
4874 rem = n_th - (S * n_places);
4875 gap = rem > 0 ? n_places / rem : n_places;
4876 int place = masters_place;
4877 int gap_ct = gap;
4878 for (f = 0; f < n_th; f++) {
4879 kmp_info_t *th = team->t.t_threads[f];
4880 KMP_DEBUG_ASSERT(th != NULL)if (!(th != __null)) { __kmp_debug_assert("th != __null", "openmp/runtime/src/kmp_runtime.cpp"
, 4880); }
;
4881
4882 th->th.th_first_place = first_place;
4883 th->th.th_last_place = last_place;
4884 th->th.th_new_place = place;
4885 if (__kmp_display_affinity && place != th->th.th_current_place &&
4886 team->t.t_display_affinity != 1) {
4887 team->t.t_display_affinity = 1;
4888 }
4889 s_count++;
4890
4891 if ((s_count == S) && rem && (gap_ct == gap)) {
4892 // do nothing, add an extra thread to place on next iteration
4893 } else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
4894 // we added an extra thread to this place; move to next place
4895 if (place == last_place) {
4896 place = first_place;
4897 } else if (place == (num_masks - 1)) {
4898 place = 0;
4899 } else {
4900 place++;
4901 }
4902 s_count = 0;
4903 gap_ct = 1;
4904 rem--;
4905 } else if (s_count == S) { // place full; don't add extra
4906 if (place == last_place) {
4907 place = first_place;
4908 } else if (place == (num_masks - 1)) {
4909 place = 0;
4910 } else {
4911 place++;
4912 }
4913 gap_ct++;
4914 s_count = 0;
4915 }
4916
4917 KA_TRACE(100,if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
"partition = [%d,%d]\n", __kmp_gtid_from_thread(team->t.t_threads
[f]), team->t.t_id, f, th->th.th_new_place, first_place
, last_place); }
4918 ("__kmp_partition_places: close: T#%d(%d:%d) place %d "if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
"partition = [%d,%d]\n", __kmp_gtid_from_thread(team->t.t_threads
[f]), team->t.t_id, f, th->th.th_new_place, first_place
, last_place); }
4919 "partition = [%d,%d]\n",if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
"partition = [%d,%d]\n", __kmp_gtid_from_thread(team->t.t_threads
[f]), team->t.t_id, f, th->th.th_new_place, first_place
, last_place); }
4920 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f,if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
"partition = [%d,%d]\n", __kmp_gtid_from_thread(team->t.t_threads
[f]), team->t.t_id, f, th->th.th_new_place, first_place
, last_place); }
4921 th->th.th_new_place, first_place, last_place))if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
"partition = [%d,%d]\n", __kmp_gtid_from_thread(team->t.t_threads
[f]), team->t.t_id, f, th->th.th_new_place, first_place
, last_place); }
;
4922 }
4923 KMP_DEBUG_ASSERT(place == masters_place)if (!(place == masters_place)) { __kmp_debug_assert("place == masters_place"
, "openmp/runtime/src/kmp_runtime.cpp", 4923); }
;
4924 }
4925 } break;
4926
4927 case proc_bind_spread: {
4928 int f;
4929 int n_th = team->t.t_nproc;
4930 int n_places;
4931 int thidx;
4932 if (first_place <= last_place) {
4933 n_places = last_place - first_place + 1;
4934 } else {
4935 n_places = num_masks - first_place + last_place + 1;
4936 }
4937 if (n_th <= n_places) {
4938 int place = -1;
4939
4940 if (n_places != num_masks) {
4941 int S = n_places / n_th;
4942 int s_count, rem, gap, gap_ct;
4943
4944 place = masters_place;
4945 rem = n_places - n_th * S;
4946 gap = rem ? n_th / rem : 1;
4947 gap_ct = gap;
4948 thidx = n_th;
4949 if (update_master_only == 1)
4950 thidx = 1;
4951 for (f = 0; f < thidx; f++) {
4952 kmp_info_t *th = team->t.t_threads[f];
4953 KMP_DEBUG_ASSERT(th != NULL)if (!(th != __null)) { __kmp_debug_assert("th != __null", "openmp/runtime/src/kmp_runtime.cpp"
, 4953); }
;
4954
4955 th->th.th_first_place = place;
4956 th->th.th_new_place = place;
4957 if (__kmp_display_affinity && place != th->th.th_current_place &&
4958 team->t.t_display_affinity != 1) {
4959 team->t.t_display_affinity = 1;
4960 }
4961 s_count = 1;
4962 while (s_count < S) {
4963 if (place == last_place) {
4964 place = first_place;
4965 } else if (place == (num_masks - 1)) {
4966 place = 0;
4967 } else {
4968 place++;
4969 }
4970 s_count++;
4971 }
4972 if (rem && (gap_ct == gap)) {
4973 if (place == last_place) {
4974 place = first_place;
4975 } else if (place == (num_masks - 1)) {
4976 place = 0;
4977 } else {
4978 place++;
4979 }
4980 rem--;
4981 gap_ct = 0;
4982 }
4983 th->th.th_last_place = place;
4984 gap_ct++;
4985
4986 if (place == last_place) {
4987 place = first_place;
4988 } else if (place == (num_masks - 1)) {
4989 place = 0;
4990 } else {
4991 place++;
4992 }
4993
4994 KA_TRACE(100,if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
"partition = [%d,%d], num_masks: %u\n", __kmp_gtid_from_thread
(team->t.t_threads[f]), team->t.t_id, f, th->th.th_new_place
, th->th.th_first_place, th->th.th_last_place, num_masks
); }
4995 ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
"partition = [%d,%d], num_masks: %u\n", __kmp_gtid_from_thread
(team->t.t_threads[f]), team->t.t_id, f, th->th.th_new_place
, th->th.th_first_place, th->th.th_last_place, num_masks
); }
4996 "partition = [%d,%d], num_masks: %u\n",if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
"partition = [%d,%d], num_masks: %u\n", __kmp_gtid_from_thread
(team->t.t_threads[f]), team->t.t_id, f, th->th.th_new_place
, th->th.th_first_place, th->th.th_last_place, num_masks
); }
4997 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
"partition = [%d,%d], num_masks: %u\n", __kmp_gtid_from_thread
(team->t.t_threads[f]), team->t.t_id, f, th->th.th_new_place
, th->th.th_first_place, th->th.th_last_place, num_masks
); }
4998 f, th->th.th_new_place, th->th.th_first_place,if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
"partition = [%d,%d], num_masks: %u\n", __kmp_gtid_from_thread
(team->t.t_threads[f]), team->t.t_id, f, th->th.th_new_place
, th->th.th_first_place, th->th.th_last_place, num_masks
); }
4999 th->th.th_last_place, num_masks))if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
"partition = [%d,%d], num_masks: %u\n", __kmp_gtid_from_thread
(team->t.t_threads[f]), team->t.t_id, f, th->th.th_new_place
, th->th.th_first_place, th->th.th_last_place, num_masks
); }
;
5000 }
5001 } else {
5002        /* Given a uniform space of available computation places, we can create
5003           T partitions of roughly round(P/T) places each and put each thread
5004           into the first place of its partition. */
5005 double current = static_cast<double>(masters_place);
5006 double spacing =
5007 (static_cast<double>(n_places + 1) / static_cast<double>(n_th));
5008 int first, last;
5009 kmp_info_t *th;
5010
5011 thidx = n_th + 1;
5012 if (update_master_only == 1)
5013 thidx = 1;
5014 for (f = 0; f < thidx; f++) {
5015 first = static_cast<int>(current);
5016 last = static_cast<int>(current + spacing) - 1;
5017 KMP_DEBUG_ASSERT(last >= first)if (!(last >= first)) { __kmp_debug_assert("last >= first"
, "openmp/runtime/src/kmp_runtime.cpp", 5017); }
;
5018 if (first >= n_places) {
5019 if (masters_place) {
5020 first -= n_places;
5021 last -= n_places;
5022 if (first == (masters_place + 1)) {
5023 KMP_DEBUG_ASSERT(f == n_th)if (!(f == n_th)) { __kmp_debug_assert("f == n_th", "openmp/runtime/src/kmp_runtime.cpp"
, 5023); }
;
5024 first--;
5025 }
5026 if (last == masters_place) {
5027 KMP_DEBUG_ASSERT(f == (n_th - 1))if (!(f == (n_th - 1))) { __kmp_debug_assert("f == (n_th - 1)"
, "openmp/runtime/src/kmp_runtime.cpp", 5027); }
;
5028 last--;
5029 }
5030 } else {
5031 KMP_DEBUG_ASSERT(f == n_th)if (!(f == n_th)) { __kmp_debug_assert("f == n_th", "openmp/runtime/src/kmp_runtime.cpp"
, 5031); }
;
5032 first = 0;
5033 last = 0;
5034 }
5035 }
5036 if (last >= n_places) {
5037 last = (n_places - 1);
5038 }
5039 place = first;
5040 current += spacing;
5041 if (f < n_th) {
5042 KMP_DEBUG_ASSERT(0 <= first)if (!(0 <= first)) { __kmp_debug_assert("0 <= first", "openmp/runtime/src/kmp_runtime.cpp"
, 5042); }
;
5043 KMP_DEBUG_ASSERT(n_places > first)if (!(n_places > first)) { __kmp_debug_assert("n_places > first"
, "openmp/runtime/src/kmp_runtime.cpp", 5043); }
;
5044 KMP_DEBUG_ASSERT(0 <= last)if (!(0 <= last)) { __kmp_debug_assert("0 <= last", "openmp/runtime/src/kmp_runtime.cpp"
, 5044); }
;
5045 KMP_DEBUG_ASSERT(n_places > last)if (!(n_places > last)) { __kmp_debug_assert("n_places > last"
, "openmp/runtime/src/kmp_runtime.cpp", 5045); }
;
5046 KMP_DEBUG_ASSERT(last_place >= first_place)if (!(last_place >= first_place)) { __kmp_debug_assert("last_place >= first_place"
, "openmp/runtime/src/kmp_runtime.cpp", 5046); }
;
5047 th = team->t.t_threads[f];
5048 KMP_DEBUG_ASSERT(th)if (!(th)) { __kmp_debug_assert("th", "openmp/runtime/src/kmp_runtime.cpp"
, 5048); }
;
5049 th->th.th_first_place = first;
5050 th->th.th_new_place = place;
5051 th->th.th_last_place = last;
5052 if (__kmp_display_affinity && place != th->th.th_current_place &&
5053 team->t.t_display_affinity != 1) {
5054 team->t.t_display_affinity = 1;
5055 }
5056 KA_TRACE(100,if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
"partition = [%d,%d], spacing = %.4f\n", __kmp_gtid_from_thread
(team->t.t_threads[f]), team->t.t_id, f, th->th.th_new_place
, th->th.th_first_place, th->th.th_last_place, spacing)
; }
5057 ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
"partition = [%d,%d], spacing = %.4f\n", __kmp_gtid_from_thread
(team->t.t_threads[f]), team->t.t_id, f, th->th.th_new_place
, th->th.th_first_place, th->th.th_last_place, spacing)
; }
5058 "partition = [%d,%d], spacing = %.4f\n",if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
"partition = [%d,%d], spacing = %.4f\n", __kmp_gtid_from_thread
(team->t.t_threads[f]), team->t.t_id, f, th->th.th_new_place
, th->th.th_first_place, th->th.th_last_place, spacing)
; }
5059 __kmp_gtid_from_thread(team->t.t_threads[f]),if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
"partition = [%d,%d], spacing = %.4f\n", __kmp_gtid_from_thread
(team->t.t_threads[f]), team->t.t_id, f, th->th.th_new_place
, th->th.th_first_place, th->th.th_last_place, spacing)
; }
5060 team->t.t_id, f, th->th.th_new_place,if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
"partition = [%d,%d], spacing = %.4f\n", __kmp_gtid_from_thread
(team->t.t_threads[f]), team->t.t_id, f, th->th.th_new_place
, th->th.th_first_place, th->th.th_last_place, spacing)
; }
5061 th->th.th_first_place, th->th.th_last_place, spacing))if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
"partition = [%d,%d], spacing = %.4f\n", __kmp_gtid_from_thread
(team->t.t_threads[f]), team->t.t_id, f, th->th.th_new_place
, th->th.th_first_place, th->th.th_last_place, spacing)
; }
;
5062 }
5063 }
5064 }
5065 KMP_DEBUG_ASSERT(update_master_only || place == masters_place)if (!(update_master_only || place == masters_place)) { __kmp_debug_assert
("update_master_only || place == masters_place", "openmp/runtime/src/kmp_runtime.cpp"
, 5065); }
;
5066 } else {
5067 int S, rem, gap, s_count;
5068 S = n_th / n_places;
5069 s_count = 0;
5070 rem = n_th - (S * n_places);
5071 gap = rem > 0 ? n_places / rem : n_places;
5072 int place = masters_place;
5073 int gap_ct = gap;
5074 thidx = n_th;
5075 if (update_master_only == 1)
5076 thidx = 1;
5077 for (f = 0; f < thidx; f++) {
5078 kmp_info_t *th = team->t.t_threads[f];
5079 KMP_DEBUG_ASSERT(th != NULL)if (!(th != __null)) { __kmp_debug_assert("th != __null", "openmp/runtime/src/kmp_runtime.cpp"
, 5079); }
;
5080
5081 th->th.th_first_place = place;
5082 th->th.th_last_place = place;
5083 th->th.th_new_place = place;
5084 if (__kmp_display_affinity && place != th->th.th_current_place &&
5085 team->t.t_display_affinity != 1) {
5086 team->t.t_display_affinity = 1;
5087 }
5088 s_count++;
5089
5090 if ((s_count == S) && rem && (gap_ct == gap)) {
5091 // do nothing, add an extra thread to place on next iteration
5092 } else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
5093 // we added an extra thread to this place; move on to next place
5094 if (place == last_place) {
5095 place = first_place;
5096 } else if (place == (num_masks - 1)) {
5097 place = 0;
5098 } else {
5099 place++;
5100 }
5101 s_count = 0;
5102 gap_ct = 1;
5103 rem--;
5104 } else if (s_count == S) { // place is full; don't add extra thread
5105 if (place == last_place) {
5106 place = first_place;
5107 } else if (place == (num_masks - 1)) {
5108 place = 0;
5109 } else {
5110 place++;
5111 }
5112 gap_ct++;
5113 s_count = 0;
5114 }
5115
5116 KA_TRACE(100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
"partition = [%d,%d]\n", __kmp_gtid_from_thread(team->t.t_threads
[f]), team->t.t_id, f, th->th.th_new_place, th->th.th_first_place
, th->th.th_last_place); }
5117 "partition = [%d,%d]\n",if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
"partition = [%d,%d]\n", __kmp_gtid_from_thread(team->t.t_threads
[f]), team->t.t_id, f, th->th.th_new_place, th->th.th_first_place
, th->th.th_last_place); }
5118 __kmp_gtid_from_thread(team->t.t_threads[f]),if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
"partition = [%d,%d]\n", __kmp_gtid_from_thread(team->t.t_threads
[f]), team->t.t_id, f, th->th.th_new_place, th->th.th_first_place
, th->th.th_last_place); }
5119 team->t.t_id, f, th->th.th_new_place,if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
"partition = [%d,%d]\n", __kmp_gtid_from_thread(team->t.t_threads
[f]), team->t.t_id, f, th->th.th_new_place, th->th.th_first_place
, th->th.th_last_place); }
5120 th->th.th_first_place, th->th.th_last_place))if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
"partition = [%d,%d]\n", __kmp_gtid_from_thread(team->t.t_threads
[f]), team->t.t_id, f, th->th.th_new_place, th->th.th_first_place
, th->th.th_last_place); }
;
5121 }
5122 KMP_DEBUG_ASSERT(update_master_only || place == masters_place)if (!(update_master_only || place == masters_place)) { __kmp_debug_assert
("update_master_only || place == masters_place", "openmp/runtime/src/kmp_runtime.cpp"
, 5122); }
;
5123 }
5124 } break;
5125
5126 default:
5127 break;
5128 }
5129
5130 KA_TRACE(20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_partition_places: exit T#%d\n"
, team->t.t_id); }
;
5131}
5132
5133#endif // KMP_AFFINITY_SUPPORTED
5134
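[Editorial note, not part of the listed source: the "spread" branch above distributes n_th threads over n_places places as evenly as possible — a base of S = n_th / n_places threads per place, with the rem leftover threads placed every `gap` places. The following minimal standalone C++ sketch reproduces just that counting logic under simplifying assumptions (places numbered 0..n_places-1, plain modulo wrap-around, ignoring first_place/last_place and masters_place handling); the names are illustrative only.]

#include <cstdio>
#include <vector>

int main() {
  const int n_th = 10, n_places = 4; // example values, chosen arbitrarily
  int S = n_th / n_places;
  int rem = n_th - S * n_places;
  int gap = rem > 0 ? n_places / rem : n_places;
  int place = 0, s_count = 0, gap_ct = gap;
  std::vector<int> threads_per_place(n_places, 0);
  for (int f = 0; f < n_th; f++) {
    threads_per_place[place]++;
    s_count++;
    if ((s_count == S) && rem && (gap_ct == gap)) {
      // keep this place open for one extra thread on the next iteration
    } else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
      place = (place + 1) % n_places; // extra thread added; move on
      s_count = 0;
      gap_ct = 1;
      rem--;
    } else if (s_count == S) {
      place = (place + 1) % n_places; // place is full; don't add extra thread
      gap_ct++;
      s_count = 0;
    }
  }
  for (int p = 0; p < n_places; p++)
    printf("place %d: %d threads\n", p, threads_per_place[p]);
  return 0;
}

For n_th = 10 and n_places = 4 this prints 3, 2, 3, 2 threads per place: the two remainder threads land two places apart, which is the spreading behaviour the loop above implements.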
5135/* allocate a new team data structure to use. take one off of the free pool if
5136 available */
5137kmp_team_t *
5138__kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
5139#if OMPT_SUPPORT1
5140 ompt_data_t ompt_parallel_data,
5141#endif
5142 kmp_proc_bind_t new_proc_bind,
5143 kmp_internal_control_t *new_icvs,
5144 int argc USE_NESTED_HOT_ARG(kmp_info_t *master)) {
5145 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);
5146 int f;
5147 kmp_team_t *team;
5148 int use_hot_team = !root->r.r_active;
5149 int level = 0;
5150 int do_place_partition = 1;
5151
5152 KA_TRACE(20, ("__kmp_allocate_team: called\n"))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: called\n"
); }
;
5153 KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0)if (!(new_nproc >= 1 && argc >= 0)) { __kmp_debug_assert
("new_nproc >= 1 && argc >= 0", "openmp/runtime/src/kmp_runtime.cpp"
, 5153); }
;
5154 KMP_DEBUG_ASSERT(max_nproc >= new_nproc)if (!(max_nproc >= new_nproc)) { __kmp_debug_assert("max_nproc >= new_nproc"
, "openmp/runtime/src/kmp_runtime.cpp", 5154); }
;
5155 KMP_MB();
5156
5157#if KMP_NESTED_HOT_TEAMS1
5158 kmp_hot_team_ptr_t *hot_teams;
5159 if (master) {
5160 team = master->th.th_team;
5161 level = team->t.t_active_level;
5162 if (master->th.th_teams_microtask) { // in teams construct?
5163 if (master->th.th_teams_size.nteams > 1 &&
5164 ( // #teams > 1
5165 team->t.t_pkfn ==
5166 (microtask_t)__kmp_teams_master || // inner fork of the teams
5167 master->th.th_teams_level <
5168 team->t.t_level)) { // or nested parallel inside the teams
5169 ++level; // not increment if #teams==1, or for outer fork of the teams;
5170 // increment otherwise
5171 }
5172 // Do not perform the place partition if inner fork of the teams
5173 // Wait until nested parallel region encountered inside teams construct
5174 if ((master->th.th_teams_size.nteams == 1 &&
5175 master->th.th_teams_level >= team->t.t_level) ||
5176 (team->t.t_pkfn == (microtask_t)__kmp_teams_master))
5177 do_place_partition = 0;
5178 }
5179 hot_teams = master->th.th_hot_teams;
5180 if (level < __kmp_hot_teams_max_level && hot_teams &&
5181 hot_teams[level].hot_team) {
5182 // hot team has already been allocated for given level
5183 use_hot_team = 1;
5184 } else {
5185 use_hot_team = 0;
5186 }
5187 } else {
5188 // check we won't access uninitialized hot_teams, just in case
5189 KMP_DEBUG_ASSERT(new_nproc == 1);
5190 }
5191#endif
5192 // Optimization to use a "hot" team
5193 if (use_hot_team && new_nproc > 1) {
5194 KMP_DEBUG_ASSERT(new_nproc <= max_nproc);
5195#if KMP_NESTED_HOT_TEAMS1
5196 team = hot_teams[level].hot_team;
5197#else
5198 team = root->r.r_hot_team;
5199#endif
5200#if KMP_DEBUG1
5201 if (__kmp_tasking_mode != tskm_immediate_exec) {
5202 KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: hot team task_team[0] = %p "
"task_team[1] = %p before reinit\n", team->t.t_task_team[
0], team->t.t_task_team[1]); }
5203 "task_team[1] = %p before reinit\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: hot team task_team[0] = %p "
"task_team[1] = %p before reinit\n", team->t.t_task_team[
0], team->t.t_task_team[1]); }
5204 team->t.t_task_team[0], team->t.t_task_team[1]))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: hot team task_team[0] = %p "
"task_team[1] = %p before reinit\n", team->t.t_task_team[
0], team->t.t_task_team[1]); }
;
5205 }
5206#endif
5207
5208 if (team->t.t_nproc != new_nproc &&
5209 __kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5210 // Distributed barrier may need a resize
5211 int old_nthr = team->t.t_nproc;
5212 __kmp_resize_dist_barrier(team, old_nthr, new_nproc);
5213 }
5214
5215 // If not doing the place partition, then reset the team's proc bind
5216 // to indicate that partitioning of all threads still needs to take place
5217 if (do_place_partition == 0)
5218 team->t.t_proc_bind = proc_bind_default;
5219 // Has the number of threads changed?
5220 /* Let's assume the most common case is that the number of threads is
5221 unchanged, and put that case first. */
5222 if (team->t.t_nproc == new_nproc) { // Check changes in number of threads
5223 KA_TRACE(20, ("__kmp_allocate_team: reusing hot team\n"))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: reusing hot team\n"
); }
;
5224 // This case can mean that omp_set_num_threads() was called and the hot
5225 // team size was already reduced, so we check the special flag
5226 if (team->t.t_size_changed == -1) {
5227 team->t.t_size_changed = 1;
5228 } else {
5229 KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
5230 }
5231
5232 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
5233 kmp_r_sched_t new_sched = new_icvs->sched;
5234 // set primary thread's schedule as new run-time schedule
5235 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);
5236
5237 __kmp_reinitialize_team(team, new_icvs,
5238 root->r.r_uber_thread->th.th_ident);
5239
5240 KF_TRACE(10, ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", 0,if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n"
, 0, team->t.t_threads[0], team); }
5241 team->t.t_threads[0], team))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n"
, 0, team->t.t_threads[0], team); }
;
5242 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
5243
5244#if KMP_AFFINITY_SUPPORTED1
5245 if ((team->t.t_size_changed == 0) &&
5246 (team->t.t_proc_bind == new_proc_bind)) {
5247 if (new_proc_bind == proc_bind_spread) {
5248 if (do_place_partition) {
5249 // add flag to update only master for spread
5250 __kmp_partition_places(team, 1);
5251 }
5252 }
5253 KA_TRACE(200, ("__kmp_allocate_team: reusing hot team #%d bindings: "if (kmp_a_debug >= 200) { __kmp_debug_printf ("__kmp_allocate_team: reusing hot team #%d bindings: "
"proc_bind = %d, partition = [%d,%d]\n", team->t.t_id, new_proc_bind
, team->t.t_first_place, team->t.t_last_place); }
5254 "proc_bind = %d, partition = [%d,%d]\n",if (kmp_a_debug >= 200) { __kmp_debug_printf ("__kmp_allocate_team: reusing hot team #%d bindings: "
"proc_bind = %d, partition = [%d,%d]\n", team->t.t_id, new_proc_bind
, team->t.t_first_place, team->t.t_last_place); }
5255 team->t.t_id, new_proc_bind, team->t.t_first_place,if (kmp_a_debug >= 200) { __kmp_debug_printf ("__kmp_allocate_team: reusing hot team #%d bindings: "
"proc_bind = %d, partition = [%d,%d]\n", team->t.t_id, new_proc_bind
, team->t.t_first_place, team->t.t_last_place); }
5256 team->t.t_last_place))if (kmp_a_debug >= 200) { __kmp_debug_printf ("__kmp_allocate_team: reusing hot team #%d bindings: "
"proc_bind = %d, partition = [%d,%d]\n", team->t.t_id, new_proc_bind
, team->t.t_first_place, team->t.t_last_place); }
;
5257 } else {
5258 if (do_place_partition) {
5259 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind)if ((team->t.t_proc_bind) != (new_proc_bind)) (team->t.
t_proc_bind) = (new_proc_bind)
;
5260 __kmp_partition_places(team);
5261 }
5262 }
5263#else
5264 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind)if ((team->t.t_proc_bind) != (new_proc_bind)) (team->t.
t_proc_bind) = (new_proc_bind)
;
5265#endif /* KMP_AFFINITY_SUPPORTED */
5266 } else if (team->t.t_nproc > new_nproc) {
5267 KA_TRACE(20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: decreasing hot team thread count to %d\n"
, new_nproc); }
5268 ("__kmp_allocate_team: decreasing hot team thread count to %d\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: decreasing hot team thread count to %d\n"
, new_nproc); }
5269 new_nproc))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: decreasing hot team thread count to %d\n"
, new_nproc); }
;
5270
5271 team->t.t_size_changed = 1;
5272 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5273 // Barrier size already reduced earlier in this function
5274 // Activate team threads via th_used_in_team
5275 __kmp_add_threads_to_team(team, new_nproc);
5276 }
5277#if KMP_NESTED_HOT_TEAMS1
5278 if (__kmp_hot_teams_mode == 0) {
5279 // AC: saved number of threads should correspond to team's value in this
5280 // mode, can be bigger in mode 1, when hot team has threads in reserve
5281 KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
5282 hot_teams[level].hot_team_nth = new_nproc;
5283#endif // KMP_NESTED_HOT_TEAMS
5284 /* release the extra threads we don't need any more */
5285 for (f = new_nproc; f < team->t.t_nproc; f++) {
5286 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5287 if (__kmp_tasking_mode != tskm_immediate_exec) {
5288 // When decreasing team size, threads no longer in the team should
5289 // unref task team.
5290 team->t.t_threads[f]->th.th_task_team = NULL__null;
5291 }
5292 __kmp_free_thread(team->t.t_threads[f]);
5293 team->t.t_threads[f] = NULL__null;
5294 }
5295#if KMP_NESTED_HOT_TEAMS1
5296 } // (__kmp_hot_teams_mode == 0)
5297 else {
5298 // When keeping extra threads in team, switch threads to wait on own
5299 // b_go flag
5300 for (f = new_nproc; f < team->t.t_nproc; ++f) {
5301 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5302 kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar;
5303 for (int b = 0; b < bs_last_barrier; ++b) {
5304 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) {
5305 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5306 }
5307 KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0);
5308 }
5309 }
5310 }
5311#endif // KMP_NESTED_HOT_TEAMS
5312 team->t.t_nproc = new_nproc;
5313 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
5314 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_icvs->sched.sched);
5315 __kmp_reinitialize_team(team, new_icvs,
5316 root->r.r_uber_thread->th.th_ident);
5317
5318 // Update remaining threads
5319 for (f = 0; f < new_nproc; ++f) {
5320 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
5321 }
5322
5323 // restore the current task state of the primary thread: should be the
5324 // implicit task
5325 KF_TRACE(10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 0,if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n"
, 0, team->t.t_threads[0], team); }
5326 team->t.t_threads[0], team))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n"
, 0, team->t.t_threads[0], team); }
;
5327
5328 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
5329
5330#ifdef KMP_DEBUG1
5331 for (f = 0; f < team->t.t_nproc; f++) {
5332 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5333 team->t.t_threads[f]->th.th_team_nproc ==
5334 team->t.t_nproc);
5335 }
5336#endif
5337
5338 if (do_place_partition) {
5339 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5340#if KMP_AFFINITY_SUPPORTED1
5341 __kmp_partition_places(team);
5342#endif
5343 }
5344 } else { // team->t.t_nproc < new_nproc
5345#if (KMP_OS_LINUX1 || KMP_OS_FREEBSD0) && KMP_AFFINITY_SUPPORTED1
5346 kmp_affin_mask_t *old_mask;
5347 if (KMP_AFFINITY_CAPABLE()) {
5348 KMP_CPU_ALLOC(old_mask);
5349 }
5350#endif
5351
5352 KA_TRACE(20,
5353 ("__kmp_allocate_team: increasing hot team thread count to %d\n",
5354 new_nproc));
5355 int old_nproc = team->t.t_nproc; // save old value and use to update only
5356 team->t.t_size_changed = 1;
5357
5358#if KMP_NESTED_HOT_TEAMS1
5359 int avail_threads = hot_teams[level].hot_team_nth;
5360 if (new_nproc < avail_threads)
5361 avail_threads = new_nproc;
5362 kmp_info_t **other_threads = team->t.t_threads;
5363 for (f = team->t.t_nproc; f < avail_threads; ++f) {
5364 // Adjust barrier data of reserved threads (if any) of the team
5365 // Other data will be set in __kmp_initialize_info() below.
5366 int b;
5367 kmp_balign_t *balign = other_threads[f]->th.th_bar;
5368 for (b = 0; b < bs_last_barrier; ++b) {
5369 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5370 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5371#if USE_DEBUGGER0
5372 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5373#endif
5374 }
5375 }
5376 if (hot_teams[level].hot_team_nth >= new_nproc) {
5377 // we have all needed threads in reserve, no need to allocate any
5378 // this only possible in mode 1, cannot have reserved threads in mode 0
5379 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
5380 team->t.t_nproc = new_nproc; // just get reserved threads involved
5381 } else {
5382 // We may have some threads in reserve, but not enough;
5383 // get reserved threads involved if any.
5384 team->t.t_nproc = hot_teams[level].hot_team_nth;
5385 hot_teams[level].hot_team_nth = new_nproc; // adjust hot team max size
5386#endif // KMP_NESTED_HOT_TEAMS
5387 if (team->t.t_max_nproc < new_nproc) {
5388 /* reallocate larger arrays */
5389 __kmp_reallocate_team_arrays(team, new_nproc);
5390 __kmp_reinitialize_team(team, new_icvs, NULL__null);
5391 }
5392
5393#if (KMP_OS_LINUX1 || KMP_OS_FREEBSD0) && KMP_AFFINITY_SUPPORTED1
5394 /* Temporarily set full mask for primary thread before creation of
5395 workers. The reason is that workers inherit the affinity from the
5396 primary thread, so if a lot of workers are created on the single
5397 core quickly, they don't get a chance to set their own affinity for
5398 a long time. */
5399 __kmp_set_thread_affinity_mask_full_tmp(old_mask);
5400#endif
5401
5402 /* allocate new threads for the hot team */
5403 for (f = team->t.t_nproc; f < new_nproc; f++) {
5404 kmp_info_t *new_worker = __kmp_allocate_thread(root, team, f);
5405 KMP_DEBUG_ASSERT(new_worker);
5406 team->t.t_threads[f] = new_worker;
5407
5408 KA_TRACE(20,
5409 ("__kmp_allocate_team: team %d init T#%d arrived: "
5410 "join=%llu, plain=%llu\n",
5411 team->t.t_id, __kmp_gtid_from_tid(f, team), team->t.t_id, f,
5412 team->t.t_bar[bs_forkjoin_barrier].b_arrived,
5413 team->t.t_bar[bs_plain_barrier].b_arrived));
5414
5415 { // Initialize barrier data for new threads.
5416 int b;
5417 kmp_balign_t *balign = new_worker->th.th_bar;
5418 for (b = 0; b < bs_last_barrier; ++b) {
5419 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5420 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag !=
5421 KMP_BARRIER_PARENT_FLAG);
5422#if USE_DEBUGGER0
5423 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5424#endif
5425 }
5426 }
5427 }
5428
5429#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
5430 if (KMP_AFFINITY_CAPABLE()) {
5431 /* Restore initial primary thread's affinity mask */
5432 __kmp_set_system_affinity(old_mask, TRUE);
5433 KMP_CPU_FREE(old_mask);
5434 }
5435#endif
5436#if KMP_NESTED_HOT_TEAMS1
5437 } // end of check of t_nproc vs. new_nproc vs. hot_team_nth
5438#endif // KMP_NESTED_HOT_TEAMS
5439 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5440 // Barrier size already increased earlier in this function
5441 // Activate team threads via th_used_in_team
5442 __kmp_add_threads_to_team(team, new_nproc);
5443 }
5444 /* make sure everyone is synchronized */
5445 // new threads below
5446 __kmp_initialize_team(team, new_nproc, new_icvs,
5447 root->r.r_uber_thread->th.th_ident);
5448
5449 /* reinitialize the threads */
5450 KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
5451 for (f = 0; f < team->t.t_nproc; ++f)
5452 __kmp_initialize_info(team->t.t_threads[f], team, f,
5453 __kmp_gtid_from_tid(f, team));
5454
5455 // set th_task_state for new threads in hot team with older thread's state
5456 kmp_uint8 old_state = team->t.t_threads[old_nproc - 1]->th.th_task_state;
5457 for (f = old_nproc; f < team->t.t_nproc; ++f)
5458 team->t.t_threads[f]->th.th_task_state = old_state;
5459
5460#ifdef KMP_DEBUG1
5461 for (f = 0; f < team->t.t_nproc; ++f) {
5462 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5463 team->t.t_threads[f]->th.th_team_nproc ==
5464 team->t.t_nproc);
5465 }
5466#endif
5467
5468 if (do_place_partition) {
5469 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5470#if KMP_AFFINITY_SUPPORTED1
5471 __kmp_partition_places(team);
5472#endif
5473 }
5474 } // Check changes in number of threads
5475
5476 kmp_info_t *master = team->t.t_threads[0];
5477 if (master->th.th_teams_microtask) {
5478 for (f = 1; f < new_nproc; ++f) {
5479 // propagate teams construct specific info to workers
5480 kmp_info_t *thr = team->t.t_threads[f];
5481 thr->th.th_teams_microtask = master->th.th_teams_microtask;
5482 thr->th.th_teams_level = master->th.th_teams_level;
5483 thr->th.th_teams_size = master->th.th_teams_size;
5484 }
5485 }
5486#if KMP_NESTED_HOT_TEAMS1
5487 if (level) {
5488 // Sync barrier state for nested hot teams, not needed for outermost hot
5489 // team.
5490 for (f = 1; f < new_nproc; ++f) {
5491 kmp_info_t *thr = team->t.t_threads[f];
5492 int b;
5493 kmp_balign_t *balign = thr->th.th_bar;
5494 for (b = 0; b < bs_last_barrier; ++b) {
5495 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5496 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5497#if USE_DEBUGGER0
5498 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5499#endif
5500 }
5501 }
5502 }
5503#endif // KMP_NESTED_HOT_TEAMS
5504
5505 /* reallocate space for arguments if necessary */
5506 __kmp_alloc_argv_entries(argc, team, TRUE);
5507 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5508 // The hot team re-uses the previous task team,
5509 // if untouched during the previous release->gather phase.
5510
5511 KF_TRACE(10, (" hot_team = %p\n", team))if (kmp_f_debug >= 10) { __kmp_debug_printf (" hot_team = %p\n"
, team); }
;
5512
5513#if KMP_DEBUG1
5514 if (__kmp_tasking_mode != tskm_immediate_exec) {
5515 KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: hot team task_team[0] = %p "
"task_team[1] = %p after reinit\n", team->t.t_task_team[0
], team->t.t_task_team[1]); }
5516 "task_team[1] = %p after reinit\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: hot team task_team[0] = %p "
"task_team[1] = %p after reinit\n", team->t.t_task_team[0
], team->t.t_task_team[1]); }
5517 team->t.t_task_team[0], team->t.t_task_team[1]))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: hot team task_team[0] = %p "
"task_team[1] = %p after reinit\n", team->t.t_task_team[0
], team->t.t_task_team[1]); }
;
5518 }
5519#endif
5520
5521#if OMPT_SUPPORT1
5522 __ompt_team_assign_id(team, ompt_parallel_data);
5523#endif
5524
5525 KMP_MB();
5526
5527 return team;
5528 }
5529
5530 /* next, let's try to take one from the team pool */
5531 KMP_MB();
5532 for (team = CCAST(kmp_team_t *, __kmp_team_pool); (team);) {
5533 /* TODO: consider resizing undersized teams instead of reaping them, now
5534 that we have a resizing mechanism */
5535 if (team->t.t_max_nproc >= max_nproc) {
5536 /* take this team from the team pool */
5537 __kmp_team_pool = team->t.t_next_pool;
5538
5539 if (max_nproc > 1 &&
5540 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5541 if (!team->t.b) { // Allocate barrier structure
5542 team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
5543 }
5544 }
5545
5546 /* setup the team for fresh use */
5547 __kmp_initialize_team(team, new_nproc, new_icvs, NULL__null);
5548
5549 KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: setting task_team[0] %p and "
"task_team[1] %p to NULL\n", &team->t.t_task_team[0],
&team->t.t_task_team[1]); }
5550 "task_team[1] %p to NULL\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: setting task_team[0] %p and "
"task_team[1] %p to NULL\n", &team->t.t_task_team[0],
&team->t.t_task_team[1]); }
5551 &team->t.t_task_team[0], &team->t.t_task_team[1]))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: setting task_team[0] %p and "
"task_team[1] %p to NULL\n", &team->t.t_task_team[0],
&team->t.t_task_team[1]); }
;
5552 team->t.t_task_team[0] = NULL__null;
5553 team->t.t_task_team[1] = NULL__null;
5554
5555 /* reallocate space for arguments if necessary */
5556 __kmp_alloc_argv_entries(argc, team, TRUE);
5557 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5558
5559 KA_TRACE(
5560 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5561 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5562 { // Initialize barrier data.
5563 int b;
5564 for (b = 0; b < bs_last_barrier; ++b) {
5565 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE0;
5566#if USE_DEBUGGER0
5567 team->t.t_bar[b].b_master_arrived = 0;
5568 team->t.t_bar[b].b_team_arrived = 0;
5569#endif
5570 }
5571 }
5572
5573 team->t.t_proc_bind = new_proc_bind;
5574
5575 KA_TRACE(20, ("__kmp_allocate_team: using team from pool %d.\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: using team from pool %d.\n"
, team->t.t_id); }
5576 team->t.t_id))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: using team from pool %d.\n"
, team->t.t_id); }
;
5577
5578#if OMPT_SUPPORT1
5579 __ompt_team_assign_id(team, ompt_parallel_data);
5580#endif
5581
5582 KMP_MB();
5583
5584 return team;
5585 }
5586
5587 /* reap team if it is too small, then loop back and check the next one */
5588 // not sure if this is wise, but, will be redone during the hot-teams
5589 // rewrite.
5590 /* TODO: Use technique to find the right size hot-team, don't reap them */
5591 team = __kmp_reap_team(team);
5592 __kmp_team_pool = team;
5593 }
5594
5595 /* nothing available in the pool, no matter, make a new team! */
5596 KMP_MB();
5597 team = (kmp_team_t *)__kmp_allocate(sizeof(kmp_team_t));
5598
5599 /* and set it up */
5600 team->t.t_max_nproc = max_nproc;
5601 if (max_nproc > 1 &&
5602 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5603 // Allocate barrier structure
5604 team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
5605 }
5606
5607 /* NOTE well, for some reason allocating one big buffer and dividing it up
5608 seems to really hurt performance a lot on the P4, so, let's not use this */
5609 __kmp_allocate_team_arrays(team, max_nproc);
5610
5611 KA_TRACE(20, ("__kmp_allocate_team: making a new team\n"))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: making a new team\n"
); }
;
5612 __kmp_initialize_team(team, new_nproc, new_icvs, NULL__null);
5613
5614 KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and task_team[1] "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: setting task_team[0] %p and task_team[1] "
"%p to NULL\n", &team->t.t_task_team[0], &team->
t.t_task_team[1]); }
5615 "%p to NULL\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: setting task_team[0] %p and task_team[1] "
"%p to NULL\n", &team->t.t_task_team[0], &team->
t.t_task_team[1]); }
5616 &team->t.t_task_team[0], &team->t.t_task_team[1]))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: setting task_team[0] %p and task_team[1] "
"%p to NULL\n", &team->t.t_task_team[0], &team->
t.t_task_team[1]); }
;
5617 team->t.t_task_team[0] = NULL__null; // to be removed, as __kmp_allocate zeroes
5618 // memory, no need to duplicate
5619 team->t.t_task_team[1] = NULL__null; // to be removed, as __kmp_allocate zeroes
5620 // memory, no need to duplicate
5621
5622 if (__kmp_storage_map) {
5623 __kmp_print_team_storage_map("team", team, team->t.t_id, new_nproc);
5624 }
5625
5626 /* allocate space for arguments */
5627 __kmp_alloc_argv_entries(argc, team, FALSE0);
5628 team->t.t_argc = argc;
5629
5630 KA_TRACE(20,
5631 ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5632 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5633 { // Initialize barrier data.
5634 int b;
5635 for (b = 0; b < bs_last_barrier; ++b) {
5636 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE0;
5637#if USE_DEBUGGER0
5638 team->t.t_bar[b].b_master_arrived = 0;
5639 team->t.t_bar[b].b_team_arrived = 0;
5640#endif
5641 }
5642 }
5643
5644 team->t.t_proc_bind = new_proc_bind;
5645
5646#if OMPT_SUPPORT1
5647 __ompt_team_assign_id(team, ompt_parallel_data);
5648 team->t.ompt_serialized_team_info = NULL__null;
5649#endif
5650
5651 KMP_MB();
5652
5653 KA_TRACE(20, ("__kmp_allocate_team: done creating a new team %d.\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: done creating a new team %d.\n"
, team->t.t_id); }
5654 team->t.t_id))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: done creating a new team %d.\n"
, team->t.t_id); }
;
5655
5656 return team;
5657}
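[Editorial note, not part of the listed source: when the hot team cannot be reused, __kmp_allocate_team above scans the team pool, popping the first pooled team whose capacity (t_max_nproc) is large enough and reaping any undersized team it walks past. A minimal standalone C++ sketch of that pool scan follows; Team, team_pool and take_team_from_pool are hypothetical stand-ins, not the real kmp_team_t machinery.]

#include <cstdio>

struct Team {        // hypothetical stand-in for kmp_team_t
  int max_nproc;
  Team *next_pool;
};

static Team *team_pool = nullptr; // stand-in for __kmp_team_pool

// Returns a pooled team with capacity >= max_nproc, or nullptr if none fits.
// Undersized teams encountered at the head are removed ("reaped"), mirroring
// how __kmp_reap_team() frees a team and returns the next pool entry.
Team *take_team_from_pool(int max_nproc) {
  for (Team *team = team_pool; team;) {
    if (team->max_nproc >= max_nproc) {
      team_pool = team->next_pool; // unlink and reuse this team
      return team;
    }
    Team *next = team->next_pool; // too small: reap it and keep scanning
    delete team;
    team = team_pool = next;
  }
  return nullptr; // nothing usable pooled; caller allocates a fresh team
}

int main() {
  // Pool holding teams with capacity 2 and 8; asking for 4 reaps the first
  // and returns the second.
  team_pool = new Team{2, new Team{8, nullptr}};
  Team *t = take_team_from_pool(4);
  printf("got team with capacity %d\n", t ? t->max_nproc : -1);
  delete t;
  return 0;
}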
5658
5659/* TODO implement hot-teams at all levels */
5660/* TODO implement lazy thread release on demand (disband request) */
5661
5662/* free the team. return it to the team pool. release all the threads
5663 * associated with it */
5664void __kmp_free_team(kmp_root_t *root,
5665 kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master)) {
5666 int f;
5667 KA_TRACE(20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(),
5668 team->t.t_id));
5669
5670 /* verify state */
5671 KMP_DEBUG_ASSERT(root);
5672 KMP_DEBUG_ASSERT(team);
5673 KMP_DEBUG_ASSERT(team->t.t_nproc <= team->t.t_max_nproc);
5674 KMP_DEBUG_ASSERT(team->t.t_threads);
5675
5676 int use_hot_team = team == root->r.r_hot_team;
5677#if KMP_NESTED_HOT_TEAMS1
5678 int level;
5679 if (master) {
5680 level = team->t.t_active_level - 1;
5681 if (master->th.th_teams_microtask) { // in teams construct?
5682 if (master->th.th_teams_size.nteams > 1) {
5683 ++level; // level was not increased in teams construct for
5684 // team_of_masters
5685 }
5686 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
5687 master->th.th_teams_level == team->t.t_level) {
5688 ++level; // level was not increased in teams construct for
5689 // team_of_workers before the parallel
5690 } // team->t.t_level will be increased inside parallel
5691 }
5692#if KMP_DEBUG1
5693 kmp_hot_team_ptr_t *hot_teams = master->th.th_hot_teams;
5694#endif
5695 if (level < __kmp_hot_teams_max_level) {
5696 KMP_DEBUG_ASSERT(team == hot_teams[level].hot_team);
5697 use_hot_team = 1;
5698 }
5699 }
5700#endif // KMP_NESTED_HOT_TEAMS
5701
5702 /* team is done working */
5703 TCW_SYNC_PTR(team->t.t_pkfn,
5704 NULL); // Important for Debugging Support Library.
5705#if KMP_OS_WINDOWS0
5706 team->t.t_copyin_counter = 0; // init counter for possible reuse
5707#endif
5708 // Do not reset pointer to parent team to NULL for hot teams.
5709
5710 /* if we are non-hot team, release our threads */
5711 if (!use_hot_team) {
5712 if (__kmp_tasking_mode != tskm_immediate_exec) {
5713 // Wait for threads to reach reapable state
5714 for (f = 1; f < team->t.t_nproc; ++f) {
5715 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5716 kmp_info_t *th = team->t.t_threads[f];
5717 volatile kmp_uint32 *state = &th->th.th_reap_state;
5718 while (*state != KMP_SAFE_TO_REAP1) {
5719#if KMP_OS_WINDOWS0
5720 // On Windows a thread can be killed at any time, check this
5721 DWORD ecode;
5722 if (!__kmp_is_thread_alive(th, &ecode)) {
5723 *state = KMP_SAFE_TO_REAP1; // reset the flag for dead thread
5724 break;
5725 }
5726#endif
5727 // first check if thread is sleeping
5728 kmp_flag_64<> fl(&th->th.th_bar[bs_forkjoin_barrier].bb.b_go, th);
5729 if (fl.is_sleeping())
5730 fl.resume(__kmp_gtid_from_thread(th));
5731 KMP_CPU_PAUSE()__kmp_x86_pause();
5732 }
5733 }
5734
5735 // Delete task teams
5736 int tt_idx;
5737 for (tt_idx = 0; tt_idx < 2; ++tt_idx) {
5738 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
5739 if (task_team != NULL__null) {
5740 for (f = 0; f < team->t.t_nproc; ++f) { // threads unref task teams
5741 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5742 team->t.t_threads[f]->th.th_task_team = NULL;
5743 }
5744 KA_TRACE(
5745 20,
5746 ("__kmp_free_team: T#%d deactivating task_team %p on team %d\n",
5747 __kmp_get_gtid(), task_team, team->t.t_id));
5748#if KMP_NESTED_HOT_TEAMS1
5749 __kmp_free_task_team(master, task_team);
5750#endif
5751 team->t.t_task_team[tt_idx] = NULL__null;
5752 }
5753 }
5754 }
5755
5756 // Reset pointer to parent team only for non-hot teams.
5757 team->t.t_parent = NULL__null;
5758 team->t.t_level = 0;
5759 team->t.t_active_level = 0;
5760
5761 /* free the worker threads */
5762 for (f = 1; f < team->t.t_nproc; ++f) {
5763 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5764 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5765 KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team),
5766 1, 2);
5767 }
5768 __kmp_free_thread(team->t.t_threads[f]);
5769 }
5770
5771 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5772 if (team->t.b) {
5773 // wake up thread at old location
5774 team->t.b->go_release();
5775 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME(2147483647)) {
5776 for (f = 1; f < team->t.t_nproc; ++f) {
5777 if (team->t.b->sleep[f].sleep) {
5778 __kmp_atomic_resume_64(
5779 team->t.t_threads[f]->th.th_info.ds.ds_gtid,
5780 (kmp_atomic_flag_64<> *)NULL__null);
5781 }
5782 }
5783 }
5784 // Wait for threads to be removed from team
5785 for (int f = 1; f < team->t.t_nproc; ++f) {
5786 while (team->t.t_threads[f]->th.th_used_in_team.load() != 0)
5787 KMP_CPU_PAUSE()__kmp_x86_pause();
5788 }
5789 }
5790 }
5791
5792 for (f = 1; f < team->t.t_nproc; ++f) {
5793 team->t.t_threads[f] = NULL__null;
5794 }
5795
5796 if (team->t.t_max_nproc > 1 &&
5797 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5798 distributedBarrier::deallocate(team->t.b);
5799 team->t.b = NULL__null;
5800 }
5801 /* put the team back in the team pool */
5802 /* TODO limit size of team pool, call reap_team if pool too large */
5803 team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool)const_cast<kmp_team_t *>(__kmp_team_pool);
5804 __kmp_team_pool = (volatile kmp_team_t *)team;
5805 } else { // Check if team was created for primary threads in teams construct
5806 // See if first worker is a CG root
5807 KMP_DEBUG_ASSERT(team->t.t_threads[1] &&
5808 team->t.t_threads[1]->th.th_cg_roots);
5809 if (team->t.t_threads[1]->th.th_cg_roots->cg_root == team->t.t_threads[1]) {
5810 // Clean up the CG root nodes on workers so that this team can be re-used
5811 for (f = 1; f < team->t.t_nproc; ++f) {
5812 kmp_info_t *thr = team->t.t_threads[f];
5813 KMP_DEBUG_ASSERT(thr && thr->th.th_cg_roots &&
5814 thr->th.th_cg_roots->cg_root == thr);
5815 // Pop current CG root off list
5816 kmp_cg_root_t *tmp = thr->th.th_cg_roots;
5817 thr->th.th_cg_roots = tmp->up;
5818 KA_TRACE(100, ("__kmp_free_team: Thread %p popping node %p and moving"if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_free_team: Thread %p popping node %p and moving"
" up to node %p. cg_nthreads was %d\n", thr, tmp, thr->th
.th_cg_roots, tmp->cg_nthreads); }
5819 " up to node %p. cg_nthreads was %d\n",if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_free_team: Thread %p popping node %p and moving"
" up to node %p. cg_nthreads was %d\n", thr, tmp, thr->th
.th_cg_roots, tmp->cg_nthreads); }
5820 thr, tmp, thr->th.th_cg_roots, tmp->cg_nthreads))if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_free_team: Thread %p popping node %p and moving"
" up to node %p. cg_nthreads was %d\n", thr, tmp, thr->th
.th_cg_roots, tmp->cg_nthreads); }
;
5821 int i = tmp->cg_nthreads--;
5822 if (i == 1) {
5823 __kmp_free(tmp)___kmp_free((tmp), "openmp/runtime/src/kmp_runtime.cpp", 5823
)
; // free CG if we are the last thread in it
5824 }
5825 // Restore current task's thread_limit from CG root
5826 if (thr->th.th_cg_roots)
5827 thr->th.th_current_task->td_icvs.thread_limit =
5828 thr->th.th_cg_roots->cg_thread_limit;
5829 }
5830 }
5831 }
5832
5833 KMP_MB();
5834}
5835
5836/* reap the team. destroy it, reclaim all its resources and free its memory */
5837kmp_team_t *__kmp_reap_team(kmp_team_t *team) {
5838 kmp_team_t *next_pool = team->t.t_next_pool;
5839
5840 KMP_DEBUG_ASSERT(team);
5841 KMP_DEBUG_ASSERT(team->t.t_dispatch);
5842 KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
5843 KMP_DEBUG_ASSERT(team->t.t_threads);
5844 KMP_DEBUG_ASSERT(team->t.t_argv);
5845
5846 /* TODO clean the threads that are a part of this? */
5847
5848 /* free stuff */
5849 __kmp_free_team_arrays(team);
5850 if (team->t.t_argv != &team->t.t_inline_argv[0])
5851 __kmp_free((void *)team->t.t_argv);
5852 __kmp_free(team);
5853
5854 KMP_MB();
5855 return next_pool;
5856}
5857
5858// Free the thread. Don't reap it, just place it on the pool of available
5859// threads.
5860//
5861// Changes for Quad issue 527845: We need a predictable OMP tid <-> gtid
5862// binding for the affinity mechanism to be useful.
5863//
5864// Now, we always keep the free list (__kmp_thread_pool) sorted by gtid.
5865// However, we want to avoid a potential performance problem by always
5866// scanning through the list to find the correct point at which to insert
5867// the thread (potential N**2 behavior). To do this we keep track of the
5868// last place a thread struct was inserted (__kmp_thread_pool_insert_pt).
5869// With single-level parallelism, threads will always be added to the tail
5870// of the list, kept track of by __kmp_thread_pool_insert_pt. With nested
5871// parallelism, all bets are off and we may need to scan through the entire
5872// free list.
5873//
5874// This change also has a potentially large performance benefit, for some
5875// applications. Previously, as threads were freed from the hot team, they
5876// would be placed back on the free list in inverse order. If the hot team
5877 // grew back to its original size, then the freed thread would be placed
5878// back on the hot team in reverse order. This could cause bad cache
5879// locality problems on programs where the size of the hot team regularly
5880// grew and shrunk.
5881//
5882// Now, for single-level parallelism, the OMP tid is always == gtid.
5883void __kmp_free_thread(kmp_info_t *this_th) {
5884 int gtid;
5885 kmp_info_t **scan;
5886
5887 KA_TRACE(20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n"
, __kmp_get_global_thread_id(), this_th->th.th_info.ds.ds_gtid
); }
5888 __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n"
, __kmp_get_global_thread_id(), this_th->th.th_info.ds.ds_gtid
); }
;
5889
5890 KMP_DEBUG_ASSERT(this_th)if (!(this_th)) { __kmp_debug_assert("this_th", "openmp/runtime/src/kmp_runtime.cpp"
, 5890); }
;
5891
5892 // When moving thread to pool, switch thread to wait on own b_go flag, and
5893 // uninitialized (NULL team).
5894 int b;
5895 kmp_balign_t *balign = this_th->th.th_bar;
5896 for (b = 0; b < bs_last_barrier; ++b) {
5897 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG2)
5898 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG3;
5899 balign[b].bb.team = NULL__null;
5900 balign[b].bb.leaf_kids = 0;
5901 }
5902 this_th->th.th_task_state = 0;
5903 this_th->th.th_reap_state = KMP_SAFE_TO_REAP1;
5904
5905 /* put thread back on the free pool */
5906 TCW_PTR(this_th->th.th_team, NULL);
5907 TCW_PTR(this_th->th.th_root, NULL);
5908 TCW_PTR(this_th->th.th_dispatch, NULL); /* NOT NEEDED */
5909
5910 while (this_th->th.th_cg_roots) {
5911 this_th->th.th_cg_roots->cg_nthreads--;
5912 KA_TRACE(100, ("__kmp_free_thread: Thread %p decrement cg_nthreads on node"if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_free_thread: Thread %p decrement cg_nthreads on node"
" %p of thread %p to %d\n", this_th, this_th->th.th_cg_roots
, this_th->th.th_cg_roots->cg_root, this_th->th.th_cg_roots
->cg_nthreads); }
5913 " %p of thread %p to %d\n",if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_free_thread: Thread %p decrement cg_nthreads on node"
" %p of thread %p to %d\n", this_th, this_th->th.th_cg_roots
, this_th->th.th_cg_roots->cg_root, this_th->th.th_cg_roots
->cg_nthreads); }
5914 this_th, this_th->th.th_cg_roots,if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_free_thread: Thread %p decrement cg_nthreads on node"
" %p of thread %p to %d\n", this_th, this_th->th.th_cg_roots
, this_th->th.th_cg_roots->cg_root, this_th->th.th_cg_roots
->cg_nthreads); }
5915 this_th->th.th_cg_roots->cg_root,if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_free_thread: Thread %p decrement cg_nthreads on node"
" %p of thread %p to %d\n", this_th, this_th->th.th_cg_roots
, this_th->th.th_cg_roots->cg_root, this_th->th.th_cg_roots
->cg_nthreads); }
5916 this_th->th.th_cg_roots->cg_nthreads))if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_free_thread: Thread %p decrement cg_nthreads on node"
" %p of thread %p to %d\n", this_th, this_th->th.th_cg_roots
, this_th->th.th_cg_roots->cg_root, this_th->th.th_cg_roots
->cg_nthreads); }
;
5917 kmp_cg_root_t *tmp = this_th->th.th_cg_roots;
5918 if (tmp->cg_root == this_th) { // Thread is a cg_root
5919 KMP_DEBUG_ASSERT(tmp->cg_nthreads == 0);
5920 KA_TRACE(
5921 5, ("__kmp_free_thread: Thread %p freeing node %p\n", this_th, tmp));
5922 this_th->th.th_cg_roots = tmp->up;
5923 __kmp_free(tmp);
5924 } else { // Worker thread
5925 if (tmp->cg_nthreads == 0) { // last thread leaves contention group
5926 __kmp_free(tmp);
5927 }
5928 this_th->th.th_cg_roots = NULL__null;
5929 break;
5930 }
5931 }
5932
5933 /* If the implicit task assigned to this thread can be used by other threads
5934 * -> multiple threads can share the data and try to free the task at
5935 * __kmp_reap_thread at exit. This duplicate use of the task data can happen
5936 * with higher probability when hot team is disabled but can occur even when
5937 * the hot team is enabled */
5938 __kmp_free_implicit_task(this_th);
5939 this_th->th.th_current_task = NULL__null;
5940
5941 // If the __kmp_thread_pool_insert_pt is already past the new insert
5942 // point, then we need to re-scan the entire list.
5943 gtid = this_th->th.th_info.ds.ds_gtid;
5944 if (__kmp_thread_pool_insert_pt != NULL) {
5945 KMP_DEBUG_ASSERT(__kmp_thread_pool != NULL);
5946 if (__kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid) {
5947 __kmp_thread_pool_insert_pt = NULL__null;
5948 }
5949 }
5950
5951 // Scan down the list to find the place to insert the thread.
5952 // scan is the address of a link in the list, possibly the address of
5953 // __kmp_thread_pool itself.
5954 //
5955 // In the absence of nested parallelism, the for loop will have 0 iterations.
5956 if (__kmp_thread_pool_insert_pt != NULL__null) {
5957 scan = &(__kmp_thread_pool_insert_pt->th.th_next_pool);
5958 } else {
5959 scan = CCAST(kmp_info_t **, &__kmp_thread_pool)const_cast<kmp_info_t **>(&__kmp_thread_pool);
5960 }
5961 for (; (*scan != NULL__null) && ((*scan)->th.th_info.ds.ds_gtid < gtid);
5962 scan = &((*scan)->th.th_next_pool))
5963 ;
5964
5965 // Insert the new element on the list, and set __kmp_thread_pool_insert_pt
5966 // to its address.
5967 TCW_PTR(this_th->th.th_next_pool, *scan);
5968 __kmp_thread_pool_insert_pt = *scan = this_th;
5969 KMP_DEBUG_ASSERT((this_th->th.th_next_pool == NULL) ||
5970 (this_th->th.th_info.ds.ds_gtid <
5971 this_th->th.th_next_pool->th.th_info.ds.ds_gtid));
5972 TCW_4(this_th->th.th_in_pool, TRUE);
5973 __kmp_suspend_initialize_thread(this_th);
5974 __kmp_lock_suspend_mx(this_th);
5975 if (this_th->th.th_active == TRUE) {
5976 KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
5977 this_th->th.th_active_in_pool = TRUE;
5978 }
5979#if KMP_DEBUG1
5980 else {
5981 KMP_DEBUG_ASSERT(this_th->th.th_active_in_pool == FALSE);
5982 }
5983#endif
5984 __kmp_unlock_suspend_mx(this_th);
5985
5986 TCW_4(__kmp_nth, __kmp_nth - 1)(__kmp_nth) = (__kmp_nth - 1);
5987
5988#ifdef KMP_ADJUST_BLOCKTIME1
5989 /* Adjust blocktime back to user setting or default if necessary */
5990 /* Middle initialization might never have occurred */
5991 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
5992 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
5993 if (__kmp_nth <= __kmp_avail_proc) {
5994 __kmp_zero_bt = FALSE0;
5995 }
5996 }
5997#endif /* KMP_ADJUST_BLOCKTIME */
5998
5999 KMP_MB();
6000}
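[Editorial note, not part of the listed source: the comment before __kmp_free_thread describes keeping the free list sorted by gtid while caching the last insertion point so that single-level parallelism appends at the tail without rescanning. A minimal standalone C++ sketch of that idea follows; Node, pool and insert_pt are hypothetical stand-ins for kmp_info_t, __kmp_thread_pool and __kmp_thread_pool_insert_pt.]

#include <cstdio>

struct Node {
  int gtid;
  Node *next;
};

static Node *pool = nullptr;      // stand-in for __kmp_thread_pool
static Node *insert_pt = nullptr; // stand-in for __kmp_thread_pool_insert_pt

void pool_insert_sorted(Node *n) {
  // If the cached point is already past the new gtid, fall back to a full scan.
  if (insert_pt && insert_pt->gtid > n->gtid)
    insert_pt = nullptr;
  Node **scan = insert_pt ? &insert_pt->next : &pool;
  while (*scan && (*scan)->gtid < n->gtid) // 0 iterations for single-level
    scan = &(*scan)->next;                 // parallelism: append at the tail
  n->next = *scan;
  insert_pt = *scan = n; // remember where we inserted for next time
}

int main() {
  Node a{3, nullptr}, b{1, nullptr}, c{2, nullptr};
  pool_insert_sorted(&a);
  pool_insert_sorted(&b); // smaller gtid: cache reset, re-scan from the head
  pool_insert_sorted(&c);
  for (Node *p = pool; p; p = p->next)
    printf("gtid %d\n", p->gtid); // prints 1, 2, 3: list stays sorted by gtid
  return 0;
}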
6001
6002/* ------------------------------------------------------------------------ */
6003
6004void *__kmp_launch_thread(kmp_info_t *this_thr) {
6005#if OMP_PROFILING_SUPPORT0
6006 ProfileTraceFile = getenv("LIBOMPTARGET_PROFILE");
6007 // TODO: add a configuration option for time granularity
6008 if (ProfileTraceFile)
6009 llvm::timeTraceProfilerInitialize(500 /* us */, "libomptarget");
6010#endif
6011
6012 int gtid = this_thr->th.th_info.ds.ds_gtid;
6013 /* void *stack_data;*/
6014 kmp_team_t **volatile pteam;
6015
6016 KMP_MB();
6017 KA_TRACE(10, ("__kmp_launch_thread: T#%d start\n", gtid))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_launch_thread: T#%d start\n"
, gtid); }
;
6018
6019 if (__kmp_env_consistency_check) {
6020 this_thr->th.th_cons = __kmp_allocate_cons_stack(gtid); // ATT: Memory leak?
6021 }
6022
6023#if OMPD_SUPPORT1
6024 if (ompd_state & OMPD_ENABLE_BP0x1)
6025 ompd_bp_thread_begin();
6026#endif
6027
6028#if OMPT_SUPPORT1
6029 ompt_data_t *thread_data = nullptr;
6030 if (ompt_enabled.enabled) {
6031 thread_data = &(this_thr->th.ompt_thread_info.thread_data);
6032 *thread_data = ompt_data_none{0};
6033
6034 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6035 this_thr->th.ompt_thread_info.wait_id = 0;
6036 this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0)__builtin_frame_address(0);
6037 this_thr->th.ompt_thread_info.parallel_flags = 0;
6038 if (ompt_enabled.ompt_callback_thread_begin) {
6039 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)ompt_callback_thread_begin_callback(
6040 ompt_thread_worker, thread_data);
6041 }
6042 this_thr->th.ompt_thread_info.state = ompt_state_idle;
6043 }
6044#endif
6045
6046 /* This is the place where threads wait for work */
6047 while (!TCR_4(__kmp_global.g.g_done)) {
6048 KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]);
6049 KMP_MB();
6050
6051 /* wait for work to do */
6052 KA_TRACE(20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid));
6053
6054 /* No tid yet since not part of a team */
6055 __kmp_fork_barrier(gtid, KMP_GTID_DNE(-2));
6056
6057#if OMPT_SUPPORT1
6058 if (ompt_enabled.enabled) {
6059 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6060 }
6061#endif
6062
6063 pteam = &this_thr->th.th_team;
6064
6065 /* have we been allocated? */
6066 if (TCR_SYNC_PTR(*pteam)((void *)(*pteam)) && !TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done)) {
6067 /* we were just woken up, so run our new task */
6068 if (TCR_SYNC_PTR((*pteam)->t.t_pkfn)((void *)((*pteam)->t.t_pkfn)) != NULL__null) {
6069 int rc;
6070 KA_TRACE(20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n"
, gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam
)->t.t_pkfn); }
6071 ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n"
, gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam
)->t.t_pkfn); }
6072 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n"
, gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam
)->t.t_pkfn); }
6073 (*pteam)->t.t_pkfn))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n"
, gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam
)->t.t_pkfn); }
;
6074
6075 updateHWFPControl(*pteam);
6076
6077#if OMPT_SUPPORT1
6078 if (ompt_enabled.enabled) {
6079 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
6080 }
6081#endif
6082
6083 rc = (*pteam)->t.t_invoke(gtid);
6084 KMP_ASSERT(rc)if (!(rc)) { __kmp_debug_assert("rc", "openmp/runtime/src/kmp_runtime.cpp"
, 6084); }
;
6085
6086 KMP_MB();
6087 KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n"
, gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam
)->t.t_pkfn); }
6088 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n"
, gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam
)->t.t_pkfn); }
6089 (*pteam)->t.t_pkfn))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n"
, gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam
)->t.t_pkfn); }
;
6090 }
6091#if OMPT_SUPPORT1
6092 if (ompt_enabled.enabled) {
6093 /* no frame set while outside task */
6094 __ompt_get_task_info_object(0)->frame.exit_frame = ompt_data_none{0};
6095
6096 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6097 }
6098#endif
6099 /* join barrier after parallel region */
6100 __kmp_join_barrier(gtid);
6101 }
6102 }
6103 TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done)((void *)((intptr_t)__kmp_global.g.g_done));
6104
6105#if OMPD_SUPPORT1
6106 if (ompd_state & OMPD_ENABLE_BP0x1)
6107 ompd_bp_thread_end();
6108#endif
6109
6110#if OMPT_SUPPORT1
6111 if (ompt_enabled.ompt_callback_thread_end) {
6112 ompt_callbacks.ompt_callback(ompt_callback_thread_end)ompt_callback_thread_end_callback(thread_data);
6113 }
6114#endif
6115
6116 this_thr->th.th_task_team = NULL__null;
6117 /* run the destructors for the threadprivate data for this thread */
6118 __kmp_common_destroy_gtid(gtid);
6119
6120 KA_TRACE(10, ("__kmp_launch_thread: T#%d done\n", gtid))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_launch_thread: T#%d done\n"
, gtid); }
;
6121 KMP_MB();
6122
6123#if OMP_PROFILING_SUPPORT0
6124 llvm::timeTraceProfilerFinishThread();
6125#endif
6126 return this_thr;
6127}
6128
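
The function above is the worker thread's main loop: block at the fork barrier until a team posts work, invoke the microtask, meet the team again at the join barrier, and leave once the global done flag is set. As a rough editor's illustration only (not the runtime's actual barrier code; all names below are hypothetical), the same shape can be sketched with standard C++ primitives:

    // Minimal sketch of the wait-for-work / run / rejoin loop, assuming one
    // posted task per wakeup; a condition variable stands in for the
    // runtime's fork/join barriers.
    #include <condition_variable>
    #include <functional>
    #include <mutex>

    struct WorkerState {
      std::mutex m;
      std::condition_variable cv;
      std::function<void()> task; // plays the role of (*pteam)->t.t_pkfn
      bool done = false;          // plays the role of __kmp_global.g.g_done
    };

    void worker_loop(WorkerState &ws) {
      for (;;) {
        std::function<void()> work;
        {
          std::unique_lock<std::mutex> lk(ws.m);
          // "fork barrier": sleep until work is posted or shutdown is requested
          ws.cv.wait(lk, [&] { return ws.done || static_cast<bool>(ws.task); });
          if (ws.done)
            return;
          work = std::move(ws.task);
          ws.task = nullptr;
        }
        work(); // invoke the microtask, like t_invoke(gtid) above
        // a real runtime would now enter the join barrier
      }
    }
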
6129/* ------------------------------------------------------------------------ */
6130
6131void __kmp_internal_end_dest(void *specific_gtid) {
6132 // Make sure no significant bits are lost
6133 int gtid;
6134 __kmp_type_convert((kmp_intptr_t)specific_gtid - 1, &gtid);
6135
6136 KA_TRACE(30, ("__kmp_internal_end_dest: T#%d\n", gtid))if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_internal_end_dest: T#%d\n"
, gtid); }
;
6137 /* NOTE: the gtid is stored as gtid+1 in the thread-local storage
6138 * this is because 0 is reserved for the nothing-stored case */
6139
6140 __kmp_internal_end_thread(gtid);
6141}
6142
6143#if KMP_OS_UNIX1 && KMP_DYNAMIC_LIB1
6144
6145__attribute__((destructor)) void __kmp_internal_end_dtor(void) {
6146 __kmp_internal_end_atexit();
6147}
6148
6149#endif
6150
6151/* [Windows] josh: when the atexit handler is called, there may still be more
6152 than one thread alive */
6153void __kmp_internal_end_atexit(void) {
6154 KA_TRACE(30, ("__kmp_internal_end_atexit\n"))if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_internal_end_atexit\n"
); }
;
6155 /* [Windows]
6156 josh: ideally, we want to completely shut down the library in this atexit
6157 handler, but stat code that depends on thread-specific data for gtid fails
6158 because that data becomes unavailable at some point during the shutdown, so
6159 we call __kmp_internal_end_thread instead. We should eventually remove the
6160 dependency on __kmp_get_specific_gtid in the stat code and use
6161 __kmp_internal_end_library to cleanly shut down the library.
6162
6163 // TODO: Can some of this comment about GVS be removed?
6164 I suspect that the offending stat code is executed when the calling thread
6165 tries to clean up a dead root thread's data structures, resulting in GVS
6166 code trying to close the GVS structures for that thread, but since the stat
6167 code uses __kmp_get_specific_gtid to get the gtid with the assumption that
6168 the calling thread is cleaning up itself instead of another thread, it gets
6169 confused. This happens because allowing a thread to unregister and clean up
6170 another thread is a recent modification for addressing an issue.
6171 Based on the current design (20050722), a thread may end up
6172 trying to unregister another thread only if thread death does not trigger
6173 the calling of __kmp_internal_end_thread. For Linux* OS, there is the
6174 thread specific data destructor function to detect thread death. For
6175 Windows dynamic, there is DllMain(THREAD_DETACH). For Windows static, there
6176 is nothing. Thus, the workaround is applicable only for Windows static
6177 stat library. */
6178 __kmp_internal_end_library(-1);
6179#if KMP_OS_WINDOWS0
6180 __kmp_close_console();
6181#endif
6182}
6183
6184static void __kmp_reap_thread(kmp_info_t *thread, int is_root) {
6185 // It is assumed __kmp_forkjoin_lock is acquired.
6186
6187 int gtid;
6188
6189 KMP_DEBUG_ASSERT(thread != NULL)if (!(thread != __null)) { __kmp_debug_assert("thread != __null"
, "openmp/runtime/src/kmp_runtime.cpp", 6189); }
;
6190
6191 gtid = thread->th.th_info.ds.ds_gtid;
6192
6193 if (!is_root) {
6194 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME(2147483647)) {
6195 /* Assume the threads are at the fork barrier here */
6196 KA_TRACE(if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n"
, gtid); }
6197 20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n"
, gtid); }
6198 gtid))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n"
, gtid); }
;
6199 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
6200 while (
6201 !KMP_COMPARE_AND_STORE_ACQ32(&(thread->th.th_used_in_team), 0, 3)__sync_bool_compare_and_swap((volatile kmp_uint32 *)(&(thread
->th.th_used_in_team)), (kmp_uint32)(0), (kmp_uint32)(3))
)
6202 KMP_CPU_PAUSE()__kmp_x86_pause();
6203 __kmp_resume_32(gtid, (kmp_flag_32<false, false> *)NULL__null);
6204 } else {
6205 /* Need release fence here to prevent seg faults for tree forkjoin
6206 barrier (GEH) */
6207 kmp_flag_64<> flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
6208 thread);
6209 __kmp_release_64(&flag);
6210 }
6211 }
6212
6213 // Terminate OS thread.
6214 __kmp_reap_worker(thread);
6215
6216 // The thread was killed asynchronously. If it was actively
6217 // spinning in the thread pool, decrement the global count.
6218 //
6219 // There is a small timing hole here - if the worker thread was just waking
6220 // up after sleeping in the pool, had reset its th_active_in_pool flag but
6221 // not decremented the global counter __kmp_thread_pool_active_nth yet, then
6222 // the global counter might not get updated.
6223 //
6224 // Currently, this can only happen as the library is unloaded,
6225 // so there are no harmful side effects.
6226 if (thread->th.th_active_in_pool) {
6227 thread->th.th_active_in_pool = FALSE0;
6228 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth)(&__kmp_thread_pool_active_nth)->fetch_sub(1, std::memory_order_acq_rel
)
;
6229 KMP_DEBUG_ASSERT(__kmp_thread_pool_active_nth >= 0)if (!(__kmp_thread_pool_active_nth >= 0)) { __kmp_debug_assert
("__kmp_thread_pool_active_nth >= 0", "openmp/runtime/src/kmp_runtime.cpp"
, 6229); }
;
6230 }
6231 }
6232
6233 __kmp_free_implicit_task(thread);
6234
6235// Free the fast memory for tasking
6236#if USE_FAST_MEMORY3
6237 __kmp_free_fast_memory(thread);
6238#endif /* USE_FAST_MEMORY */
6239
6240 __kmp_suspend_uninitialize_thread(thread);
6241
6242 KMP_DEBUG_ASSERT(__kmp_threads[gtid] == thread)if (!(__kmp_threads[gtid] == thread)) { __kmp_debug_assert("__kmp_threads[gtid] == thread"
, "openmp/runtime/src/kmp_runtime.cpp", 6242); }
;
6243 TCW_SYNC_PTR(__kmp_threads[gtid], NULL)((__kmp_threads[gtid])) = ((__null));
6244
6245 --__kmp_all_nth;
6246 // __kmp_nth was decremented when thread is added to the pool.
6247
6248#ifdef KMP_ADJUST_BLOCKTIME1
6249 /* Adjust blocktime back to user setting or default if necessary */
6250 /* Middle initialization might never have occurred */
6251 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
6252 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0)if (!(__kmp_avail_proc > 0)) { __kmp_debug_assert("__kmp_avail_proc > 0"
, "openmp/runtime/src/kmp_runtime.cpp", 6252); }
;
6253 if (__kmp_nth <= __kmp_avail_proc) {
6254 __kmp_zero_bt = FALSE0;
6255 }
6256 }
6257#endif /* KMP_ADJUST_BLOCKTIME */
6258
6259 /* free the memory being used */
6260 if (__kmp_env_consistency_check) {
6261 if (thread->th.th_cons) {
6262 __kmp_free_cons_stack(thread->th.th_cons);
6263 thread->th.th_cons = NULL__null;
6264 }
6265 }
6266
6267 if (thread->th.th_pri_common != NULL__null) {
6268 __kmp_free(thread->th.th_pri_common)___kmp_free((thread->th.th_pri_common), "openmp/runtime/src/kmp_runtime.cpp"
, 6268)
;
6269 thread->th.th_pri_common = NULL__null;
6270 }
6271
6272 if (thread->th.th_task_state_memo_stack != NULL__null) {
6273 __kmp_free(thread->th.th_task_state_memo_stack)___kmp_free((thread->th.th_task_state_memo_stack), "openmp/runtime/src/kmp_runtime.cpp"
, 6273)
;
6274 thread->th.th_task_state_memo_stack = NULL__null;
6275 }
6276
6277#if KMP_USE_BGET1
6278 if (thread->th.th_local.bget_data != NULL__null) {
6279 __kmp_finalize_bget(thread);
6280 }
6281#endif
6282
6283#if KMP_AFFINITY_SUPPORTED1
6284 if (thread->th.th_affin_mask != NULL__null) {
6285 KMP_CPU_FREE(thread->th.th_affin_mask)__kmp_affinity_dispatch->deallocate_mask(thread->th.th_affin_mask
)
;
6286 thread->th.th_affin_mask = NULL__null;
6287 }
6288#endif /* KMP_AFFINITY_SUPPORTED */
6289
6290#if KMP_USE_HIER_SCHED0
6291 if (thread->th.th_hier_bar_data != NULL__null) {
6292 __kmp_free(thread->th.th_hier_bar_data)___kmp_free((thread->th.th_hier_bar_data), "openmp/runtime/src/kmp_runtime.cpp"
, 6292)
;
6293 thread->th.th_hier_bar_data = NULL__null;
6294 }
6295#endif
6296
6297 __kmp_reap_team(thread->th.th_serial_team);
6298 thread->th.th_serial_team = NULL__null;
6299 __kmp_free(thread)___kmp_free((thread), "openmp/runtime/src/kmp_runtime.cpp", 6299
)
;
6300
6301 KMP_MB();
6302
6303} // __kmp_reap_thread
6304
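
__kmp_reap_thread above follows a fixed order: release the worker from whatever barrier it is parked in, join its OS thread, and only then reconcile the pool-active counter (closing the small timing hole described in the comment). A hedged sketch of that ordering with standard threads and atomics, using made-up types rather than the runtime's:

    #include <atomic>
    #include <thread>

    struct Worker {
      std::thread os_thread;
      std::atomic<bool> go{false}; // stands in for the fork-barrier b_go flag
      bool active_in_pool = false;
    };

    std::atomic<int> pool_active_count{0}; // stands in for __kmp_thread_pool_active_nth

    void reap_worker(Worker &w) {
      w.go.store(true, std::memory_order_release); // let the worker leave its wait
      if (w.os_thread.joinable())
        w.os_thread.join();                        // analogous to __kmp_reap_worker
      if (w.active_in_pool) {                      // see the timing-hole comment above
        w.active_in_pool = false;
        pool_active_count.fetch_sub(1, std::memory_order_acq_rel);
      }
    }
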
6305static void __kmp_itthash_clean(kmp_info_t *th) {
6306#if USE_ITT_NOTIFY1
6307 if (__kmp_itt_region_domains.count > 0) {
6308 for (int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
6309 kmp_itthash_entry_t *bucket = __kmp_itt_region_domains.buckets[i];
6310 while (bucket) {
6311 kmp_itthash_entry_t *next = bucket->next_in_bucket;
6312 __kmp_thread_free(th, bucket)___kmp_thread_free((th), (bucket), "openmp/runtime/src/kmp_runtime.cpp"
, 6312)
;
6313 bucket = next;
6314 }
6315 }
6316 }
6317 if (__kmp_itt_barrier_domains.count > 0) {
6318 for (int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
6319 kmp_itthash_entry_t *bucket = __kmp_itt_barrier_domains.buckets[i];
6320 while (bucket) {
6321 kmp_itthash_entry_t *next = bucket->next_in_bucket;
6322 __kmp_thread_free(th, bucket)___kmp_thread_free((th), (bucket), "openmp/runtime/src/kmp_runtime.cpp"
, 6322)
;
6323 bucket = next;
6324 }
6325 }
6326 }
6327#endif
6328}
6329
6330static void __kmp_internal_end(void) {
6331 int i;
6332
6333 /* First, unregister the library */
6334 __kmp_unregister_library();
6335
6336#if KMP_OS_WINDOWS0
6337 /* In Win static library, we can't tell when a root actually dies, so we
6338 reclaim the data structures for any root threads that have died but not
6339 unregistered themselves, in order to shut down cleanly.
6340 In Win dynamic library we also can't tell when a thread dies. */
6341 __kmp_reclaim_dead_roots(); // AC: moved here to always clean resources of
6342// dead roots
6343#endif
6344
6345 for (i = 0; i < __kmp_threads_capacity; i++)
6346 if (__kmp_root[i])
6347 if (__kmp_root[i]->r.r_active)
6348 break;
6349 KMP_MB(); /* Flush all pending memory write invalidates. */
6350 TCW_SYNC_4(__kmp_global.g.g_done, TRUE)(__kmp_global.g.g_done) = ((!0));
6351
6352 if (i < __kmp_threads_capacity) {
6353#if KMP_USE_MONITOR
6354 // 2009-09-08 (lev): Other alive roots found. Why do we kill the monitor??
6355 KMP_MB(); /* Flush all pending memory write invalidates. */
6356
6357 // Need to check that monitor was initialized before reaping it. If we are
6358 // called from __kmp_atfork_child (which sets __kmp_init_parallel = 0), then
6359 // __kmp_monitor will appear to contain valid data, but it is only valid in
6360 // the parent process, not the child.
6361 // New behavior (201008): instead of keying off of the flag
6362 // __kmp_init_parallel, the monitor thread creation is keyed off
6363 // of the new flag __kmp_init_monitor.
6364 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6365 if (TCR_4(__kmp_init_monitor)(__kmp_init_monitor)) {
6366 __kmp_reap_monitor(&__kmp_monitor);
6367 TCW_4(__kmp_init_monitor, 0)(__kmp_init_monitor) = (0);
6368 }
6369 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
6370 KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end: monitor reaped\n"
); }
;
6371#endif // KMP_USE_MONITOR
6372 } else {
6373/* TODO move this to cleanup code */
6374#ifdef KMP_DEBUG1
6375 /* make sure that everything has properly ended */
6376 for (i = 0; i < __kmp_threads_capacity; i++) {
6377 if (__kmp_root[i]) {
6378 // KMP_ASSERT( ! KMP_UBER_GTID( i ) ); // AC:
6379 // there can be uber threads alive here
6380 KMP_ASSERT(!__kmp_root[i]->r.r_active)if (!(!__kmp_root[i]->r.r_active)) { __kmp_debug_assert("!__kmp_root[i]->r.r_active"
, "openmp/runtime/src/kmp_runtime.cpp", 6380); }
; // TODO: can they be active?
6381 }
6382 }
6383#endif
6384
6385 KMP_MB();
6386
6387 // Reap the worker threads.
6388 // This is valid for now, but be careful if threads are reaped sooner.
6389 while (__kmp_thread_pool != NULL__null) { // Loop thru all the threads in the pool.
6390 // Get the next thread from the pool.
6391 kmp_info_t *thread = CCAST(kmp_info_t *, __kmp_thread_pool)const_cast<kmp_info_t *>(__kmp_thread_pool);
6392 __kmp_thread_pool = thread->th.th_next_pool;
6393 // Reap it.
6394 KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP)if (!(thread->th.th_reap_state == 1)) { __kmp_debug_assert
("thread->th.th_reap_state == 1", "openmp/runtime/src/kmp_runtime.cpp"
, 6394); }
;
6395 thread->th.th_next_pool = NULL__null;
6396 thread->th.th_in_pool = FALSE0;
6397 __kmp_reap_thread(thread, 0);
6398 }
6399 __kmp_thread_pool_insert_pt = NULL__null;
6400
6401 // Reap teams.
6402 while (__kmp_team_pool != NULL__null) { // Loop thru all the teams in the pool.
6403 // Get the next team from the pool.
6404 kmp_team_t *team = CCAST(kmp_team_t *, __kmp_team_pool)const_cast<kmp_team_t *>(__kmp_team_pool);
6405 __kmp_team_pool = team->t.t_next_pool;
6406 // Reap it.
6407 team->t.t_next_pool = NULL__null;
6408 __kmp_reap_team(team);
6409 }
6410
6411 __kmp_reap_task_teams();
6412
6413#if KMP_OS_UNIX1
6414 // Threads that are not reaped should not access any resources since they
6415 // are going to be deallocated soon, so the shutdown sequence should wait
6416 // until all threads either exit the final spin-waiting loop or begin
6417 // sleeping after the given blocktime.
6418 for (i = 0; i < __kmp_threads_capacity; i++) {
6419 kmp_info_t *thr = __kmp_threads[i];
6420 while (thr && KMP_ATOMIC_LD_ACQ(&thr->th.th_blocking)(&thr->th.th_blocking)->load(std::memory_order_acquire
)
)
6421 KMP_CPU_PAUSE()__kmp_x86_pause();
6422 }
6423#endif
6424
6425 for (i = 0; i < __kmp_threads_capacity; ++i) {
6426 // TBD: Add some checking...
6427 // Something like KMP_DEBUG_ASSERT( __kmp_thread[ i ] == NULL );
6428 }
6429
6430 /* Make sure all threadprivate destructors get run by joining with all
6431 worker threads before resetting this flag */
6432 TCW_SYNC_4(__kmp_init_common, FALSE)(__kmp_init_common) = (0);
6433
6434 KA_TRACE(10, ("__kmp_internal_end: all workers reaped\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end: all workers reaped\n"
); }
;
6435 KMP_MB();
6436
6437#if KMP_USE_MONITOR
6438 // See note above: One of the possible fixes for CQ138434 / CQ140126
6439 //
6440 // FIXME: push both code fragments down and CSE them?
6441 // push them into __kmp_cleanup() ?
6442 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6443 if (TCR_4(__kmp_init_monitor)(__kmp_init_monitor)) {
6444 __kmp_reap_monitor(&__kmp_monitor);
6445 TCW_4(__kmp_init_monitor, 0)(__kmp_init_monitor) = (0);
6446 }
6447 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
6448 KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end: monitor reaped\n"
); }
;
6449#endif
6450 } /* else !__kmp_global.t_active */
6451 TCW_4(__kmp_init_gtid, FALSE)(__kmp_init_gtid) = (0);
6452 KMP_MB(); /* Flush all pending memory write invalidates. */
6453
6454 __kmp_cleanup();
6455#if OMPT_SUPPORT1
6456 ompt_fini();
6457#endif
6458}
6459
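
The two reap loops above share one shape: pop the head of an intrusive singly-linked pool, detach it, and hand it to the matching reap routine until the pool is empty. A generic sketch of that shape (hypothetical types, not the runtime's):

    struct Node { Node *next = nullptr; };

    template <class ReapFn>
    void drain_pool(Node *&head, ReapFn reap) {
      while (head != nullptr) {
        Node *n = head;   // take the first element, like CCAST(..., __kmp_thread_pool)
        head = n->next;   // advance the pool head
        n->next = nullptr;
        reap(n);          // e.g. __kmp_reap_thread(...) or __kmp_reap_team(...)
      }
    }
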
6460void __kmp_internal_end_library(int gtid_req) {
6461 /* if we have already cleaned up, don't try again, it wouldn't be pretty */
6462 /* this shouldn't be a race condition because __kmp_internal_end() is the
6463 only place to clear __kmp_serial_init */
6464 /* we'll check this later too, after we get the lock */
6465 // 2009-09-06: We do not set g_abort without setting g_done. This check looks
6466 // redundant, because the next check will work in any case.
6467 if (__kmp_global.g.g_abort) {
6468 KA_TRACE(11, ("__kmp_internal_end_library: abort, exiting\n"))if (kmp_a_debug >= 11) { __kmp_debug_printf ("__kmp_internal_end_library: abort, exiting\n"
); }
;
6469 /* TODO abort? */
6470 return;
6471 }
6472 if (TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done) || !__kmp_init_serial) {
6473 KA_TRACE(10, ("__kmp_internal_end_library: already finished\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: already finished\n"
); }
;
6474 return;
6475 }
6476
6477 // If hidden helper team has been initialized, we need to deinit it
6478 if (TCR_4(__kmp_init_hidden_helper)(__kmp_init_hidden_helper) &&
6479 !TCR_4(__kmp_hidden_helper_team_done)(__kmp_hidden_helper_team_done)) {
6480 TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE)(__kmp_hidden_helper_team_done) = ((!0));
6481 // First release the main thread to let it continue its work
6482 __kmp_hidden_helper_main_thread_release();
6483 // Wait until the hidden helper team has been destroyed
6484 __kmp_hidden_helper_threads_deinitz_wait();
6485 }
6486
6487 KMP_MB(); /* Flush all pending memory write invalidates. */
6488 /* find out who we are and what we should do */
6489 {
6490 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6491 KA_TRACE(if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: enter T#%d (%d)\n"
, gtid, gtid_req); }
6492 10, ("__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: enter T#%d (%d)\n"
, gtid, gtid_req); }
;
6493 if (gtid == KMP_GTID_SHUTDOWN(-3)) {
6494 KA_TRACE(10, ("__kmp_internal_end_library: !__kmp_init_runtime, system "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: !__kmp_init_runtime, system "
"already shutdown\n"); }
6495 "already shutdown\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: !__kmp_init_runtime, system "
"already shutdown\n"); }
;
6496 return;
6497 } else if (gtid == KMP_GTID_MONITOR(-4)) {
6498 KA_TRACE(10, ("__kmp_internal_end_library: monitor thread, gtid not "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: monitor thread, gtid not "
"registered, or system shutdown\n"); }
6499 "registered, or system shutdown\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: monitor thread, gtid not "
"registered, or system shutdown\n"); }
;
6500 return;
6501 } else if (gtid == KMP_GTID_DNE(-2)) {
6502 KA_TRACE(10, ("__kmp_internal_end_library: gtid not registered or system "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: gtid not registered or system "
"shutdown\n"); }
6503 "shutdown\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: gtid not registered or system "
"shutdown\n"); }
;
6504 /* we don't know who we are, but we may still shut down the library */
6505 } else if (KMP_UBER_GTID(gtid)) {
6506 /* unregister ourselves as an uber thread. gtid is no longer valid */
6507 if (__kmp_root[gtid]->r.r_active) {
6508 __kmp_global.g.g_abort = -1;
6509 TCW_SYNC_4(__kmp_global.g.g_done, TRUE)(__kmp_global.g.g_done) = ((!0));
6510 __kmp_unregister_library();
6511 KA_TRACE(10,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: root still active, abort T#%d\n"
, gtid); }
6512 ("__kmp_internal_end_library: root still active, abort T#%d\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: root still active, abort T#%d\n"
, gtid); }
6513 gtid))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: root still active, abort T#%d\n"
, gtid); }
;
6514 return;
6515 } else {
6516 __kmp_itthash_clean(__kmp_threads[gtid]);
6517 KA_TRACE(if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: unregistering sibling T#%d\n"
, gtid); }
6518 10,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: unregistering sibling T#%d\n"
, gtid); }
6519 ("__kmp_internal_end_library: unregistering sibling T#%d\n", gtid))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: unregistering sibling T#%d\n"
, gtid); }
;
6520 __kmp_unregister_root_current_thread(gtid);
6521 }
6522 } else {
6523/* worker threads may call this function through the atexit handler, if they
6524 * call exit() */
6525/* For now, skip the usual subsequent processing and just dump the debug buffer.
6526 TODO: do a thorough shutdown instead */
6527#ifdef DUMP_DEBUG_ON_EXIT
6528 if (__kmp_debug_buf)
6529 __kmp_dump_debug_buffer();
6530#endif
6531 // An unregister-library call was added here when we switched to shm on Linux;
6532 // if we don't unregister, lots of files are left behind in /dev/shm.
6533 // Clean up the shared memory file before exiting.
6534 __kmp_unregister_library();
6535 return;
6536 }
6537 }
6538 /* synchronize the termination process */
6539 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6540
6541 /* have we already finished */
6542 if (__kmp_global.g.g_abort) {
6543 KA_TRACE(10, ("__kmp_internal_end_library: abort, exiting\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: abort, exiting\n"
); }
;
6544 /* TODO abort? */
6545 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6546 return;
6547 }
6548 if (TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done) || !__kmp_init_serial) {
6549 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6550 return;
6551 }
6552
6553 /* We need this lock to enforce mutex between this reading of
6554 __kmp_threads_capacity and the writing by __kmp_register_root.
6555 Alternatively, we can use a counter of roots that is atomically updated by
6556 __kmp_get_global_thread_id_reg, __kmp_do_serial_initialize and
6557 __kmp_internal_end_*. */
6558 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
6559
6560 /* now we can safely conduct the actual termination */
6561 __kmp_internal_end();
6562
6563 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6564 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6565
6566 KA_TRACE(10, ("__kmp_internal_end_library: exit\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: exit\n"
); }
;
6567
6568#ifdef DUMP_DEBUG_ON_EXIT
6569 if (__kmp_debug_buf)
6570 __kmp_dump_debug_buffer();
6571#endif
6572
6573#if KMP_OS_WINDOWS0
6574 __kmp_close_console();
6575#endif
6576
6577 __kmp_fini_allocator();
6578
6579} // __kmp_internal_end_library
6580
6581void __kmp_internal_end_thread(int gtid_req) {
6582 int i;
6583
6584 /* if we have already cleaned up, don't try again, it wouldn't be pretty */
6585 /* this shouldn't be a race condition because __kmp_internal_end() is the
6586 * only place to clear __kmp_serial_init */
6587 /* we'll check this later too, after we get the lock */
6588 // 2009-09-06: We do not set g_abort without setting g_done. This check looks
6589 // redundant, because the next check will work in any case.
6590 if (__kmp_global.g.g_abort) {
6591 KA_TRACE(11, ("__kmp_internal_end_thread: abort, exiting\n"))if (kmp_a_debug >= 11) { __kmp_debug_printf ("__kmp_internal_end_thread: abort, exiting\n"
); }
;
6592 /* TODO abort? */
6593 return;
6594 }
6595 if (TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done) || !__kmp_init_serial) {
6596 KA_TRACE(10, ("__kmp_internal_end_thread: already finished\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: already finished\n"
); }
;
6597 return;
6598 }
6599
6600 // If hidden helper team has been initialized, we need to deinit it
6601 if (TCR_4(__kmp_init_hidden_helper)(__kmp_init_hidden_helper) &&
6602 !TCR_4(__kmp_hidden_helper_team_done)(__kmp_hidden_helper_team_done)) {
6603 TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE)(__kmp_hidden_helper_team_done) = ((!0));
6604 // First release the main thread to let it continue its work
6605 __kmp_hidden_helper_main_thread_release();
6606 // Wait until the hidden helper team has been destroyed
6607 __kmp_hidden_helper_threads_deinitz_wait();
6608 }
6609
6610 KMP_MB(); /* Flush all pending memory write invalidates. */
6611
6612 /* find out who we are and what we should do */
6613 {
6614 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6615 KA_TRACE(10,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: enter T#%d (%d)\n"
, gtid, gtid_req); }
6616 ("__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: enter T#%d (%d)\n"
, gtid, gtid_req); }
;
6617 if (gtid == KMP_GTID_SHUTDOWN(-3)) {
6618 KA_TRACE(10, ("__kmp_internal_end_thread: !__kmp_init_runtime, system "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: !__kmp_init_runtime, system "
"already shutdown\n"); }
6619 "already shutdown\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: !__kmp_init_runtime, system "
"already shutdown\n"); }
;
6620 return;
6621 } else if (gtid == KMP_GTID_MONITOR(-4)) {
6622 KA_TRACE(10, ("__kmp_internal_end_thread: monitor thread, gtid not "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: monitor thread, gtid not "
"registered, or system shutdown\n"); }
6623 "registered, or system shutdown\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: monitor thread, gtid not "
"registered, or system shutdown\n"); }
;
6624 return;
6625 } else if (gtid == KMP_GTID_DNE(-2)) {
6626 KA_TRACE(10, ("__kmp_internal_end_thread: gtid not registered or system "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: gtid not registered or system "
"shutdown\n"); }
6627 "shutdown\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: gtid not registered or system "
"shutdown\n"); }
;
6628 return;
6629 /* we don't know who we are */
6630 } else if (KMP_UBER_GTID(gtid)) {
6631 /* unregister ourselves as an uber thread. gtid is no longer valid */
6632 if (__kmp_root[gtid]->r.r_active) {
6633 __kmp_global.g.g_abort = -1;
6634 TCW_SYNC_4(__kmp_global.g.g_done, TRUE)(__kmp_global.g.g_done) = ((!0));
6635 KA_TRACE(10,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: root still active, abort T#%d\n"
, gtid); }
6636 ("__kmp_internal_end_thread: root still active, abort T#%d\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: root still active, abort T#%d\n"
, gtid); }
6637 gtid))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: root still active, abort T#%d\n"
, gtid); }
;
6638 return;
6639 } else {
6640 KA_TRACE(10, ("__kmp_internal_end_thread: unregistering sibling T#%d\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: unregistering sibling T#%d\n"
, gtid); }
6641 gtid))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: unregistering sibling T#%d\n"
, gtid); }
;
6642 __kmp_unregister_root_current_thread(gtid);
6643 }
6644 } else {
6645 /* just a worker thread, let's leave */
6646 KA_TRACE(10, ("__kmp_internal_end_thread: worker thread T#%d\n", gtid))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: worker thread T#%d\n"
, gtid); }
;
6647
6648 if (gtid >= 0) {
6649 __kmp_threads[gtid]->th.th_task_team = NULL__null;
6650 }
6651
6652 KA_TRACE(10,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n"
, gtid); }
6653 ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n"
, gtid); }
6654 gtid))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n"
, gtid); }
;
6655 return;
6656 }
6657 }
6658#if KMP_DYNAMIC_LIB1
6659 if (__kmp_pause_status != kmp_hard_paused)
6660 // AC: let's not shut down the dynamic library at the exit of the uber thread,
6661 // because it is better to shut down later in the library destructor.
6662 {
6663 KA_TRACE(10, ("__kmp_internal_end_thread: exiting T#%d\n", gtid_req))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: exiting T#%d\n"
, gtid_req); }
;
6664 return;
6665 }
6666#endif
6667 /* synchronize the termination process */
6668 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6669
6670 /* have we already finished */
6671 if (__kmp_global.g.g_abort) {
6672 KA_TRACE(10, ("__kmp_internal_end_thread: abort, exiting\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: abort, exiting\n"
); }
;
6673 /* TODO abort? */
6674 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6675 return;
6676 }
6677 if (TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done) || !__kmp_init_serial) {
6678 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6679 return;
6680 }
6681
6682 /* We need this lock to enforce mutex between this reading of
6683 __kmp_threads_capacity and the writing by __kmp_register_root.
6684 Alternatively, we can use a counter of roots that is atomically updated by
6685 __kmp_get_global_thread_id_reg, __kmp_do_serial_initialize and
6686 __kmp_internal_end_*. */
6687
6688 /* should we finish the run-time? are all siblings done? */
6689 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
6690
6691 for (i = 0; i < __kmp_threads_capacity; ++i) {
6692 if (KMP_UBER_GTID(i)) {
6693 KA_TRACE(if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n"
, i); }
6694 10,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n"
, i); }
6695 ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n"
, i); }
;
6696 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6697 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6698 return;
6699 }
6700 }
6701
6702 /* now we can safely conduct the actual termination */
6703
6704 __kmp_internal_end();
6705
6706 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6707 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6708
6709 KA_TRACE(10, ("__kmp_internal_end_thread: exit T#%d\n", gtid_req))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: exit T#%d\n"
, gtid_req); }
;
6710
6711#ifdef DUMP_DEBUG_ON_EXIT
6712 if (__kmp_debug_buf)
6713 __kmp_dump_debug_buffer();
6714#endif
6715} // __kmp_internal_end_thread
6716
6717// -----------------------------------------------------------------------------
6718// Library registration stuff.
6719
6720static long __kmp_registration_flag = 0;
6721// Random value used to indicate library initialization.
6722static char *__kmp_registration_str = NULL__null;
6723// Value to be saved in env var __KMP_REGISTERED_LIB_<pid>.
6724
6725static inline char *__kmp_reg_status_name() {
6726/* On RHEL 3u5 if linked statically, getpid() returns different values in
6727 each thread. If registration and unregistration go in different threads
6728 (omp_misc_other_root_exit.cpp test case), the name of registered_lib_env
6729 env var cannot be found, because the name will contain a different pid. */
6730// macOS* complains about name being too long with additional getuid()
6731#if KMP_OS_UNIX1 && !KMP_OS_DARWIN0 && KMP_DYNAMIC_LIB1
6732 return __kmp_str_format("__KMP_REGISTERED_LIB_%d_%d", (int)getpid(),
6733 (int)getuid());
6734#else
6735 return __kmp_str_format("__KMP_REGISTERED_LIB_%d", (int)getpid());
6736#endif
6737} // __kmp_reg_status_name
6738
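
For illustration only, here is roughly how both the registration key produced by __kmp_reg_status_name above and the "%p-%lx-%s" value used in __kmp_register_library_startup below could be composed by hand; the helper is hypothetical and uses plain snprintf instead of __kmp_str_format:

    #include <cstddef>
    #include <cstdio>
    #include <unistd.h>

    static long reg_flag; // plays the role of __kmp_registration_flag

    void make_registration_strings(char *name, std::size_t name_len,
                                   char *value, std::size_t value_len) {
      // The name embeds pid (and uid on non-macOS Unix) so each process/user
      // gets its own key, e.g. "__KMP_REGISTERED_LIB_1234_1000".
      std::snprintf(name, name_len, "__KMP_REGISTERED_LIB_%d_%d",
                    (int)getpid(), (int)getuid());
      // The value encodes the flag's address and contents plus the library
      // file name, so a later reader can test whether the registering copy
      // of the runtime is still alive (see the neighbor check further below).
      std::snprintf(value, value_len, "%p-%lx-%s",
                    (void *)&reg_flag, (unsigned long)reg_flag, "libomp.so.5");
    }
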
6739#if defined(KMP_USE_SHM)
6740// If /dev/shm is not accessible, we will create a temporary file under /tmp.
6741char *temp_reg_status_file_name = nullptr;
6742#endif
6743
6744void __kmp_register_library_startup(void) {
6745
6746 char *name = __kmp_reg_status_name(); // Name of the environment variable.
6747 int done = 0;
6748 union {
6749 double dtime;
6750 long ltime;
6751 } time;
6752#if KMP_ARCH_X860 || KMP_ARCH_X86_641
6753 __kmp_initialize_system_tick();
6754#endif
6755 __kmp_read_system_time(&time.dtime);
6756 __kmp_registration_flag = 0xCAFE0000L | (time.ltime & 0x0000FFFFL);
6757 __kmp_registration_str =
6758 __kmp_str_format("%p-%lx-%s", &__kmp_registration_flag,
6759 __kmp_registration_flag, KMP_LIBRARY_FILE"libomp.so.5");
6760
6761 KA_TRACE(50, ("__kmp_register_library_startup: %s=\"%s\"\n", name,if (kmp_a_debug >= 50) { __kmp_debug_printf ("__kmp_register_library_startup: %s=\"%s\"\n"
, name, __kmp_registration_str); }
6762 __kmp_registration_str))if (kmp_a_debug >= 50) { __kmp_debug_printf ("__kmp_register_library_startup: %s=\"%s\"\n"
, name, __kmp_registration_str); }
;
6763
6764 while (!done) {
6765
6766 char *value = NULL__null; // Actual value of the environment variable.
6767
6768#if defined(KMP_USE_SHM)
6769 char *shm_name = __kmp_str_format("/%s", name);
6770 int shm_preexist = 0;
6771 char *data1;
6772 int fd1 = shm_open(shm_name, O_CREAT0100 | O_EXCL0200 | O_RDWR02, 0666);
6773 if ((fd1 == -1) && (errno(*__errno_location ()) == EEXIST17)) {
6774 // file didn't open because it already exists.
6775 // try opening existing file
6776 fd1 = shm_open(shm_name, O_RDWR02, 0666);
6777 if (fd1 == -1) { // file didn't open
6778 // error out here
6779 __kmp_fatal(KMP_MSG(FunctionError, "Can't open SHM")__kmp_msg_format(kmp_i18n_msg_FunctionError, "Can't open SHM"
)
, KMP_ERR(0)__kmp_msg_error_code(0),
6780 __kmp_msg_null);
6781 } else {
6782 // able to open existing file
6783 shm_preexist = 1;
6784 }
6785 } else if (fd1 == -1) {
6786 // SHM didn't open; it failed with an error other than EEXIST. Try to
6787 // create a temp file under /tmp.
6788 // TODO: /tmp might not always be the temporary directory. For now we will
6789 // not consider TMPDIR. If /tmp is not accessible, we simply error out.
6790 char *temp_file_name = __kmp_str_format("/tmp/%sXXXXXX", name);
6791 fd1 = mkstemp(temp_file_name);
6792 if (fd1 == -1) {
6793 // error out here.
6794 __kmp_fatal(KMP_MSG(FunctionError, "Can't open TEMP")__kmp_msg_format(kmp_i18n_msg_FunctionError, "Can't open TEMP"
)
, KMP_ERR(errno)__kmp_msg_error_code((*__errno_location ())),
6795 __kmp_msg_null);
6796 }
6797 temp_reg_status_file_name = temp_file_name;
6798 }
6799 if (shm_preexist == 0) {
6800 // we created SHM now set size
6801 if (ftruncate(fd1, SHM_SIZE1024) == -1) {
6802 // error occurred setting the size
6803 __kmp_fatal(KMP_MSG(FunctionError, "Can't set size of SHM")__kmp_msg_format(kmp_i18n_msg_FunctionError, "Can't set size of SHM"
)
,
6804 KMP_ERR(errno)__kmp_msg_error_code((*__errno_location ())), __kmp_msg_null);
6805 }
6806 }
6807 data1 =
6808 (char *)mmap(0, SHM_SIZE1024, PROT_READ0x1 | PROT_WRITE0x2, MAP_SHARED0x01, fd1, 0);
6809 if (data1 == MAP_FAILED((void *) -1)) {
6810 // failed to map shared memory
6811 __kmp_fatal(KMP_MSG(FunctionError, "Can't map SHM")__kmp_msg_format(kmp_i18n_msg_FunctionError, "Can't map SHM"), KMP_ERR(errno)__kmp_msg_error_code((*__errno_location ())),
6812 __kmp_msg_null);
6813 }
6814 if (shm_preexist == 0) { // set data to SHM, set value
6815 KMP_STRCPY_S(data1, SHM_SIZE, __kmp_registration_str)strcpy(data1, __kmp_registration_str);
6816 }
6817 // Read value from either what we just wrote or existing file.
6818 value = __kmp_str_format("%s", data1); // read value from SHM
6819 munmap(data1, SHM_SIZE1024);
6820 close(fd1);
6821#else // Windows and unix with static library
6822 // Set the environment variable, but do not overwrite it if it already exists.
6823 __kmp_env_set(name, __kmp_registration_str, 0);
6824 // read value to see if it got set
6825 value = __kmp_env_get(name);
6826#endif
6827
6828 if (value != NULL__null && strcmp(value, __kmp_registration_str) == 0) {
6829 done = 1; // Ok, environment variable set successfully, exit the loop.
6830 } else {
6831 // Oops. Write failed. Another copy of OpenMP RTL is in memory.
6832 // Check whether it is alive or dead.
6833 int neighbor = 0; // 0 -- unknown status, 1 -- alive, 2 -- dead.
6834 char *tail = value;
6835 char *flag_addr_str = NULL__null;
6836 char *flag_val_str = NULL__null;
6837 char const *file_name = NULL__null;
6838 __kmp_str_split(tail, '-', &flag_addr_str, &tail);
6839 __kmp_str_split(tail, '-', &flag_val_str, &tail);
6840 file_name = tail;
6841 if (tail != NULL__null) {
6842 unsigned long *flag_addr = 0;
6843 unsigned long flag_val = 0;
6844 KMP_SSCANFsscanf(flag_addr_str, "%p", RCAST(void **, &flag_addr)reinterpret_cast<void **>(&flag_addr));
6845 KMP_SSCANFsscanf(flag_val_str, "%lx", &flag_val);
6846 if (flag_addr != 0 && flag_val != 0 && strcmp(file_name, "") != 0) {
6847 // First, check whether environment-encoded address is mapped into
6848 // addr space.
6849 // If so, dereference it to see if it still has the right value.
6850 if (__kmp_is_address_mapped(flag_addr) && *flag_addr == flag_val) {
6851 neighbor = 1;
6852 } else {
6853 // If not, then we know the other copy of the library is no longer
6854 // running.
6855 neighbor = 2;
6856 }
6857 }
6858 }
6859 switch (neighbor) {
6860 case 0: // Cannot parse environment variable -- neighbor status unknown.
6861 // Assume it is the incompatible format of future version of the
6862 // library. Assume the other library is alive.
6863 // WARN( ... ); // TODO: Issue a warning.
6864 file_name = "unknown library";
6865 KMP_FALLTHROUGH()[[fallthrough]];
6866 // Attention! Falling through to the next case. That's intentional.
6867 case 1: { // Neighbor is alive.
6868 // Check it is allowed.
6869 char *duplicate_ok = __kmp_env_get("KMP_DUPLICATE_LIB_OK");
6870 if (!__kmp_str_match_true(duplicate_ok)) {
6871 // That's not allowed. Issue fatal error.
6872 __kmp_fatal(KMP_MSG(DuplicateLibrary, KMP_LIBRARY_FILE, file_name)__kmp_msg_format(kmp_i18n_msg_DuplicateLibrary, "libomp.so.5"
, file_name)
,
6873 KMP_HNT(DuplicateLibrary)__kmp_msg_format(kmp_i18n_hnt_DuplicateLibrary), __kmp_msg_null);
6874 }
6875 KMP_INTERNAL_FREE(duplicate_ok)free(duplicate_ok);
6876 __kmp_duplicate_library_ok = 1;
6877 done = 1; // Exit the loop.
6878 } break;
6879 case 2: { // Neighbor is dead.
6880
6881#if defined(KMP_USE_SHM)
6882 // close shared memory.
6883 shm_unlink(shm_name); // this removes file in /dev/shm
6884#else
6885 // Clear the variable and try to register library again.
6886 __kmp_env_unset(name);
6887#endif
6888 } break;
6889 default: {
6890 KMP_DEBUG_ASSERT(0)if (!(0)) { __kmp_debug_assert("0", "openmp/runtime/src/kmp_runtime.cpp"
, 6890); }
;
6891 } break;
6892 }
6893 }
6894 KMP_INTERNAL_FREE((void *)value)free((void *)value);
6895#if defined(KMP_USE_SHM)
6896 KMP_INTERNAL_FREE((void *)shm_name)free((void *)shm_name);
6897#endif
6898 } // while
6899 KMP_INTERNAL_FREE((void *)name)free((void *)name);
6900
6901} // func __kmp_register_library_startup
6902
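
Stripped of the temp-file fallback and error reporting, the shared-memory handshake above boils down to: create the segment exclusively, fall back to attaching if it already exists, publish our registration string if we created it, and compare what the segment actually contains with what we meant to write. A hedged, self-contained sketch (hypothetical helper, fixed 1024-byte segment standing in for SHM_SIZE):

    #include <cerrno>
    #include <cstddef>
    #include <cstring>
    #include <fcntl.h>
    #include <sys/mman.h>
    #include <unistd.h>

    // Returns true if this process won the registration, false if another
    // copy of the library is already registered under 'shm_name'.
    bool register_via_shm(const char *shm_name, const char *reg_str) {
      const std::size_t kBytes = 1024;
      bool created = true;
      int fd = shm_open(shm_name, O_CREAT | O_EXCL | O_RDWR, 0666);
      if (fd == -1 && errno == EEXIST) { // someone registered first; attach
        created = false;
        fd = shm_open(shm_name, O_RDWR, 0666);
      }
      if (fd == -1)
        return false;
      if (created && ftruncate(fd, kBytes) == -1) { close(fd); return false; }
      char *data = (char *)mmap(nullptr, kBytes, PROT_READ | PROT_WRITE,
                                MAP_SHARED, fd, 0);
      if (data == MAP_FAILED) { close(fd); return false; }
      if (created)
        std::strncpy(data, reg_str, kBytes - 1);                // publish our string
      bool ours = (std::strncmp(data, reg_str, kBytes) == 0);   // read back, compare
      munmap(data, kBytes);
      close(fd);
      return ours;
    }

On a successful registration the segment is intentionally left in place; __kmp_unregister_library below removes it again with shm_unlink.
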
6903void __kmp_unregister_library(void) {
6904
6905 char *name = __kmp_reg_status_name();
6906 char *value = NULL__null;
6907
6908#if defined(KMP_USE_SHM)
6909 bool use_shm = true;
6910 char *shm_name = __kmp_str_format("/%s", name);
6911 int fd1 = shm_open(shm_name, O_RDONLY00, 0666);
6912 if (fd1 == -1) {
6913 // File did not open. Try the temporary file.
6914 use_shm = false;
6915 KMP_DEBUG_ASSERT(temp_reg_status_file_name)if (!(temp_reg_status_file_name)) { __kmp_debug_assert("temp_reg_status_file_name"
, "openmp/runtime/src/kmp_runtime.cpp", 6915); }
;
6916 fd1 = open(temp_reg_status_file_name, O_RDONLY00);
6917 if (fd1 == -1) {
6918 // give it up now.
6919 return;
6920 }
6921 }
6922 char *data1 = (char *)mmap(0, SHM_SIZE1024, PROT_READ0x1, MAP_SHARED0x01, fd1, 0);
6923 if (data1 != MAP_FAILED((void *) -1)) {
6924 value = __kmp_str_format("%s", data1); // read value from SHM
6925 munmap(data1, SHM_SIZE1024);
6926 }
6927 close(fd1);
6928#else
6929 value = __kmp_env_get(name);
6930#endif
6931
6932 KMP_DEBUG_ASSERT(__kmp_registration_flag != 0)if (!(__kmp_registration_flag != 0)) { __kmp_debug_assert("__kmp_registration_flag != 0"
, "openmp/runtime/src/kmp_runtime.cpp", 6932); }
;
6933 KMP_DEBUG_ASSERT(__kmp_registration_str != NULL)if (!(__kmp_registration_str != __null)) { __kmp_debug_assert
("__kmp_registration_str != __null", "openmp/runtime/src/kmp_runtime.cpp"
, 6933); }
;
6934 if (value != NULL__null && strcmp(value, __kmp_registration_str) == 0) {
6935// Ok, this is our variable. Delete it.
6936#if defined(KMP_USE_SHM)
6937 if (use_shm) {
6938 shm_unlink(shm_name); // this removes file in /dev/shm
6939 } else {
6940 KMP_DEBUG_ASSERT(temp_reg_status_file_name)if (!(temp_reg_status_file_name)) { __kmp_debug_assert("temp_reg_status_file_name"
, "openmp/runtime/src/kmp_runtime.cpp", 6940); }
;
6941 unlink(temp_reg_status_file_name); // this removes the temp file
6942 }
6943#else
6944 __kmp_env_unset(name);
6945#endif
6946 }
6947
6948#if defined(KMP_USE_SHM)
6949 KMP_INTERNAL_FREE(shm_name)free(shm_name);
6950 if (!use_shm) {
6951 KMP_DEBUG_ASSERT(temp_reg_status_file_name)if (!(temp_reg_status_file_name)) { __kmp_debug_assert("temp_reg_status_file_name"
, "openmp/runtime/src/kmp_runtime.cpp", 6951); }
;
6952 KMP_INTERNAL_FREE(temp_reg_status_file_name)free(temp_reg_status_file_name);
6953 }
6954#endif
6955
6956 KMP_INTERNAL_FREE(__kmp_registration_str)free(__kmp_registration_str);
6957 KMP_INTERNAL_FREE(value)free(value);
6958 KMP_INTERNAL_FREE(name)free(name);
6959
6960 __kmp_registration_flag = 0;
6961 __kmp_registration_str = NULL__null;
6962
6963} // __kmp_unregister_library
6964
6965// End of Library registration stuff.
6966// -----------------------------------------------------------------------------
6967
6968#if KMP_MIC_SUPPORTED((0 || 1) && (1 || 0))
6969
6970static void __kmp_check_mic_type() {
6971 kmp_cpuid_t cpuid_state = {0};
6972 kmp_cpuid_t *cs_p = &cpuid_state;
6973 __kmp_x86_cpuid(1, 0, cs_p);
6974 // We don't support mic1 at the moment
6975 if ((cs_p->eax & 0xff0) == 0xB10) {
6976 __kmp_mic_type = mic2;
6977 } else if ((cs_p->eax & 0xf0ff0) == 0x50670) {
6978 __kmp_mic_type = mic3;
6979 } else {
6980 __kmp_mic_type = non_mic;
6981 }
6982}
6983
6984#endif /* KMP_MIC_SUPPORTED */
6985
6986#if KMP_HAVE_UMWAIT((0 || 1) && (1 || 0) && !0)
6987static void __kmp_user_level_mwait_init() {
6988 struct kmp_cpuid buf;
6989 __kmp_x86_cpuid(7, 0, &buf);
6990 __kmp_waitpkg_enabled = ((buf.ecx >> 5) & 1);
6991 __kmp_umwait_enabled = __kmp_waitpkg_enabled && __kmp_user_level_mwait;
6992 __kmp_tpause_enabled = __kmp_waitpkg_enabled && (__kmp_tpause_state > 0);
6993 KF_TRACE(30, ("__kmp_user_level_mwait_init: __kmp_umwait_enabled = %d\n",if (kmp_f_debug >= 30) { __kmp_debug_printf ("__kmp_user_level_mwait_init: __kmp_umwait_enabled = %d\n"
, __kmp_umwait_enabled); }
6994 __kmp_umwait_enabled))if (kmp_f_debug >= 30) { __kmp_debug_printf ("__kmp_user_level_mwait_init: __kmp_umwait_enabled = %d\n"
, __kmp_umwait_enabled); }
;
6995}
6996#elif KMP_HAVE_MWAIT((0 || 1) && (1 || 0) && !0)
6997#ifndef AT_INTELPHIUSERMWAIT
6998// Spurious, non-existent value that should always fail to return anything.
6999// Will be replaced with the correct value when we know that.
7000#define AT_INTELPHIUSERMWAIT 10000
7001#endif
7002// The getauxval() function is available in RHEL7 and SLES12. If a system with an
7003// earlier OS is used to build the RTL, we'll use the following internal
7004// function when the entry is not found.
7005unsigned long getauxval(unsigned long) KMP_WEAK_ATTRIBUTE_EXTERNAL;
7006unsigned long getauxval(unsigned long) { return 0; }
7007
7008static void __kmp_user_level_mwait_init() {
7009 // When getauxval() and correct value of AT_INTELPHIUSERMWAIT are available
7010 // use them to find if the user-level mwait is enabled. Otherwise, forcibly
7011 // set __kmp_mwait_enabled=TRUE on Intel MIC if the environment variable
7012 // KMP_USER_LEVEL_MWAIT was set to TRUE.
7013 if (__kmp_mic_type == mic3) {
7014 unsigned long res = getauxval(AT_INTELPHIUSERMWAIT);
7015 if ((res & 0x1) || __kmp_user_level_mwait) {
7016 __kmp_mwait_enabled = TRUE(!0);
7017 if (__kmp_user_level_mwait) {
7018 KMP_INFORM(EnvMwaitWarn)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_EnvMwaitWarn
), __kmp_msg_null)
;
7019 }
7020 } else {
7021 __kmp_mwait_enabled = FALSE0;
7022 }
7023 }
7024 KF_TRACE(30, ("__kmp_user_level_mwait_init: __kmp_mic_type = %d, "if (kmp_f_debug >= 30) { __kmp_debug_printf ("__kmp_user_level_mwait_init: __kmp_mic_type = %d, "
"__kmp_mwait_enabled = %d\n", __kmp_mic_type, __kmp_mwait_enabled
); }
7025 "__kmp_mwait_enabled = %d\n",if (kmp_f_debug >= 30) { __kmp_debug_printf ("__kmp_user_level_mwait_init: __kmp_mic_type = %d, "
"__kmp_mwait_enabled = %d\n", __kmp_mic_type, __kmp_mwait_enabled
); }
7026 __kmp_mic_type, __kmp_mwait_enabled))if (kmp_f_debug >= 30) { __kmp_debug_printf ("__kmp_user_level_mwait_init: __kmp_mic_type = %d, "
"__kmp_mwait_enabled = %d\n", __kmp_mic_type, __kmp_mwait_enabled
); }
;
7027}
7028#endif /* KMP_HAVE_UMWAIT */
7029
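
The feature probe above keys off CPUID leaf 7, subleaf 0, ECX bit 5 (the WAITPKG bit that advertises umwait/tpause). For reference, the same check can be written with the <cpuid.h> helpers shipped by GCC and Clang on x86; this is an editor's sketch, not the runtime's code:

    #include <cpuid.h>

    // True if the CPU advertises WAITPKG (umwait/umonitor/tpause).
    bool has_waitpkg() {
      unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;
      // CPUID leaf 7, subleaf 0: WAITPKG is reported in ECX bit 5.
      if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx))
        return false;
      return ((ecx >> 5) & 1) != 0;
    }
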
7030static void __kmp_do_serial_initialize(void) {
7031 int i, gtid;
7032 size_t size;
7033
7034 KA_TRACE(10, ("__kmp_do_serial_initialize: enter\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_do_serial_initialize: enter\n"
); }
;
7035
7036 KMP_DEBUG_ASSERT(sizeof(kmp_int32) == 4)if (!(sizeof(kmp_int32) == 4)) { __kmp_debug_assert("sizeof(kmp_int32) == 4"
, "openmp/runtime/src/kmp_runtime.cpp", 7036); }
;
7037 KMP_DEBUG_ASSERT(sizeof(kmp_uint32) == 4)if (!(sizeof(kmp_uint32) == 4)) { __kmp_debug_assert("sizeof(kmp_uint32) == 4"
, "openmp/runtime/src/kmp_runtime.cpp", 7037); }
;
7038 KMP_DEBUG_ASSERT(sizeof(kmp_int64) == 8)if (!(sizeof(kmp_int64) == 8)) { __kmp_debug_assert("sizeof(kmp_int64) == 8"
, "openmp/runtime/src/kmp_runtime.cpp", 7038); }
;
7039 KMP_DEBUG_ASSERT(sizeof(kmp_uint64) == 8)if (!(sizeof(kmp_uint64) == 8)) { __kmp_debug_assert("sizeof(kmp_uint64) == 8"
, "openmp/runtime/src/kmp_runtime.cpp", 7039); }
;
7040 KMP_DEBUG_ASSERT(sizeof(kmp_intptr_t) == sizeof(void *))if (!(sizeof(kmp_intptr_t) == sizeof(void *))) { __kmp_debug_assert
("sizeof(kmp_intptr_t) == sizeof(void *)", "openmp/runtime/src/kmp_runtime.cpp"
, 7040); }
;
7041
7042#if OMPT_SUPPORT1
7043 ompt_pre_init();
7044#endif
7045#if OMPD_SUPPORT1
7046 __kmp_env_dump();
7047 ompd_init();
7048#endif
7049
7050 __kmp_validate_locks();
7051
7052#if ENABLE_LIBOMPTARGET1
7053 /* Initialize functions from libomptarget */
7054 __kmp_init_omptarget();
7055#endif
7056
7057 /* Initialize internal memory allocator */
7058 __kmp_init_allocator();
7059
7060 /* Register the library startup via an environment variable or via mapped
7061 shared memory file and check to see whether another copy of the library is
7062 already registered. Since a forked child process is often terminated, we
7063 postpone the registration until middle initialization in the child. */
7064 if (__kmp_need_register_serial)
7065 __kmp_register_library_startup();
7066
7067 /* TODO reinitialization of library */
7068 if (TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done)) {
7069 KA_TRACE(10, ("__kmp_do_serial_initialize: reinitialization of library\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_do_serial_initialize: reinitialization of library\n"
); }
;
7070 }
7071
7072 __kmp_global.g.g_abort = 0;
7073 TCW_SYNC_4(__kmp_global.g.g_done, FALSE)(__kmp_global.g.g_done) = (0);
7074
7075/* initialize the locks */
7076#if KMP_USE_ADAPTIVE_LOCKS(0 || 1) && !0
7077#if KMP_DEBUG_ADAPTIVE_LOCKS0
7078 __kmp_init_speculative_stats();
7079#endif
7080#endif
7081#if KMP_STATS_ENABLED0
7082 __kmp_stats_init();
7083#endif
7084 __kmp_init_lock(&__kmp_global_lock);
7085 __kmp_init_queuing_lock(&__kmp_dispatch_lock);
7086 __kmp_init_lock(&__kmp_debug_lock);
7087 __kmp_init_atomic_lock(&__kmp_atomic_lock);
7088 __kmp_init_atomic_lock(&__kmp_atomic_lock_1i);
7089 __kmp_init_atomic_lock(&__kmp_atomic_lock_2i);
7090 __kmp_init_atomic_lock(&__kmp_atomic_lock_4i);
7091 __kmp_init_atomic_lock(&__kmp_atomic_lock_4r);
7092 __kmp_init_atomic_lock(&__kmp_atomic_lock_8i);
7093 __kmp_init_atomic_lock(&__kmp_atomic_lock_8r);
7094 __kmp_init_atomic_lock(&__kmp_atomic_lock_8c);
7095 __kmp_init_atomic_lock(&__kmp_atomic_lock_10r);
7096 __kmp_init_atomic_lock(&__kmp_atomic_lock_16r);
7097 __kmp_init_atomic_lock(&__kmp_atomic_lock_16c);
7098 __kmp_init_atomic_lock(&__kmp_atomic_lock_20c);
7099 __kmp_init_atomic_lock(&__kmp_atomic_lock_32c);
7100 __kmp_init_bootstrap_lock(&__kmp_forkjoin_lock);
7101 __kmp_init_bootstrap_lock(&__kmp_exit_lock);
7102#if KMP_USE_MONITOR
7103 __kmp_init_bootstrap_lock(&__kmp_monitor_lock);
7104#endif
7105 __kmp_init_bootstrap_lock(&__kmp_tp_cached_lock);
7106
7107 /* conduct initialization and initial setup of configuration */
7108
7109 __kmp_runtime_initialize();
7110
7111#if KMP_MIC_SUPPORTED((0 || 1) && (1 || 0))
7112 __kmp_check_mic_type();
7113#endif
7114
7115// Some global variable initialization moved here from kmp_env_initialize()
7116#ifdef KMP_DEBUG1
7117 kmp_diag = 0;
7118#endif
7119 __kmp_abort_delay = 0;
7120
7121 // From __kmp_init_dflt_team_nth()
7122 /* assume the entire machine will be used */
7123 __kmp_dflt_team_nth_ub = __kmp_xproc;
7124 if (__kmp_dflt_team_nth_ub < KMP_MIN_NTH1) {
7125 __kmp_dflt_team_nth_ub = KMP_MIN_NTH1;
7126 }
7127 if (__kmp_dflt_team_nth_ub > __kmp_sys_max_nth) {
7128 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
7129 }
7130 __kmp_max_nth = __kmp_sys_max_nth;
7131 __kmp_cg_max_nth = __kmp_sys_max_nth;
7132 __kmp_teams_max_nth = __kmp_xproc; // set a "reasonable" default
7133 if (__kmp_teams_max_nth > __kmp_sys_max_nth) {
7134 __kmp_teams_max_nth = __kmp_sys_max_nth;
7135 }
7136
7137 // Three vars below moved here from __kmp_env_initialize() "KMP_BLOCKTIME"
7138 // part
7139 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME(__kmp_is_hybrid_cpu() ? (0) : (200));
7140#if KMP_USE_MONITOR
7141 __kmp_monitor_wakeups =
7142 KMP_WAKEUPS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
7143 __kmp_bt_intervals =
7144 KMP_INTERVALS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
7145#endif
7146 // From "KMP_LIBRARY" part of __kmp_env_initialize()
7147 __kmp_library = library_throughput;
7148 // From KMP_SCHEDULE initialization
7149 __kmp_static = kmp_sch_static_balanced;
7150// AC: do not use analytical here, because it is non-monotonic
7151//__kmp_guided = kmp_sch_guided_iterative_chunked;
7152//__kmp_auto = kmp_sch_guided_analytical_chunked; // AC: it is the default, no
7153// need to repeat assignment
7154// Barrier initialization. Moved here from __kmp_env_initialize() Barrier branch
7155// bit control and barrier method control parts
7156#if KMP_FAST_REDUCTION_BARRIER1
7157#define kmp_reduction_barrier_gather_bb ((int)1)
7158#define kmp_reduction_barrier_release_bb ((int)1)
7159#define kmp_reduction_barrier_gather_pat __kmp_barrier_gather_pat_dflt
7160#define kmp_reduction_barrier_release_pat __kmp_barrier_release_pat_dflt
7161#endif // KMP_FAST_REDUCTION_BARRIER
7162 for (i = bs_plain_barrier; i < bs_last_barrier; i++) {
7163 __kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt;
7164 __kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt;
7165 __kmp_barrier_gather_pattern[i] = __kmp_barrier_gather_pat_dflt;
7166 __kmp_barrier_release_pattern[i] = __kmp_barrier_release_pat_dflt;
7167#if KMP_FAST_REDUCTION_BARRIER1
7168 if (i == bs_reduction_barrier) { // tested and confirmed on ALTIX only (
7169 // lin_64 ): hyper,1
7170 __kmp_barrier_gather_branch_bits[i] = kmp_reduction_barrier_gather_bb;
7171 __kmp_barrier_release_branch_bits[i] = kmp_reduction_barrier_release_bb;
7172 __kmp_barrier_gather_pattern[i] = kmp_reduction_barrier_gather_pat;
7173 __kmp_barrier_release_pattern[i] = kmp_reduction_barrier_release_pat;
7174 }
7175#endif // KMP_FAST_REDUCTION_BARRIER
7176 }
7177#if KMP_FAST_REDUCTION_BARRIER1
7178#undef kmp_reduction_barrier_release_pat
7179#undef kmp_reduction_barrier_gather_pat
7180#undef kmp_reduction_barrier_release_bb
7181#undef kmp_reduction_barrier_gather_bb
7182#endif // KMP_FAST_REDUCTION_BARRIER
7183#if KMP_MIC_SUPPORTED((0 || 1) && (1 || 0))
7184 if (__kmp_mic_type == mic2) { // KNC
7185 // AC: plain=3,2, forkjoin=2,1 are optimal for 240 threads on KNC
7186 __kmp_barrier_gather_branch_bits[bs_plain_barrier] = 3; // plain gather
7187 __kmp_barrier_release_branch_bits[bs_forkjoin_barrier] =
7188 1; // forkjoin release
7189 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
7190 __kmp_barrier_release_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
7191 }
7192#if KMP_FAST_REDUCTION_BARRIER1
7193 if (__kmp_mic_type == mic2) { // KNC
7194 __kmp_barrier_gather_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
7195 __kmp_barrier_release_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
7196 }
7197#endif // KMP_FAST_REDUCTION_BARRIER
7198#endif // KMP_MIC_SUPPORTED
7199
7200// From KMP_CHECKS initialization
7201#ifdef KMP_DEBUG1
7202 __kmp_env_checks = TRUE(!0); /* development versions have the extra checks */
7203#else
7204 __kmp_env_checks = FALSE0; /* port versions do not have the extra checks */
7205#endif
7206
7207 // From "KMP_FOREIGN_THREADS_THREADPRIVATE" initialization
7208 __kmp_foreign_tp = TRUE(!0);
7209
7210 __kmp_global.g.g_dynamic = FALSE0;
7211 __kmp_global.g.g_dynamic_mode = dynamic_default;
7212
7213 __kmp_init_nesting_mode();
7214
7215 __kmp_env_initialize(NULL__null);
7216
7217#if KMP_HAVE_MWAIT((0 || 1) && (1 || 0) && !0) || KMP_HAVE_UMWAIT((0 || 1) && (1 || 0) && !0)
7218 __kmp_user_level_mwait_init();
7219#endif
7220// Print all messages in message catalog for testing purposes.
7221#ifdef KMP_DEBUG1
7222 char const *val = __kmp_env_get("KMP_DUMP_CATALOG");
7223 if (__kmp_str_match_true(val)) {
7224 kmp_str_buf_t buffer;
7225 __kmp_str_buf_init(&buffer){ (&buffer)->str = (&buffer)->bulk; (&buffer
)->size = sizeof((&buffer)->bulk); (&buffer)->
used = 0; (&buffer)->bulk[0] = 0; }
;
7226 __kmp_i18n_dump_catalog(&buffer);
7227 __kmp_printf("%s", buffer.str);
7228 __kmp_str_buf_free(&buffer);
7229 }
7230 __kmp_env_free(&val);
7231#endif
7232
7233 __kmp_threads_capacity =
7234 __kmp_initial_threads_capacity(__kmp_dflt_team_nth_ub);
7235 // Moved here from __kmp_env_initialize() "KMP_ALL_THREADPRIVATE" part
7236 __kmp_tp_capacity = __kmp_default_tp_capacity(
7237 __kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
7238
7239 // If the library is shut down properly, both pools must be NULL. Just in
7240 // case, set them to NULL -- some memory may leak, but subsequent code will
7241 // work even if pools are not freed.
7242 KMP_DEBUG_ASSERT(__kmp_thread_pool == NULL);
7243 KMP_DEBUG_ASSERT(__kmp_thread_pool_insert_pt == NULL);
7244 KMP_DEBUG_ASSERT(__kmp_team_pool == NULL);
7245 __kmp_thread_pool = NULL;
7246 __kmp_thread_pool_insert_pt = NULL;
7247 __kmp_team_pool = NULL;
7248
7249 /* Allocate all of the variable sized records */
7250 /* NOTE: __kmp_threads_capacity entries are allocated, but the arrays are
7251 * expandable */
7252 /* Since allocation is cache-aligned, just add extra padding at the end */
7253 size =
7254 (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * __kmp_threads_capacity +
7255 CACHE_LINE;
7256 __kmp_threads = (kmp_info_t **)__kmp_allocate(size);
7257 __kmp_root = (kmp_root_t **)((char *)__kmp_threads +
7258 sizeof(kmp_info_t *) * __kmp_threads_capacity);
7259
7260 /* init thread counts */
7261 KMP_DEBUG_ASSERT(__kmp_all_nth ==
7262 0); // Asserts fail if the library is reinitializing and
7263 KMP_DEBUG_ASSERT(__kmp_nth == 0); // something was wrong in termination.
7264 __kmp_all_nth = 0;
7265 __kmp_nth = 0;
7266
7267 /* setup the uber master thread and hierarchy */
7268 gtid = __kmp_register_root(TRUE);
7269 KA_TRACE(10, ("__kmp_do_serial_initialize T#%d\n", gtid));
7270 KMP_ASSERT(KMP_UBER_GTID(gtid));
7271 KMP_ASSERT(KMP_INITIAL_GTID(gtid));
7272
7273 KMP_MB(); /* Flush all pending memory write invalidates. */
7274
7275 __kmp_common_initialize();
7276
7277#if KMP_OS_UNIX
7278 /* invoke the child fork handler */
7279 __kmp_register_atfork();
7280#endif
7281
7282#if !KMP_DYNAMIC_LIB || \
7283 ((KMP_COMPILER_ICC || KMP_COMPILER_ICX) && KMP_OS_DARWIN)
7284 {
7285 /* Invoke the exit handler when the program finishes, only for static
7286 library and macOS* dynamic. For other dynamic libraries, we already
7287 have _fini and DllMain. */
7288 int rc = atexit(__kmp_internal_end_atexit);
7289 if (rc != 0) {
7290 __kmp_fatal(KMP_MSG(FunctionError, "atexit()"), KMP_ERR(rc),
7291 __kmp_msg_null);
7292 }
7293 }
7294#endif
7295
7296#if KMP_HANDLE_SIGNALS
7297#if KMP_OS_UNIX
7298 /* NOTE: make sure that this is called before the user installs their own
7299 signal handlers so that the user handlers are called first. this way they
7300 can return false, not call our handler, avoid terminating the library, and
7301 continue execution where they left off. */
7302 __kmp_install_signals(FALSE);
7303#endif /* KMP_OS_UNIX */
7304#if KMP_OS_WINDOWS
7305 __kmp_install_signals(TRUE);
7306#endif /* KMP_OS_WINDOWS */
7307#endif
7308
7309 /* we have finished the serial initialization */
7310 __kmp_init_counter++;
7311
7312 __kmp_init_serial = TRUE;
7313
7314 if (__kmp_settings) {
7315 __kmp_env_print();
7316 }
7317
7318 if (__kmp_display_env || __kmp_display_env_verbose) {
7319 __kmp_env_print_2();
7320 }
7321
7322#if OMPT_SUPPORT
7323 ompt_post_init();
7324#endif
7325
7326 KMP_MB();
7327
7328 KA_TRACE(10, ("__kmp_do_serial_initialize: exit\n"));
7329}
7330
7331void __kmp_serial_initialize(void) {
7332 if (__kmp_init_serial) {
7333 return;
7334 }
7335 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7336 if (__kmp_init_serial) {
7337 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7338 return;
7339 }
7340 __kmp_do_serial_initialize();
7341 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7342}
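The wrappers __kmp_serial_initialize, __kmp_middle_initialize and __kmp_parallel_initialize all follow the same double-checked initialization pattern: test the flag, take the bootstrap lock, re-test, initialize, release. A minimal stand-alone C++ sketch of that pattern, using std::atomic and std::mutex as stand-ins for the runtime's volatile flags and __kmp_initz_lock (the names below are illustrative, not the runtime's own):

  #include <atomic>
  #include <mutex>

  static std::atomic<bool> init_done{false}; // plays the role of __kmp_init_serial
  static std::mutex init_lock;               // plays the role of __kmp_initz_lock

  static void do_initialize() { /* one-time setup goes here */ }

  void ensure_initialized() {
    if (init_done.load(std::memory_order_acquire)) // fast path, no lock taken
      return;
    std::lock_guard<std::mutex> guard(init_lock);
    if (init_done.load(std::memory_order_relaxed)) // re-check under the lock
      return;                                      // another thread already won
    do_initialize();
    init_done.store(true, std::memory_order_release);
  }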
7343
7344static void __kmp_do_middle_initialize(void) {
7345 int i, j;
7346 int prev_dflt_team_nth;
7347
7348 if (!__kmp_init_serial) {
7349 __kmp_do_serial_initialize();
7350 }
7351
7352 KA_TRACE(10, ("__kmp_middle_initialize: enter\n"));
7353
7354 if (UNLIKELY(!__kmp_need_register_serial)) {
7355 // We are in a forked child process. The registration was skipped during
7356 // serial initialization in __kmp_atfork_child handler. Do it here.
7357 __kmp_register_library_startup();
7358 }
7359
7360 // Save the previous value for the __kmp_dflt_team_nth so that
7361 // we can avoid some reinitialization if it hasn't changed.
7362 prev_dflt_team_nth = __kmp_dflt_team_nth;
7363
7364#if KMP_AFFINITY_SUPPORTED
7365 // __kmp_affinity_initialize() will try to set __kmp_ncores to the
7366 // number of cores on the machine.
7367 __kmp_affinity_initialize(__kmp_affinity);
7368
7369#endif /* KMP_AFFINITY_SUPPORTED */
7370
7371 KMP_ASSERT(__kmp_xproc > 0);
7372 if (__kmp_avail_proc == 0) {
7373 __kmp_avail_proc = __kmp_xproc;
7374 }
7375
7376 // If there were empty places in num_threads list (OMP_NUM_THREADS=,,2,3),
7377 // correct them now
7378 j = 0;
7379 while ((j < __kmp_nested_nth.used) && !__kmp_nested_nth.nth[j]) {
7380 __kmp_nested_nth.nth[j] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub =
7381 __kmp_avail_proc;
7382 j++;
7383 }
7384
7385 if (__kmp_dflt_team_nth == 0) {
7386#ifdef KMP_DFLT_NTH_CORES
7387 // Default #threads = #cores
7388 __kmp_dflt_team_nth = __kmp_ncores;
7389 KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
7390 "__kmp_ncores (%d)\n",
7391 __kmp_dflt_team_nth));
7392#else
7393 // Default #threads = #available OS procs
7394 __kmp_dflt_team_nth = __kmp_avail_proc;
7395 KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
7396 "__kmp_avail_proc(%d)\n",
7397 __kmp_dflt_team_nth));
7398#endif /* KMP_DFLT_NTH_CORES */
7399 }
7400
7401 if (__kmp_dflt_team_nth < KMP_MIN_NTH) {
7402 __kmp_dflt_team_nth = KMP_MIN_NTH;
7403 }
7404 if (__kmp_dflt_team_nth > __kmp_sys_max_nth) {
7405 __kmp_dflt_team_nth = __kmp_sys_max_nth;
7406 }
7407
7408 if (__kmp_nesting_mode > 0)
7409 __kmp_set_nesting_mode_threads();
7410
7411 // There's no harm in continuing if the following check fails,
7412 // but it indicates an error in the previous logic.
7413 KMP_DEBUG_ASSERT(__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub);
7414
7415 if (__kmp_dflt_team_nth != prev_dflt_team_nth) {
7416 // Run through the __kmp_threads array and set the num threads icv for each
7417 // root thread that is currently registered with the RTL (which has not
7418 // already explicitly set its nthreads-var with a call to
7419 // omp_set_num_threads()).
7420 for (i = 0; i < __kmp_threads_capacity; i++) {
7421 kmp_info_t *thread = __kmp_threads[i];
7422 if (thread == NULL)
7423 continue;
7424 if (thread->th.th_current_task->td_icvs.nproc != 0)
7425 continue;
7426
7427 set__nproc(__kmp_threads[i], __kmp_dflt_team_nth);
7428 }
7429 }
7430 KA_TRACE(
7431 20,
7432 ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
7433 __kmp_dflt_team_nth));
7434
7435#ifdef KMP_ADJUST_BLOCKTIME
7436 /* Adjust blocktime to zero if necessary now that __kmp_avail_proc is set */
7437 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
7438 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
7439 if (__kmp_nth > __kmp_avail_proc) {
7440 __kmp_zero_bt = TRUE;
7441 }
7442 }
7443#endif /* KMP_ADJUST_BLOCKTIME */
7444
7445 /* we have finished middle initialization */
7446 TCW_SYNC_4(__kmp_init_middle, TRUE);
7447
7448 KA_TRACE(10, ("__kmp_do_middle_initialize: exit\n"));
7449}
7450
7451void __kmp_middle_initialize(void) {
7452 if (__kmp_init_middle) {
7453 return;
7454 }
7455 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7456 if (__kmp_init_middle) {
7457 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7458 return;
7459 }
7460 __kmp_do_middle_initialize();
7461 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7462}
7463
7464void __kmp_parallel_initialize(void) {
7465 int gtid = __kmp_entry_gtid(); // this might be a new root
7466
7467 /* synchronize parallel initialization (for sibling) */
7468 if (TCR_4(__kmp_init_parallel))
7469 return;
7470 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7471 if (TCR_4(__kmp_init_parallel)) {
7472 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7473 return;
7474 }
7475
7476 /* TODO reinitialization after we have already shut down */
7477 if (TCR_4(__kmp_global.g.g_done)) {
7478 KA_TRACE(
7479 10,
7480 ("__kmp_parallel_initialize: attempt to init while shutting down\n"));
7481 __kmp_infinite_loop();
7482 }
7483
7484 /* jc: The lock __kmp_initz_lock is already held, so calling
7485 __kmp_serial_initialize would cause a deadlock. So we call
7486 __kmp_do_serial_initialize directly. */
7487 if (!__kmp_init_middle) {
7488 __kmp_do_middle_initialize();
7489 }
7490 __kmp_assign_root_init_mask();
7491 __kmp_resume_if_hard_paused();
7492
7493 /* begin initialization */
7494 KA_TRACE(10, ("__kmp_parallel_initialize: enter\n"));
7495 KMP_ASSERT(KMP_UBER_GTID(gtid));
7496
7497#if KMP_ARCH_X86 || KMP_ARCH_X86_64
7498 // Save the FP control regs.
7499 // Worker threads will set theirs to these values at thread startup.
7500 __kmp_store_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word);
7501 __kmp_store_mxcsr(&__kmp_init_mxcsr);
7502 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
7503#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
7504
7505#if KMP_OS_UNIX
7506#if KMP_HANDLE_SIGNALS
7507 /* must be after __kmp_serial_initialize */
7508 __kmp_install_signals(TRUE);
7509#endif
7510#endif
7511
7512 __kmp_suspend_initialize();
7513
7514#if defined(USE_LOAD_BALANCE)
7515 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
7516 __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
7517 }
7518#else
7519 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
7520 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
7521 }
7522#endif
7523
7524 if (__kmp_version) {
7525 __kmp_print_version_2();
7526 }
7527
7528 /* we have finished parallel initialization */
7529 TCW_SYNC_4(__kmp_init_parallel, TRUE);
7530
7531 KMP_MB();
7532 KA_TRACE(10, ("__kmp_parallel_initialize: exit\n"));
7533
7534 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7535}
7536
7537void __kmp_hidden_helper_initialize() {
7538 if (TCR_4(__kmp_init_hidden_helper))
7539 return;
7540
7541 // __kmp_parallel_initialize is required before we initialize hidden helper
7542 if (!TCR_4(__kmp_init_parallel))
7543 __kmp_parallel_initialize();
7544
7545 // Double check. Note that this double check should not be placed before
7546 // __kmp_parallel_initialize as it will cause dead lock.
7547 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7548 if (TCR_4(__kmp_init_hidden_helper)) {
7549 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7550 return;
7551 }
7552
7553#if KMP_AFFINITY_SUPPORTED
7554 // Initialize hidden helper affinity settings.
7555 // The above __kmp_parallel_initialize() will initialize
7556 // regular affinity (and topology) if not already done.
7557 if (!__kmp_hh_affinity.flags.initialized)
7558 __kmp_affinity_initialize(__kmp_hh_affinity);
7559#endif
7560
7561 // Set the count of hidden helper tasks to be executed to zero
7562 KMP_ATOMIC_ST_REL(&__kmp_unexecuted_hidden_helper_tasks, 0);
7563
7564 // Set the global variable indicating that we're initializing hidden helper
7565 // team/threads
7566 TCW_SYNC_4(__kmp_init_hidden_helper_threads, TRUE);
7567
7568 // Platform independent initialization
7569 __kmp_do_initialize_hidden_helper_threads();
7570
7571 // Wait here for the finish of initialization of hidden helper teams
7572 __kmp_hidden_helper_threads_initz_wait();
7573
7574 // We have finished hidden helper initialization
7575 TCW_SYNC_4(__kmp_init_hidden_helper, TRUE);
7576
7577 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7578}
7579
7580/* ------------------------------------------------------------------------ */
7581
7582void __kmp_run_before_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
7583 kmp_team_t *team) {
7584 kmp_disp_t *dispatch;
7585
7586 KMP_MB();
7587
7588 /* none of the threads have encountered any constructs, yet. */
7589 this_thr->th.th_local.this_construct = 0;
7590#if KMP_CACHE_MANAGE
7591 KMP_CACHE_PREFETCH(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_arrived);
7592#endif /* KMP_CACHE_MANAGE */
7593 dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
7594 KMP_DEBUG_ASSERT(dispatch);
7595 KMP_DEBUG_ASSERT(team->t.t_dispatch);
7596 // KMP_DEBUG_ASSERT( this_thr->th.th_dispatch == &team->t.t_dispatch[
7597 // this_thr->th.th_info.ds.ds_tid ] );
7598
7599 dispatch->th_disp_index = 0; /* reset the dispatch buffer counter */
7600 dispatch->th_doacross_buf_idx = 0; // reset doacross dispatch buffer counter
7601 if (__kmp_env_consistency_check)
7602 __kmp_push_parallel(gtid, team->t.t_ident);
7603
7604 KMP_MB(); /* Flush all pending memory write invalidates. */
7605}
7606
7607void __kmp_run_after_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
7608 kmp_team_t *team) {
7609 if (__kmp_env_consistency_check)
7610 __kmp_pop_parallel(gtid, team->t.t_ident);
7611
7612 __kmp_finish_implicit_task(this_thr);
7613}
7614
7615int __kmp_invoke_task_func(int gtid) {
7616 int rc;
7617 int tid = __kmp_tid_from_gtid(gtid);
7618 kmp_info_t *this_thr = __kmp_threads[gtid];
7619 kmp_team_t *team = this_thr->th.th_team;
7620
7621 __kmp_run_before_invoked_task(gtid, tid, this_thr, team);
7622#if USE_ITT_BUILD
7623 if (__itt_stack_caller_create_ptr) {
7624 // inform ittnotify about entering user's code
7625 if (team->t.t_stack_id != NULL) {
7626 __kmp_itt_stack_callee_enter((__itt_caller)team->t.t_stack_id);
7627 } else {
7628 KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);
7629 __kmp_itt_stack_callee_enter(
7630 (__itt_caller)team->t.t_parent->t.t_stack_id);
7631 }
7632 }
7633#endif /* USE_ITT_BUILD */
7634#if INCLUDE_SSC_MARKS
7635 SSC_MARK_INVOKING();
7636#endif
7637
7638#if OMPT_SUPPORT
7639 void *dummy;
7640 void **exit_frame_p;
7641 ompt_data_t *my_task_data;
7642 ompt_data_t *my_parallel_data;
7643 int ompt_team_size;
7644
7645 if (ompt_enabled.enabled) {
7646 exit_frame_p = &(team->t.t_implicit_task_taskdata[tid]
7647 .ompt_task_info.frame.exit_frame.ptr);
7648 } else {
7649 exit_frame_p = &dummy;
7650 }
7651
7652 my_task_data =
7653 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data);
7654 my_parallel_data = &(team->t.ompt_team_info.parallel_data);
7655 if (ompt_enabled.ompt_callback_implicit_task) {
7656 ompt_team_size = team->t.t_nproc;
7657 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
7658 ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size,
7659 __kmp_tid_from_gtid(gtid), ompt_task_implicit);
7660 OMPT_CUR_TASK_INFO(this_thr)->thread_num = __kmp_tid_from_gtid(gtid);
7661 }
7662#endif
7663
7664#if KMP_STATS_ENABLED
7665 stats_state_e previous_state = KMP_GET_THREAD_STATE();
7666 if (previous_state == stats_state_e::TEAMS_REGION) {
7667 KMP_PUSH_PARTITIONED_TIMER(OMP_teams);
7668 } else {
7669 KMP_PUSH_PARTITIONED_TIMER(OMP_parallel);
7670 }
7671 KMP_SET_THREAD_STATE(IMPLICIT_TASK);
7672#endif
7673
7674 rc = __kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn), gtid,
7675 tid, (int)team->t.t_argc, (void **)team->t.t_argv
7676#if OMPT_SUPPORT
7677 ,
7678 exit_frame_p
7679#endif
7680 );
7681#if OMPT_SUPPORT
7682 *exit_frame_p = NULL;
7683 this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_team;
7684#endif
7685
7686#if KMP_STATS_ENABLED
7687 if (previous_state == stats_state_e::TEAMS_REGION) {
7688 KMP_SET_THREAD_STATE(previous_state);
7689 }
7690 KMP_POP_PARTITIONED_TIMER();
7691#endif
7692
7693#if USE_ITT_BUILD
7694 if (__itt_stack_caller_create_ptr) {
7695 // inform ittnotify about leaving user's code
7696 if (team->t.t_stack_id != NULL) {
7697 __kmp_itt_stack_callee_leave((__itt_caller)team->t.t_stack_id);
7698 } else {
7699 KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);
7700 __kmp_itt_stack_callee_leave(
7701 (__itt_caller)team->t.t_parent->t.t_stack_id);
7702 }
7703 }
7704#endif /* USE_ITT_BUILD */
7705 __kmp_run_after_invoked_task(gtid, tid, this_thr, team);
7706
7707 return rc;
7708}
7709
7710void __kmp_teams_master(int gtid) {
7711 // This routine is called by all primary threads in teams construct
7712 kmp_info_t *thr = __kmp_threads[gtid];
7713 kmp_team_t *team = thr->th.th_team;
7714 ident_t *loc = team->t.t_ident;
7715 thr->th.th_set_nproc = thr->th.th_teams_size.nth;
7716 KMP_DEBUG_ASSERT(thr->th.th_teams_microtask);
7717 KMP_DEBUG_ASSERT(thr->th.th_set_nproc);
7718 KA_TRACE(20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n", gtid,
7719 __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask));
7720
7721 // This thread is a new CG root. Set up the proper variables.
7722 kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(sizeof(kmp_cg_root_t));
7723 tmp->cg_root = thr; // Make thr the CG root
7724 // Init to thread limit stored when league primary threads were forked
7725 tmp->cg_thread_limit = thr->th.th_current_task->td_icvs.thread_limit;
7726 tmp->cg_nthreads = 1; // Init counter to one active thread, this one
7727 KA_TRACE(100, ("__kmp_teams_master: Thread %p created node %p and init"
7728 " cg_nthreads to 1\n",
7729 thr, tmp));
7730 tmp->up = thr->th.th_cg_roots;
7731 thr->th.th_cg_roots = tmp;
7732
7733// Launch league of teams now, but not let workers execute
7734// (they hang on fork barrier until next parallel)
7735#if INCLUDE_SSC_MARKS
7736 SSC_MARK_FORKING();
7737#endif
7738 __kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc,
7739 (microtask_t)thr->th.th_teams_microtask, // "wrapped" task
7740 VOLATILE_CAST(launch_t) __kmp_invoke_task_func, NULL);
7741#if INCLUDE_SSC_MARKS
7742 SSC_MARK_JOINING();
7743#endif
7744 // If the team size was reduced from the limit, set it to the new size
7745 if (thr->th.th_team_nproc < thr->th.th_teams_size.nth)
7746 thr->th.th_teams_size.nth = thr->th.th_team_nproc;
7747 // AC: last parameter "1" eliminates join barrier which won't work because
7748 // worker threads are in a fork barrier waiting for more parallel regions
7749 __kmp_join_call(loc, gtid
7750#if OMPT_SUPPORT
7751 ,
7752 fork_context_intel
7753#endif
7754 ,
7755 1);
7756}
7757
7758int __kmp_invoke_teams_master(int gtid) {
7759 kmp_info_t *this_thr = __kmp_threads[gtid];
7760 kmp_team_t *team = this_thr->th.th_team;
7761#if KMP_DEBUG
7762 if (!__kmp_threads[gtid]->th.th_team->t.t_serialized)
7763 KMP_DEBUG_ASSERT((void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn ==
7764 (void *)__kmp_teams_master);
7765#endif
7766 __kmp_run_before_invoked_task(gtid, 0, this_thr, team);
7767#if OMPT_SUPPORT
7768 int tid = __kmp_tid_from_gtid(gtid);
7769 ompt_data_t *task_data =
7770 &team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data;
7771 ompt_data_t *parallel_data = &team->t.ompt_team_info.parallel_data;
7772 if (ompt_enabled.ompt_callback_implicit_task) {
7773 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
7774 ompt_scope_begin, parallel_data, task_data, team->t.t_nproc, tid,
7775 ompt_task_initial);
7776 OMPT_CUR_TASK_INFO(this_thr)->thread_num = tid;
7777 }
7778#endif
7779 __kmp_teams_master(gtid);
7780#if OMPT_SUPPORT
7781 this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_league;
7782#endif
7783 __kmp_run_after_invoked_task(gtid, 0, this_thr, team);
7784 return 1;
7785}
7786
7787/* this sets the requested number of threads for the next parallel region
7788 encountered by this team. since this should be enclosed in the forkjoin
7789 critical section it should avoid race conditions with asymmetrical nested
7790 parallelism */
7791
7792void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
7793 kmp_info_t *thr = __kmp_threads[gtid];
7794
7795 if (num_threads > 0)
7796 thr->th.th_set_nproc = num_threads;
7797}
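__kmp_push_num_threads is reached through the compiler-facing entry point __kmpc_push_num_threads; a parallel region with a num_threads clause is lowered, roughly, to that call followed by __kmpc_fork_call. A minimal user-level C++ example that exercises this path:

  #include <omp.h>
  #include <cstdio>

  int main() {
    // The num_threads(4) clause becomes, approximately,
    //   __kmpc_push_num_threads(&loc, gtid, 4);
    //   __kmpc_fork_call(&loc, ...);
    // which stores 4 in th_set_nproc via __kmp_push_num_threads above.
    #pragma omp parallel num_threads(4)
    {
      std::printf("thread %d of %d\n", omp_get_thread_num(),
                  omp_get_num_threads());
    }
    return 0;
  }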
7798
7799static void __kmp_push_thread_limit(kmp_info_t *thr, int num_teams,
7800 int num_threads) {
7801 KMP_DEBUG_ASSERT(thr);
7802 // Remember the number of threads for inner parallel regions
7803 if (!TCR_4(__kmp_init_middle))
7804 __kmp_middle_initialize(); // get internal globals calculated
7805 __kmp_assign_root_init_mask();
7806 KMP_DEBUG_ASSERT(__kmp_avail_proc);
7807 KMP_DEBUG_ASSERT(__kmp_dflt_team_nth);
7808
7809 if (num_threads == 0) {
7810 if (__kmp_teams_thread_limit > 0) {
7811 num_threads = __kmp_teams_thread_limit;
7812 } else {
7813 num_threads = __kmp_avail_proc / num_teams;
7814 }
7815 // adjust num_threads w/o warning as it is not user setting
7816 // num_threads = min(num_threads, nthreads-var, thread-limit-var)
7817 // no thread_limit clause specified - do not change thread-limit-var ICV
7818 if (num_threads > __kmp_dflt_team_nth) {
7819 num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
7820 }
7821 if (num_threads > thr->th.th_current_task->td_icvs.thread_limit) {
7822 num_threads = thr->th.th_current_task->td_icvs.thread_limit;
7823 } // prevent team size to exceed thread-limit-var
7824 if (num_teams * num_threads > __kmp_teams_max_nth) {
7825 num_threads = __kmp_teams_max_nth / num_teams;
7826 }
7827 if (num_threads == 0) {
7828 num_threads = 1;
7829 }
7830 } else {
7831 if (num_threads < 0) {
7832 __kmp_msg(kmp_ms_warning, KMP_MSG(CantFormThrTeam, num_threads, 1),
7833 __kmp_msg_null);
7834 num_threads = 1;
7835 }
7836 // This thread will be the primary thread of the league primary threads
7837 // Store new thread limit; old limit is saved in th_cg_roots list
7838 thr->th.th_current_task->td_icvs.thread_limit = num_threads;
7839 // num_threads = min(num_threads, nthreads-var)
7840 if (num_threads > __kmp_dflt_team_nth) {
7841 num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
7842 }
7843 if (num_teams * num_threads > __kmp_teams_max_nth) {
7844 int new_threads = __kmp_teams_max_nth / num_teams;
7845 if (new_threads == 0) {
7846 new_threads = 1;
7847 }
7848 if (new_threads != num_threads) {
7849 if (!__kmp_reserve_warn) { // user asked for too many threads
7850 __kmp_reserve_warn = 1; // conflicts with KMP_TEAMS_THREAD_LIMIT
7851 __kmp_msg(kmp_ms_warning,
7852 KMP_MSG(CantFormThrTeam, num_threads, new_threads),
7853 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
7854 }
7855 }
7856 num_threads = new_threads;
7857 }
7858 }
7859 thr->th.th_teams_size.nth = num_threads;
7860}
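To make the clamping in the num_threads == 0 branch above concrete, here is a hedged stand-alone sketch with made-up numbers; avail_proc, dflt_team_nth, icv_thread_limit and teams_max_nth stand in for __kmp_avail_proc, __kmp_dflt_team_nth, the task's thread-limit-var ICV and __kmp_teams_max_nth:

  #include <algorithm>
  #include <cstdio>

  // Mirrors the "no thread_limit clause" branch of __kmp_push_thread_limit.
  int clamp_team_size(int num_teams, int avail_proc, int dflt_team_nth,
                      int icv_thread_limit, int teams_max_nth) {
    int num_threads = avail_proc / num_teams;              // procs per team
    num_threads = std::min(num_threads, dflt_team_nth);    // honor nthreads-var
    num_threads = std::min(num_threads, icv_thread_limit); // honor thread-limit-var
    if (num_teams * num_threads > teams_max_nth)           // cap the whole league
      num_threads = teams_max_nth / num_teams;
    return std::max(num_threads, 1);
  }

  int main() {
    // 4 teams on a 64-proc machine, nthreads-var=16, thread-limit=12,
    // teams_max_nth=32: 64/4=16 -> 16 -> 12 -> 32/4=8 threads per team.
    std::printf("%d\n", clamp_team_size(4, 64, 16, 12, 32)); // prints 8
    return 0;
  }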
7861
7862/* this sets the requested number of teams for the teams region and/or
7863 the number of threads for the next parallel region encountered */
7864void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
7865 int num_threads) {
7866 kmp_info_t *thr = __kmp_threads[gtid];
7867 if (num_teams < 0) {
7868 // OpenMP specification requires requested values to be positive,
7869 // but people can send us any value, so we'd better check
7870 __kmp_msg(kmp_ms_warning, KMP_MSG(NumTeamsNotPositive, num_teams, 1),
7871 __kmp_msg_null);
7872 num_teams = 1;
7873 }
7874 if (num_teams == 0) {
7875 if (__kmp_nteams > 0) {
7876 num_teams = __kmp_nteams;
7877 } else {
7878 num_teams = 1; // default number of teams is 1.
7879 }
7880 }
7881 if (num_teams > __kmp_teams_max_nth) { // if too many teams requested?
7882 if (!__kmp_reserve_warn) {
7883 __kmp_reserve_warn = 1;
7884 __kmp_msg(kmp_ms_warning,
7885 KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
7886 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
7887 }
7888 num_teams = __kmp_teams_max_nth;
7889 }
7890 // Set number of teams (number of threads in the outer "parallel" of the
7891 // teams)
7892 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
7893
7894 __kmp_push_thread_limit(thr, num_teams, num_threads);
7895}
7896
7897/* This sets the requested number of teams for the teams region and/or
7898 the number of threads for the next parallel region encountered */
7899void __kmp_push_num_teams_51(ident_t *id, int gtid, int num_teams_lb,
7900 int num_teams_ub, int num_threads) {
7901 kmp_info_t *thr = __kmp_threads[gtid];
7902 KMP_DEBUG_ASSERT(num_teams_lb >= 0 && num_teams_ub >= 0);
7903 KMP_DEBUG_ASSERT(num_teams_ub >= num_teams_lb);
7904 KMP_DEBUG_ASSERT(num_threads >= 0);
7905
7906 if (num_teams_lb > num_teams_ub) {
7907 __kmp_fatal(KMP_MSG(FailedToCreateTeam, num_teams_lb, num_teams_ub),
7908 KMP_HNT(SetNewBound, __kmp_teams_max_nth), __kmp_msg_null);
7909 }
7910
7911 int num_teams = 1; // default number of teams is 1.
7912
7913 if (num_teams_lb == 0 && num_teams_ub > 0)
7914 num_teams_lb = num_teams_ub;
7915
7916 if (num_teams_lb == 0 && num_teams_ub == 0) { // no num_teams clause
7917 num_teams = (__kmp_nteams > 0) ? __kmp_nteams : num_teams;
7918 if (num_teams > __kmp_teams_max_nth) {
7919 if (!__kmp_reserve_warn) {
7920 __kmp_reserve_warn = 1;
7921 __kmp_msg(kmp_ms_warning,
7922 KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
7923 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
7924 }
7925 num_teams = __kmp_teams_max_nth;
7926 }
7927 } else if (num_teams_lb == num_teams_ub) { // requires exact number of teams
7928 num_teams = num_teams_ub;
7929 } else { // num_teams_lb <= num_teams <= num_teams_ub
7930 if (num_threads <= 0) {
7931 if (num_teams_ub > __kmp_teams_max_nth) {
7932 num_teams = num_teams_lb;
7933 } else {
7934 num_teams = num_teams_ub;
7935 }
7936 } else {
7937 num_teams = (num_threads > __kmp_teams_max_nth)
7938 ? num_teams
7939 : __kmp_teams_max_nth / num_threads;
7940 if (num_teams < num_teams_lb) {
7941 num_teams = num_teams_lb;
7942 } else if (num_teams > num_teams_ub) {
7943 num_teams = num_teams_ub;
7944 }
7945 }
7946 }
7947 // Set number of teams (number of threads in the outer "parallel" of the
7948 // teams)
7949 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
7950
7951 __kmp_push_thread_limit(thr, num_teams, num_threads);
7952}
7953
7954// Set the proc_bind var to use in the following parallel region.
7955void __kmp_push_proc_bind(ident_t *id, int gtid, kmp_proc_bind_t proc_bind) {
7956 kmp_info_t *thr = __kmp_threads[gtid];
7957 thr->th.th_set_proc_bind = proc_bind;
7958}
7959
7960/* Launch the worker threads into the microtask. */
7961
7962void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team) {
7963 kmp_info_t *this_thr = __kmp_threads[gtid];
7964
7965#ifdef KMP_DEBUG
7966 int f;
7967#endif /* KMP_DEBUG */
7968
7969 KMP_DEBUG_ASSERT(team);
7970 KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
7971 KMP_ASSERT(KMP_MASTER_GTID(gtid));
7972 KMP_MB(); /* Flush all pending memory write invalidates. */
7973
7974 team->t.t_construct = 0; /* no single directives seen yet */
7975 team->t.t_ordered.dt.t_value =
7976 0; /* thread 0 enters the ordered section first */
7977
7978 /* Reset the identifiers on the dispatch buffer */
7979 KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
7980 if (team->t.t_max_nproc > 1) {
7981 int i;
7982 for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
7983 team->t.t_disp_buffer[i].buffer_index = i;
7984 team->t.t_disp_buffer[i].doacross_buf_idx = i;
7985 }
7986 } else {
7987 team->t.t_disp_buffer[0].buffer_index = 0;
7988 team->t.t_disp_buffer[0].doacross_buf_idx = 0;
7989 }
7990
7991 KMP_MB(); /* Flush all pending memory write invalidates. */
7992 KMP_ASSERT(this_thr->th.th_team == team);
7993
7994#ifdef KMP_DEBUG
7995 for (f = 0; f < team->t.t_nproc; f++) {
7996 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
7997 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc);
7998 }
7999#endif /* KMP_DEBUG */
8000
8001 /* release the worker threads so they may begin working */
8002 __kmp_fork_barrier(gtid, 0);
8003}
8004
8005void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {
8006 kmp_info_t *this_thr = __kmp_threads[gtid];
8007
8008 KMP_DEBUG_ASSERT(team);
8009 KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
8010 KMP_ASSERT(KMP_MASTER_GTID(gtid));
8011 KMP_MB(); /* Flush all pending memory write invalidates. */
8012
8013 /* Join barrier after fork */
8014
8015#ifdef KMP_DEBUG
8016 if (__kmp_threads[gtid] &&
8017 __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc) {
8018 __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid,
8019 __kmp_threads[gtid]);
8020 __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, "
8021 "team->t.t_nproc=%d\n",
8022 gtid, __kmp_threads[gtid]->th.th_team_nproc, team,
8023 team->t.t_nproc);
8024 __kmp_print_structure();
8025 }
8026 KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
8027 __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc);
8028#endif /* KMP_DEBUG */
8029
8030 __kmp_join_barrier(gtid); /* wait for everyone */
8031#if OMPT_SUPPORT
8032 if (ompt_enabled.enabled &&
8033 this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) {
8034 int ds_tid = this_thr->th.th_info.ds.ds_tid;
8035 ompt_data_t *task_data = OMPT_CUR_TASK_DATA(this_thr);
8036 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
8037#if OMPT_OPTIONAL
8038 void *codeptr = NULL;
8039 if (KMP_MASTER_TID(ds_tid) &&
8040 (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
8041 ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
8042 codeptr = OMPT_CUR_TEAM_INFO(this_thr)->master_return_address;
8043
8044 if (ompt_enabled.ompt_callback_sync_region_wait) {
8045 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
8046 ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
8047 codeptr);
8048 }
8049 if (ompt_enabled.ompt_callback_sync_region) {
8050 ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
8051 ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
8052 codeptr);
8053 }
8054#endif
8055 if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
8056 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
8057 ompt_scope_end, NULL, task_data, 0, ds_tid,
8058 ompt_task_implicit); // TODO: Can this be ompt_task_initial?
8059 }
8060 }
8061#endif
8062
8063 KMP_MB(); /* Flush all pending memory write invalidates. */
8064 KMP_ASSERT(this_thr->th.th_team == team);
8065}
8066
8067/* ------------------------------------------------------------------------ */
8068
8069#ifdef USE_LOAD_BALANCE
8070
8071// Return the worker threads actively spinning in the hot team, if we
8072// are at the outermost level of parallelism. Otherwise, return 0.
8073static int __kmp_active_hot_team_nproc(kmp_root_t *root) {
8074 int i;
8075 int retval;
8076 kmp_team_t *hot_team;
8077
8078 if (root->r.r_active) {
8079 return 0;
8080 }
8081 hot_team = root->r.r_hot_team;
8082 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
8083 return hot_team->t.t_nproc - 1; // Don't count primary thread
8084 }
8085
8086 // Skip the primary thread - it is accounted for elsewhere.
8087 retval = 0;
8088 for (i = 1; i < hot_team->t.t_nproc; i++) {
8089 if (hot_team->t.t_threads[i]->th.th_active) {
8090 retval++;
8091 }
8092 }
8093 return retval;
8094}
8095
8096// Perform an automatic adjustment to the number of
8097// threads used by the next parallel region.
8098static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc) {
8099 int retval;
8100 int pool_active;
8101 int hot_team_active;
8102 int team_curr_active;
8103 int system_active;
8104
8105 KB_TRACE(20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n", root,
8106 set_nproc));
8107 KMP_DEBUG_ASSERT(root);
8108 KMP_DEBUG_ASSERT(root->r.r_root_team->t.t_threads[0]
8109 ->th.th_current_task->td_icvs.dynamic == TRUE);
8110 KMP_DEBUG_ASSERT(set_nproc > 1);
8111
8112 if (set_nproc == 1) {
8113 KB_TRACE(20, ("__kmp_load_balance_nproc: serial execution.\n"));
8114 return 1;
8115 }
8116
8117 // Threads that are active in the thread pool, active in the hot team for this
8118 // particular root (if we are at the outer par level), and the currently
8119 // executing thread (to become the primary thread) are available to add to the
8120 // new team, but are currently contributing to the system load, and must be
8121 // accounted for.
8122 pool_active = __kmp_thread_pool_active_nth;
8123 hot_team_active = __kmp_active_hot_team_nproc(root);
8124 team_curr_active = pool_active + hot_team_active + 1;
8125
8126 // Check the system load.
8127 system_active = __kmp_get_load_balance(__kmp_avail_proc + team_curr_active);
8128 KB_TRACE(30, ("__kmp_load_balance_nproc: system active = %d pool active = %d "
8129 "hot team active = %d\n",
8130 system_active, pool_active, hot_team_active));
8131
8132 if (system_active < 0) {
8133 // There was an error reading the necessary info from /proc, so use the
8134 // thread limit algorithm instead. Once we set __kmp_global.g.g_dynamic_mode
8135 // = dynamic_thread_limit, we shouldn't wind up getting back here.
8136 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
8137 KMP_WARNING(CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit");
8138
8139 // Make this call behave like the thread limit algorithm.
8140 retval = __kmp_avail_proc - __kmp_nth +
8141 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
8142 if (retval > set_nproc) {
8143 retval = set_nproc;
8144 }
8145 if (retval < KMP_MIN_NTH) {
8146 retval = KMP_MIN_NTH;
8147 }
8148
8149 KB_TRACE(20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n",
8150 retval));
8151 return retval;
8152 }
8153
8154 // There is a slight delay in the load balance algorithm in detecting new
8155 // running procs. The real system load at this instant should be at least as
8156 // large as the #active omp thread that are available to add to the team.
8157 if (system_active < team_curr_active) {
8158 system_active = team_curr_active;
8159 }
8160 retval = __kmp_avail_proc - system_active + team_curr_active;
8161 if (retval > set_nproc) {
8162 retval = set_nproc;
8163 }
8164 if (retval < KMP_MIN_NTH) {
8165 retval = KMP_MIN_NTH;
8166 }
8167
8168 KB_TRACE(20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval));
8169 return retval;
8170} // __kmp_load_balance_nproc()
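A hedged numerical walk-through of the normal (non-error) path above; the parameter names stand in for the requested team size, __kmp_avail_proc, the measured system load, and team_curr_active:

  #include <algorithm>
  #include <cstdio>

  // Mirrors the tail of __kmp_load_balance_nproc(): idle capacity plus the
  // threads this root already contributes, clamped to [min_nth, set_nproc].
  int load_balance_nproc(int set_nproc, int avail_proc, int system_active,
                         int team_curr_active, int min_nth = 1) {
    system_active = std::max(system_active, team_curr_active);
    int retval = avail_proc - system_active + team_curr_active;
    retval = std::min(retval, set_nproc); // never exceed the requested size
    return std::max(retval, min_nth);
  }

  int main() {
    // 16 procs, 10 threads busy system-wide, 3 of them belong to this root,
    // request of 8: 16 - 10 + 3 = 9, clamped to the requested 8.
    std::printf("%d\n", load_balance_nproc(8, 16, 10, 3)); // prints 8
    return 0;
  }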
8171
8172#endif /* USE_LOAD_BALANCE */
8173
8174/* ------------------------------------------------------------------------ */
8175
8176/* NOTE: this is called with the __kmp_init_lock held */
8177void __kmp_cleanup(void) {
8178 int f;
8179
8180 KA_TRACE(10, ("__kmp_cleanup: enter\n"));
8181
8182 if (TCR_4(__kmp_init_parallel)) {
8183#if KMP_HANDLE_SIGNALS
8184 __kmp_remove_signals();
8185#endif
8186 TCW_4(__kmp_init_parallel, FALSE);
8187 }
8188
8189 if (TCR_4(__kmp_init_middle)) {
8190#if KMP_AFFINITY_SUPPORTED
8191 __kmp_affinity_uninitialize();
8192#endif /* KMP_AFFINITY_SUPPORTED */
8193 __kmp_cleanup_hierarchy();
8194 TCW_4(__kmp_init_middle, FALSE);
8195 }
8196
8197 KA_TRACE(10, ("__kmp_cleanup: go serial cleanup\n"));
8198
8199 if (__kmp_init_serial) {
8200 __kmp_runtime_destroy();
8201 __kmp_init_serial = FALSE;
8202 }
8203
8204 __kmp_cleanup_threadprivate_caches();
8205
8206 for (f = 0; f < __kmp_threads_capacity; f++) {
8207 if (__kmp_root[f] != NULL) {
8208 __kmp_free(__kmp_root[f]);
8209 __kmp_root[f] = NULL;
8210 }
8211 }
8212 __kmp_free(__kmp_threads);
8213 // __kmp_threads and __kmp_root were allocated at once, as single block, so
8214 // there is no need in freeing __kmp_root.
8215 __kmp_threads = NULL;
8216 __kmp_root = NULL;
8217 __kmp_threads_capacity = 0;
8218
8219 // Free old __kmp_threads arrays if they exist.
8220 kmp_old_threads_list_t *ptr = __kmp_old_threads_list;
8221 while (ptr) {
8222 kmp_old_threads_list_t *next = ptr->next;
8223 __kmp_free(ptr->threads);
8224 __kmp_free(ptr);
8225 ptr = next;
8226 }
8227
8228#if KMP_USE_DYNAMIC_LOCK
8229 __kmp_cleanup_indirect_user_locks();
8230#else
8231 __kmp_cleanup_user_locks();
8232#endif
8233#if OMPD_SUPPORT
8234 if (ompd_state) {
8235 __kmp_free(ompd_env_block);
8236 ompd_env_block = NULL;
8237 ompd_env_block_size = 0;
8238 }
8239#endif
8240
8241#if KMP_AFFINITY_SUPPORTED
8242 KMP_INTERNAL_FREE(CCAST(char *, __kmp_cpuinfo_file));
8243 __kmp_cpuinfo_file = NULL;
8244#endif /* KMP_AFFINITY_SUPPORTED */
8245
8246#if KMP_USE_ADAPTIVE_LOCKS
8247#if KMP_DEBUG_ADAPTIVE_LOCKS
8248 __kmp_print_speculative_stats();
8249#endif
8250#endif
8251 KMP_INTERNAL_FREE(__kmp_nested_nth.nth);
8252 __kmp_nested_nth.nth = NULL;
8253 __kmp_nested_nth.size = 0;
8254 __kmp_nested_nth.used = 0;
8255 KMP_INTERNAL_FREE(__kmp_nested_proc_bind.bind_types);
8256 __kmp_nested_proc_bind.bind_types = NULL;
8257 __kmp_nested_proc_bind.size = 0;
8258 __kmp_nested_proc_bind.used = 0;
8259 if (__kmp_affinity_format) {
8260 KMP_INTERNAL_FREE(__kmp_affinity_format);
8261 __kmp_affinity_format = NULL;
8262 }
8263
8264 __kmp_i18n_catclose();
8265
8266#if KMP_USE_HIER_SCHED
8267 __kmp_hier_scheds.deallocate();
8268#endif
8269
8270#if KMP_STATS_ENABLED
8271 __kmp_stats_fini();
8272#endif
8273
8274 KA_TRACE(10, ("__kmp_cleanup: exit\n"));
8275}
8276
8277/* ------------------------------------------------------------------------ */
8278
8279int __kmp_ignore_mppbeg(void) {
8280 char *env;
8281
8282 if ((env = getenv("KMP_IGNORE_MPPBEG")) != NULL__null) {
8283 if (__kmp_str_match_false(env))
8284 return FALSE0;
8285 }
8286 // By default __kmpc_begin() is no-op.
8287 return TRUE(!0);
8288}
8289
8290int __kmp_ignore_mppend(void) {
8291 char *env;
8292
8293 if ((env = getenv("KMP_IGNORE_MPPEND")) != NULL__null) {
8294 if (__kmp_str_match_false(env))
8295 return FALSE0;
8296 }
8297 // By default __kmpc_end() is no-op.
8298 return TRUE(!0);
8299}
8300
8301void __kmp_internal_begin(void) {
8302 int gtid;
8303 kmp_root_t *root;
8304
8305 /* this is a very important step as it will register new sibling threads
8306 and assign these new uber threads a new gtid */
8307 gtid = __kmp_entry_gtid();
8308 root = __kmp_threads[gtid]->th.th_root;
8309 KMP_ASSERT(KMP_UBER_GTID(gtid));
8310
8311 if (root->r.r_begin)
8312 return;
8313 __kmp_acquire_lock(&root->r.r_begin_lock, gtid);
8314 if (root->r.r_begin) {
8315 __kmp_release_lock(&root->r.r_begin_lock, gtid);
8316 return;
8317 }
8318
8319 root->r.r_begin = TRUE;
8320
8321 __kmp_release_lock(&root->r.r_begin_lock, gtid);
8322}
8323
8324/* ------------------------------------------------------------------------ */
8325
8326void __kmp_user_set_library(enum library_type arg) {
8327 int gtid;
8328 kmp_root_t *root;
8329 kmp_info_t *thread;
8330
8331 /* first, make sure we are initialized so we can get our gtid */
8332
8333 gtid = __kmp_entry_gtid();
8334 thread = __kmp_threads[gtid];
8335
8336 root = thread->th.th_root;
8337
8338 KA_TRACE(20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg,
8339 library_serial));
8340 if (root->r.r_in_parallel) { /* Must be called in serial section of top-level
8341 thread */
8342 KMP_WARNING(SetLibraryIncorrectCall);
8343 return;
8344 }
8345
8346 switch (arg) {
8347 case library_serial:
8348 thread->th.th_set_nproc = 0;
8349 set__nproc(thread, 1);
8350 break;
8351 case library_turnaround:
8352 thread->th.th_set_nproc = 0;
8353 set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
8354 : __kmp_dflt_team_nth_ub);
8355 break;
8356 case library_throughput:
8357 thread->th.th_set_nproc = 0;
8358 set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
8359 : __kmp_dflt_team_nth_ub);
8360 break;
8361 default:
8362 KMP_FATAL(UnknownLibraryType, arg);
8363 }
8364
8365 __kmp_aux_set_library(arg);
8366}
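__kmp_user_set_library is what the kmp_set_library*() extension routines (and the KMP_LIBRARY environment variable) ultimately reach; a short hedged usage sketch, assuming the extension declarations exposed by the runtime's <omp.h>:

  #include <omp.h>

  int main() {
    kmp_set_library_throughput(); // same effect as KMP_LIBRARY=throughput
    #pragma omp parallel
    {
      // library_throughput switches an infinite blocktime back to the
      // default (see case library_throughput in __kmp_aux_set_library below).
    }
    return 0;
  }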
8367
8368void __kmp_aux_set_stacksize(size_t arg) {
8369 if (!__kmp_init_serial)
8370 __kmp_serial_initialize();
8371
8372#if KMP_OS_DARWIN
8373 if (arg & (0x1000 - 1)) {
8374 arg &= ~(0x1000 - 1);
8375 if (arg + 0x1000) /* check for overflow if we round up */
8376 arg += 0x1000;
8377 }
8378#endif
8379 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
8380
8381 /* only change the default stacksize before the first parallel region */
8382 if (!TCR_4(__kmp_init_parallel)) {
8383 size_t value = arg; /* argument is in bytes */
8384
8385 if (value < __kmp_sys_min_stksize)
8386 value = __kmp_sys_min_stksize;
8387 else if (value > KMP_MAX_STKSIZE)
8388 value = KMP_MAX_STKSIZE;
8389
8390 __kmp_stksize = value;
8391
8392 __kmp_env_stksize = TRUE; /* was KMP_STACKSIZE specified? */
8393 }
8394
8395 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
8396}
8397
8398/* set the behaviour of the runtime library */
8399/* TODO this can cause some odd behaviour with sibling parallelism... */
8400void __kmp_aux_set_library(enum library_type arg) {
8401 __kmp_library = arg;
8402
8403 switch (__kmp_library) {
8404 case library_serial: {
8405 KMP_INFORM(LibraryIsSerial);
8406 } break;
8407 case library_turnaround:
8408 if (__kmp_use_yield == 1 && !__kmp_use_yield_exp_set)
8409 __kmp_use_yield = 2; // only yield when oversubscribed
8410 break;
8411 case library_throughput:
8412 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
8413 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
8414 break;
8415 default:
8416 KMP_FATAL(UnknownLibraryType, arg);
8417 }
8418}
8419
8420/* Getting team information common for all team API */
8421// Returns NULL if not in teams construct
8422static kmp_team_t *__kmp_aux_get_team_info(int &teams_serialized) {
8423 kmp_info_t *thr = __kmp_entry_thread();
8424 teams_serialized = 0;
8425 if (thr->th.th_teams_microtask) {
8426 kmp_team_t *team = thr->th.th_team;
8427 int tlevel = thr->th.th_teams_level; // the level of the teams construct
8428 int ii = team->t.t_level;
8429 teams_serialized = team->t.t_serialized;
8430 int level = tlevel + 1;
8431 KMP_DEBUG_ASSERT(ii >= tlevel);
8432 while (ii > level) {
8433 for (teams_serialized = team->t.t_serialized;
8434 (teams_serialized > 0) && (ii > level); teams_serialized--, ii--) {
8435 }
8436 if (team->t.t_serialized && (!teams_serialized)) {
8437 team = team->t.t_parent;
8438 continue;
8439 }
8440 if (ii > level) {
8441 team = team->t.t_parent;
8442 ii--;
8443 }
8444 }
8445 return team;
8446 }
8447 return NULL__null;
8448}
8449
8450int __kmp_aux_get_team_num() {
8451 int serialized;
8452 kmp_team_t *team = __kmp_aux_get_team_info(serialized);
8453 if (team) {
8454 if (serialized > 1) {
8455 return 0; // teams region is serialized ( 1 team of 1 thread ).
8456 } else {
8457 return team->t.t_master_tid;
8458 }
8459 }
8460 return 0;
8461}
8462
8463int __kmp_aux_get_num_teams() {
8464 int serialized;
8465 kmp_team_t *team = __kmp_aux_get_team_info(serialized);
8466 if (team) {
8467 if (serialized > 1) {
8468 return 1;
8469 } else {
8470 return team->t.t_parent->t.t_nproc;
8471 }
8472 }
8473 return 1;
8474}
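These two helpers back omp_get_team_num() and omp_get_num_teams(); note the serialized case above, which reports team 0 of 1. A minimal host-teams sketch (requires OpenMP 5.0 host teams support; num_teams(4) is only a request and the runtime may grant fewer teams):

#include <omp.h>
#include <stdio.h>

int main(void) {
  #pragma omp teams num_teams(4)
  {
    // The initial thread of each team reports its team number.
    printf("team %d of %d\n", omp_get_team_num(), omp_get_num_teams());
  }
  return 0;
}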
8475
8476/* ------------------------------------------------------------------------ */
8477
8478/*
8479 * Affinity Format Parser
8480 *
8481 * Field is in form of: %[[[0].]size]type
8482 * % and type are required (%% means print a literal '%')
8483 * type is either single char or long name surrounded by {},
8484 * e.g., N or {num_threads}
8485 * 0 => leading zeros
8486 * . => right justified when size is specified
8487 * by default output is left justified
8488 * size is the *minimum* field length
8489 * All other characters are printed as is
8490 *
8491 * Available field types:
 8492 * t {team_num} - omp_get_team_num()
 8493 * T {num_teams} - omp_get_num_teams()
 8494 * L {nesting_level} - omp_get_level()
 8495 * n {thread_num} - omp_get_thread_num()
 8496 * N {num_threads} - omp_get_num_threads()
 8497 * a {ancestor_tnum} - omp_get_ancestor_thread_num(omp_get_level()-1)
 8498 * H {host} - name of host machine
 8499 * P {process_id} - process id (integer)
 8500 * i {native_thread_id} - native thread identifier (integer)
 * A {thread_affinity} - comma separated list of integers or integer ranges
 * (values of affinity mask)
8501 *
8502 * Implementation-specific field types can be added
8503 * If a type is unknown, print "undefined"
8504 */
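For illustration, a small sketch of the syntax described above via the OpenMP 5.0 affinity-format API; the format string itself is an arbitrary example:

#include <omp.h>
#include <stdio.h>

int main(void) {
  // %0.4n: thread_num, right justified, zero padded, minimum width 4.
  // Long names use braces (%{host}); %% prints a literal '%'.
  omp_set_affinity_format("host=%{host} pid=%P tid=%0.4n aff=%A");

  #pragma omp parallel
  {
    char buf[256];
    // A NULL (or empty) format falls back to the affinity-format-var ICV
    // that was set above; the return value is the length needed.
    size_t needed = omp_capture_affinity(buf, sizeof(buf), NULL);
    #pragma omp critical
    printf("[%zu chars] %s\n", needed, buf);
  }
  return 0;
}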
8505
 8506// Structure holding the short name, long name, and corresponding data type
 8507// for snprintf. A table of these entries defines the full set of valid
 8508// keyword field types.
8509typedef struct kmp_affinity_format_field_t {
8510 char short_name; // from spec e.g., L -> thread level
8511 const char *long_name; // from spec thread_level -> thread level
8512 char field_format; // data type for snprintf (typically 'd' or 's'
8513 // for integer or string)
8514} kmp_affinity_format_field_t;
8515
8516static const kmp_affinity_format_field_t __kmp_affinity_format_table[] = {
8517#if KMP_AFFINITY_SUPPORTED1
8518 {'A', "thread_affinity", 's'},
8519#endif
8520 {'t', "team_num", 'd'},
8521 {'T', "num_teams", 'd'},
8522 {'L', "nesting_level", 'd'},
8523 {'n', "thread_num", 'd'},
8524 {'N', "num_threads", 'd'},
8525 {'a', "ancestor_tnum", 'd'},
8526 {'H', "host", 's'},
8527 {'P', "process_id", 'd'},
8528 {'i', "native_thread_id", 'd'}};
8529
 8530// Return the number of characters it takes to hold the field
8531static int __kmp_aux_capture_affinity_field(int gtid, const kmp_info_t *th,
8532 const char **ptr,
8533 kmp_str_buf_t *field_buffer) {
8534 int rc, format_index, field_value;
8535 const char *width_left, *width_right;
8536 bool pad_zeros, right_justify, parse_long_name, found_valid_name;
8537 static const int FORMAT_SIZE = 20;
8538 char format[FORMAT_SIZE] = {0};
8539 char absolute_short_name = 0;
8540
8541 KMP_DEBUG_ASSERT(gtid >= 0)if (!(gtid >= 0)) { __kmp_debug_assert("gtid >= 0", "openmp/runtime/src/kmp_runtime.cpp"
, 8541); }
;
8542 KMP_DEBUG_ASSERT(th)if (!(th)) { __kmp_debug_assert("th", "openmp/runtime/src/kmp_runtime.cpp"
, 8542); }
;
8543 KMP_DEBUG_ASSERT(**ptr == '%')if (!(**ptr == '%')) { __kmp_debug_assert("**ptr == '%'", "openmp/runtime/src/kmp_runtime.cpp"
, 8543); }
;
8544 KMP_DEBUG_ASSERT(field_buffer)if (!(field_buffer)) { __kmp_debug_assert("field_buffer", "openmp/runtime/src/kmp_runtime.cpp"
, 8544); }
;
8545
8546 __kmp_str_buf_clear(field_buffer);
8547
8548 // Skip the initial %
8549 (*ptr)++;
8550
8551 // Check for %% first
8552 if (**ptr == '%') {
8553 __kmp_str_buf_cat(field_buffer, "%", 1);
8554 (*ptr)++; // skip over the second %
8555 return 1;
8556 }
8557
8558 // Parse field modifiers if they are present
8559 pad_zeros = false;
8560 if (**ptr == '0') {
8561 pad_zeros = true;
8562 (*ptr)++; // skip over 0
8563 }
8564 right_justify = false;
8565 if (**ptr == '.') {
8566 right_justify = true;
8567 (*ptr)++; // skip over .
8568 }
8569 // Parse width of field: [width_left, width_right)
8570 width_left = width_right = NULL__null;
8571 if (**ptr >= '0' && **ptr <= '9') {
8572 width_left = *ptr;
8573 SKIP_DIGITS(*ptr){ while (*(*ptr) >= '0' && *(*ptr) <= '9') (*ptr
)++; }
;
8574 width_right = *ptr;
8575 }
8576
8577 // Create the format for KMP_SNPRINTF based on flags parsed above
8578 format_index = 0;
8579 format[format_index++] = '%';
8580 if (!right_justify)
8581 format[format_index++] = '-';
8582 if (pad_zeros)
8583 format[format_index++] = '0';
8584 if (width_left && width_right) {
8585 int i = 0;
8586 // Only allow 8 digit number widths.
8587 // This also prevents overflowing format variable
8588 while (i < 8 && width_left < width_right) {
8589 format[format_index++] = *width_left;
8590 width_left++;
8591 i++;
8592 }
8593 }
8594
8595 // Parse a name (long or short)
8596 // Canonicalize the name into absolute_short_name
8597 found_valid_name = false;
8598 parse_long_name = (**ptr == '{');
8599 if (parse_long_name)
8600 (*ptr)++; // skip initial left brace
8601 for (size_t i = 0; i < sizeof(__kmp_affinity_format_table) /
8602 sizeof(__kmp_affinity_format_table[0]);
8603 ++i) {
8604 char short_name = __kmp_affinity_format_table[i].short_name;
8605 const char *long_name = __kmp_affinity_format_table[i].long_name;
8606 char field_format = __kmp_affinity_format_table[i].field_format;
8607 if (parse_long_name) {
8608 size_t length = KMP_STRLENstrlen(long_name);
8609 if (strncmp(*ptr, long_name, length) == 0) {
8610 found_valid_name = true;
8611 (*ptr) += length; // skip the long name
8612 }
8613 } else if (**ptr == short_name) {
8614 found_valid_name = true;
8615 (*ptr)++; // skip the short name
8616 }
8617 if (found_valid_name) {
8618 format[format_index++] = field_format;
8619 format[format_index++] = '\0';
8620 absolute_short_name = short_name;
8621 break;
8622 }
8623 }
8624 if (parse_long_name) {
8625 if (**ptr != '}') {
8626 absolute_short_name = 0;
8627 } else {
8628 (*ptr)++; // skip over the right brace
8629 }
8630 }
8631
8632 // Attempt to fill the buffer with the requested
8633 // value using snprintf within __kmp_str_buf_print()
8634 switch (absolute_short_name) {
8635 case 't':
8636 rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_team_num());
8637 break;
8638 case 'T':
8639 rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_num_teams());
8640 break;
8641 case 'L':
8642 rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_level);
8643 break;
8644 case 'n':
8645 rc = __kmp_str_buf_print(field_buffer, format, __kmp_tid_from_gtid(gtid));
8646 break;
8647 case 'H': {
8648 static const int BUFFER_SIZE = 256;
8649 char buf[BUFFER_SIZE];
8650 __kmp_expand_host_name(buf, BUFFER_SIZE);
8651 rc = __kmp_str_buf_print(field_buffer, format, buf);
8652 } break;
8653 case 'P':
8654 rc = __kmp_str_buf_print(field_buffer, format, getpid());
8655 break;
8656 case 'i':
8657 rc = __kmp_str_buf_print(field_buffer, format, __kmp_gettid()syscall(186));
8658 break;
8659 case 'N':
8660 rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_nproc);
8661 break;
8662 case 'a':
8663 field_value =
8664 __kmp_get_ancestor_thread_num(gtid, th->th.th_team->t.t_level - 1);
8665 rc = __kmp_str_buf_print(field_buffer, format, field_value);
8666 break;
8667#if KMP_AFFINITY_SUPPORTED1
8668 case 'A': {
8669 kmp_str_buf_t buf;
8670 __kmp_str_buf_init(&buf){ (&buf)->str = (&buf)->bulk; (&buf)->size
= sizeof((&buf)->bulk); (&buf)->used = 0; (&
buf)->bulk[0] = 0; }
;
8671 __kmp_affinity_str_buf_mask(&buf, th->th.th_affin_mask);
8672 rc = __kmp_str_buf_print(field_buffer, format, buf.str);
8673 __kmp_str_buf_free(&buf);
8674 } break;
8675#endif
8676 default:
 8677    // According to the spec, if an implementation does not have info for a
 8678    // field type, then "undefined" is printed
8679 rc = __kmp_str_buf_print(field_buffer, "%s", "undefined");
8680 // Skip the field
8681 if (parse_long_name) {
8682 SKIP_TOKEN(*ptr){ while ((*(*ptr) >= '0' && *(*ptr) <= '9') || (
*(*ptr) >= 'a' && *(*ptr) <= 'z') || (*(*ptr) >=
'A' && *(*ptr) <= 'Z') || *(*ptr) == '_') (*ptr)++
; }
;
8683 if (**ptr == '}')
8684 (*ptr)++;
8685 } else {
8686 (*ptr)++;
8687 }
8688 }
8689
8690 KMP_ASSERT(format_index <= FORMAT_SIZE)if (!(format_index <= FORMAT_SIZE)) { __kmp_debug_assert("format_index <= FORMAT_SIZE"
, "openmp/runtime/src/kmp_runtime.cpp", 8690); }
;
8691 return rc;
8692}
8693
8694/*
8695 * Return number of characters needed to hold the affinity string
8696 * (not including null byte character)
8697 * The resultant string is printed to buffer, which the caller can then
8698 * handle afterwards
8699 */
8700size_t __kmp_aux_capture_affinity(int gtid, const char *format,
8701 kmp_str_buf_t *buffer) {
8702 const char *parse_ptr;
8703 size_t retval;
8704 const kmp_info_t *th;
8705 kmp_str_buf_t field;
8706
8707 KMP_DEBUG_ASSERT(buffer)if (!(buffer)) { __kmp_debug_assert("buffer", "openmp/runtime/src/kmp_runtime.cpp"
, 8707); }
;
8708 KMP_DEBUG_ASSERT(gtid >= 0)if (!(gtid >= 0)) { __kmp_debug_assert("gtid >= 0", "openmp/runtime/src/kmp_runtime.cpp"
, 8708); }
;
8709
8710 __kmp_str_buf_init(&field){ (&field)->str = (&field)->bulk; (&field)->
size = sizeof((&field)->bulk); (&field)->used =
0; (&field)->bulk[0] = 0; }
;
8711 __kmp_str_buf_clear(buffer);
8712
8713 th = __kmp_threads[gtid];
8714 retval = 0;
8715
8716 // If format is NULL or zero-length string, then we use
8717 // affinity-format-var ICV
8718 parse_ptr = format;
8719 if (parse_ptr == NULL__null || *parse_ptr == '\0') {
8720 parse_ptr = __kmp_affinity_format;
8721 }
8722 KMP_DEBUG_ASSERT(parse_ptr)if (!(parse_ptr)) { __kmp_debug_assert("parse_ptr", "openmp/runtime/src/kmp_runtime.cpp"
, 8722); }
;
8723
8724 while (*parse_ptr != '\0') {
8725 // Parse a field
8726 if (*parse_ptr == '%') {
8727 // Put field in the buffer
8728 int rc = __kmp_aux_capture_affinity_field(gtid, th, &parse_ptr, &field);
8729 __kmp_str_buf_catbuf(buffer, &field);
8730 retval += rc;
8731 } else {
8732 // Put literal character in buffer
8733 __kmp_str_buf_cat(buffer, parse_ptr, 1);
8734 retval++;
8735 parse_ptr++;
8736 }
8737 }
8738 __kmp_str_buf_free(&field);
8739 return retval;
8740}
8741
8742// Displays the affinity string to stdout
8743void __kmp_aux_display_affinity(int gtid, const char *format) {
8744 kmp_str_buf_t buf;
8745 __kmp_str_buf_init(&buf){ (&buf)->str = (&buf)->bulk; (&buf)->size
= sizeof((&buf)->bulk); (&buf)->used = 0; (&
buf)->bulk[0] = 0; }
;
8746 __kmp_aux_capture_affinity(gtid, format, &buf);
8747 __kmp_fprintf(kmp_out, "%s" KMP_END_OF_LINE"\n", buf.str);
8748 __kmp_str_buf_free(&buf);
8749}
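__kmp_aux_display_affinity() is the printing path behind omp_display_affinity() and the OMP_DISPLAY_AFFINITY output; a minimal sketch using the standard entry point:

#include <omp.h>

int main(void) {
  #pragma omp parallel
  {
    // Each thread prints one line to stdout using the given format;
    // passing NULL would use the affinity-format-var ICV instead.
    omp_display_affinity("thread %n of %N on host %{host}");
  }
  return 0;
}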
8750
8751/* ------------------------------------------------------------------------ */
8752
8753void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid) {
8754 int blocktime = arg; /* argument is in milliseconds */
8755#if KMP_USE_MONITOR
8756 int bt_intervals;
8757#endif
8758 kmp_int8 bt_set;
8759
8760 __kmp_save_internal_controls(thread);
8761
8762 /* Normalize and set blocktime for the teams */
8763 if (blocktime < KMP_MIN_BLOCKTIME(0))
8764 blocktime = KMP_MIN_BLOCKTIME(0);
8765 else if (blocktime > KMP_MAX_BLOCKTIME(2147483647))
8766 blocktime = KMP_MAX_BLOCKTIME(2147483647);
8767
8768 set__blocktime_team(thread->th.th_team, tid, blocktime)(((thread->th.th_team)->t.t_threads[(tid)]->th.th_current_task
->td_icvs.blocktime) = (blocktime))
;
8769 set__blocktime_team(thread->th.th_serial_team, 0, blocktime)(((thread->th.th_serial_team)->t.t_threads[(0)]->th.
th_current_task->td_icvs.blocktime) = (blocktime))
;
8770
8771#if KMP_USE_MONITOR
8772 /* Calculate and set blocktime intervals for the teams */
8773 bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);
8774
8775 set__bt_intervals_team(thread->th.th_team, tid, bt_intervals);
8776 set__bt_intervals_team(thread->th.th_serial_team, 0, bt_intervals);
8777#endif
8778
8779 /* Set whether blocktime has been set to "TRUE" */
8780 bt_set = TRUE(!0);
8781
8782 set__bt_set_team(thread->th.th_team, tid, bt_set)(((thread->th.th_team)->t.t_threads[(tid)]->th.th_current_task
->td_icvs.bt_set) = (bt_set))
;
8783 set__bt_set_team(thread->th.th_serial_team, 0, bt_set)(((thread->th.th_serial_team)->t.t_threads[(0)]->th.
th_current_task->td_icvs.bt_set) = (bt_set))
;
8784#if KMP_USE_MONITOR
 8785  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "
 8786                "bt_intervals=%d, monitor_updates=%d\n",
 8787                __kmp_gtid_from_tid(tid, thread->th.th_team),
 8788                thread->th.th_team->t.t_id, tid, blocktime, bt_intervals,
 8789                __kmp_monitor_wakeups))if (kmp_f_debug >= 10) { __kmp_debug_printf ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, " "bt_intervals=%d, monitor_updates=%d\n", __kmp_gtid_from_tid(tid, thread->th.th_team), thread->th.th_team->t.t_id, tid, blocktime, bt_intervals, __kmp_monitor_wakeups); }
;
8790#else
 8791  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n",
 8792                __kmp_gtid_from_tid(tid, thread->th.th_team),
 8793                thread->th.th_team->t.t_id, tid, blocktime))if (kmp_f_debug >= 10) { __kmp_debug_printf ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n", __kmp_gtid_from_tid(tid, thread->th.th_team), thread->th.th_team->t.t_id, tid, blocktime); }
;
8794#endif
8795}
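__kmp_aux_set_blocktime() is reachable through the kmp_set_blocktime() extension (or KMP_BLOCKTIME in the environment); a short sketch, assuming the kmp_set_blocktime()/kmp_get_blocktime() declarations in LLVM's omp.h:

#include <omp.h>
#include <stdio.h>

int main(void) {
  // Let idle workers spin for up to 50 ms before sleeping; the value is
  // clamped to [KMP_MIN_BLOCKTIME, KMP_MAX_BLOCKTIME] as shown above.
  kmp_set_blocktime(50);
  printf("blocktime = %d ms\n", kmp_get_blocktime());

  #pragma omp parallel
  { /* ... work ... */ }
  return 0;
}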
8796
8797void __kmp_aux_set_defaults(char const *str, size_t len) {
8798 if (!__kmp_init_serial) {
8799 __kmp_serial_initialize();
8800 }
8801 __kmp_env_initialize(str);
8802
8803 if (__kmp_settings || __kmp_display_env || __kmp_display_env_verbose) {
8804 __kmp_env_print();
8805 }
8806} // __kmp_aux_set_defaults
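A sketch of reaching __kmp_aux_set_defaults() through the kmp_set_defaults() extension declared in LLVM's omp.h; the particular settings string is just an example:

#include <omp.h>

int main(void) {
  // Apply settings as if they had come from the environment; most of them
  // only take effect before the runtime consumes them at the first
  // parallel region. KMP_SETTINGS=1 makes the runtime print its settings.
  kmp_set_defaults("KMP_SETTINGS=1");

  #pragma omp parallel
  { /* ... */ }
  return 0;
}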
8807
8808/* ------------------------------------------------------------------------ */
8809/* internal fast reduction routines */
8810
8811PACKED_REDUCTION_METHOD_T
8812__kmp_determine_reduction_method(
8813 ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
8814 void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
8815 kmp_critical_name *lck) {
8816
8817 // Default reduction method: critical construct ( lck != NULL, like in current
8818 // PAROPT )
8819 // If ( reduce_data!=NULL && reduce_func!=NULL ): the tree-reduction method
8820 // can be selected by RTL
8821 // If loc->flags contains KMP_IDENT_ATOMIC_REDUCE, the atomic reduce method
8822 // can be selected by RTL
8823 // Finally, it's up to OpenMP RTL to make a decision on which method to select
8824 // among generated by PAROPT.
8825
8826 PACKED_REDUCTION_METHOD_T retval;
8827
8828 int team_size;
8829
8830 KMP_DEBUG_ASSERT(loc)if (!(loc)) { __kmp_debug_assert("loc", "openmp/runtime/src/kmp_runtime.cpp"
, 8830); }
; // it would be nice to test ( loc != 0 )
8831 KMP_DEBUG_ASSERT(lck)if (!(lck)) { __kmp_debug_assert("lck", "openmp/runtime/src/kmp_runtime.cpp"
, 8831); }
; // it would be nice to test ( lck != 0 )
8832
8833#define FAST_REDUCTION_ATOMIC_METHOD_GENERATED \
8834 (loc && \
8835 ((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE)))
8836#define FAST_REDUCTION_TREE_METHOD_GENERATED ((reduce_data) && (reduce_func))
8837
8838 retval = critical_reduce_block;
8839
 8840  // another choice of getting a team size (with 1 dynamic dereference) is slower
8841 team_size = __kmp_get_team_num_threads(global_tid)(__kmp_threads[(global_tid)]->th.th_team->t.t_nproc);
8842 if (team_size == 1) {
8843
8844 retval = empty_reduce_block;
8845
8846 } else {
8847
8848 int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
8849
8850#if KMP_ARCH_X86_641 || KMP_ARCH_PPC64(0 || 0) || KMP_ARCH_AARCH640 || \
8851 KMP_ARCH_MIPS640 || KMP_ARCH_RISCV640 || KMP_ARCH_LOONGARCH640
8852
8853#if KMP_OS_LINUX1 || KMP_OS_DRAGONFLY0 || KMP_OS_FREEBSD0 || KMP_OS_NETBSD0 || \
8854 KMP_OS_OPENBSD0 || KMP_OS_WINDOWS0 || KMP_OS_DARWIN0 || KMP_OS_HURD0
8855
8856 int teamsize_cutoff = 4;
8857
8858#if KMP_MIC_SUPPORTED((0 || 1) && (1 || 0))
8859 if (__kmp_mic_type != non_mic) {
8860 teamsize_cutoff = 8;
8861 }
8862#endif
8863 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
8864 if (tree_available) {
8865 if (team_size <= teamsize_cutoff) {
8866 if (atomic_available) {
8867 retval = atomic_reduce_block;
8868 }
8869 } else {
8870 retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER(((tree_reduce_block) | (bs_reduction_barrier)));
8871 }
8872 } else if (atomic_available) {
8873 retval = atomic_reduce_block;
8874 }
8875#else
8876#error "Unknown or unsupported OS"
8877#endif // KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||
8878 // KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD
8879
8880#elif KMP_ARCH_X860 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS0
8881
8882#if KMP_OS_LINUX1 || KMP_OS_FREEBSD0 || KMP_OS_WINDOWS0 || KMP_OS_HURD0
8883
8884 // basic tuning
8885
8886 if (atomic_available) {
8887 if (num_vars <= 2) { // && ( team_size <= 8 ) due to false-sharing ???
8888 retval = atomic_reduce_block;
8889 }
8890 } // otherwise: use critical section
8891
8892#elif KMP_OS_DARWIN0
8893
8894 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
8895 if (atomic_available && (num_vars <= 3)) {
8896 retval = atomic_reduce_block;
8897 } else if (tree_available) {
8898 if ((reduce_size > (9 * sizeof(kmp_real64))) &&
8899 (reduce_size < (2000 * sizeof(kmp_real64)))) {
8900 retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER(((tree_reduce_block) | (bs_plain_barrier)));
8901 }
8902 } // otherwise: use critical section
8903
8904#else
8905#error "Unknown or unsupported OS"
8906#endif
8907
8908#else
8909#error "Unknown or unsupported architecture"
8910#endif
8911 }
8912
8913 // KMP_FORCE_REDUCTION
8914
8915 // If the team is serialized (team_size == 1), ignore the forced reduction
8916 // method and stay with the unsynchronized method (empty_reduce_block)
8917 if (__kmp_force_reduction_method != reduction_method_not_defined &&
8918 team_size != 1) {
8919
8920 PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block;
8921
8922 int atomic_available, tree_available;
8923
8924 switch ((forced_retval = __kmp_force_reduction_method)) {
8925 case critical_reduce_block:
8926 KMP_ASSERT(lck)if (!(lck)) { __kmp_debug_assert("lck", "openmp/runtime/src/kmp_runtime.cpp"
, 8926); }
; // lck should be != 0
8927 break;
8928
8929 case atomic_reduce_block:
8930 atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
8931 if (!atomic_available) {
8932 KMP_WARNING(RedMethodNotSupported, "atomic")__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_RedMethodNotSupported
, "atomic"), __kmp_msg_null)
;
8933 forced_retval = critical_reduce_block;
8934 }
8935 break;
8936
8937 case tree_reduce_block:
8938 tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
8939 if (!tree_available) {
8940 KMP_WARNING(RedMethodNotSupported, "tree")__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_RedMethodNotSupported
, "tree"), __kmp_msg_null)
;
8941 forced_retval = critical_reduce_block;
8942 } else {
8943#if KMP_FAST_REDUCTION_BARRIER1
8944 forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER(((tree_reduce_block) | (bs_reduction_barrier)));
8945#endif
8946 }
8947 break;
8948
8949 default:
8950 KMP_ASSERT(0)if (!(0)) { __kmp_debug_assert("0", "openmp/runtime/src/kmp_runtime.cpp"
, 8950); }
; // "unsupported method specified"
8951 }
8952
8953 retval = forced_retval;
8954 }
8955
8956 KA_TRACE(10, ("reduction method selected=%08x\n", retval))if (kmp_a_debug >= 10) { __kmp_debug_printf ("reduction method selected=%08x\n"
, retval); }
;
8957
8958#undef FAST_REDUCTION_TREE_METHOD_GENERATED
8959#undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED
8960
8961 return (retval);
8962}
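The method chosen above (critical, atomic, tree, or the empty block for a team of one) is not visible at the source level; any reduction clause exercises this path, and the KMP_FORCE_REDUCTION setting referenced in the comment above can override the heuristic for experiments. A minimal example:

#include <omp.h>
#include <stdio.h>

int main(void) {
  double sum = 0.0;

  // The runtime picks critical_reduce_block, atomic_reduce_block or a
  // tree_reduce_block variant for this clause based on team size,
  // architecture and OS, as in __kmp_determine_reduction_method().
  #pragma omp parallel for reduction(+ : sum)
  for (int i = 1; i <= 1000000; ++i)
    sum += 1.0 / i;

  printf("sum = %f\n", sum);
  return 0;
}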
8963// this function is for testing set/get/determine reduce method
8964kmp_int32 __kmp_get_reduce_method(void) {
8965 return ((__kmp_entry_thread()->th.th_local.packed_reduction_method) >> 8);
8966}
8967
8968// Soft pause sets up threads to ignore blocktime and just go to sleep.
8969// Spin-wait code checks __kmp_pause_status and reacts accordingly.
8970void __kmp_soft_pause() { __kmp_pause_status = kmp_soft_paused; }
8971
8972// Hard pause shuts down the runtime completely. Resume happens naturally when
8973// OpenMP is used subsequently.
8974void __kmp_hard_pause() {
8975 __kmp_pause_status = kmp_hard_paused;
8976 __kmp_internal_end_thread(-1);
8977}
8978
8979// Soft resume sets __kmp_pause_status, and wakes up all threads.
8980void __kmp_resume_if_soft_paused() {
8981 if (__kmp_pause_status == kmp_soft_paused) {
8982 __kmp_pause_status = kmp_not_paused;
8983
8984 for (int gtid = 1; gtid < __kmp_threads_capacity; ++gtid) {
8985 kmp_info_t *thread = __kmp_threads[gtid];
8986 if (thread) { // Wake it if sleeping
8987 kmp_flag_64<> fl(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
8988 thread);
8989 if (fl.is_sleeping())
8990 fl.resume(gtid);
8991 else if (__kmp_try_suspend_mx(thread)) { // got suspend lock
8992 __kmp_unlock_suspend_mx(thread); // unlock it; it won't sleep
8993 } else { // thread holds the lock and may sleep soon
8994 do { // until either the thread sleeps, or we can get the lock
8995 if (fl.is_sleeping()) {
8996 fl.resume(gtid);
8997 break;
8998 } else if (__kmp_try_suspend_mx(thread)) {
8999 __kmp_unlock_suspend_mx(thread);
9000 break;
9001 }
9002 } while (1);
9003 }
9004 }
9005 }
9006 }
9007}
9008
9009// This function is called via __kmpc_pause_resource. Returns 0 if successful.
9010// TODO: add warning messages
9011int __kmp_pause_resource(kmp_pause_status_t level) {
9012 if (level == kmp_not_paused) { // requesting resume
9013 if (__kmp_pause_status == kmp_not_paused) {
9014 // error message about runtime not being paused, so can't resume
9015 return 1;
9016 } else {
 9017      KMP_DEBUG_ASSERT(__kmp_pause_status == kmp_soft_paused ||
 9018                       __kmp_pause_status == kmp_hard_paused)if (!(__kmp_pause_status == kmp_soft_paused || __kmp_pause_status == kmp_hard_paused)) { __kmp_debug_assert("__kmp_pause_status == kmp_soft_paused || __kmp_pause_status == kmp_hard_paused", "openmp/runtime/src/kmp_runtime.cpp", 9018); }
;
9019 __kmp_pause_status = kmp_not_paused;
9020 return 0;
9021 }
9022 } else if (level == kmp_soft_paused) { // requesting soft pause
9023 if (__kmp_pause_status != kmp_not_paused) {
9024 // error message about already being paused
9025 return 1;
9026 } else {
9027 __kmp_soft_pause();
9028 return 0;
9029 }
9030 } else if (level == kmp_hard_paused) { // requesting hard pause
9031 if (__kmp_pause_status != kmp_not_paused) {
9032 // error message about already being paused
9033 return 1;
9034 } else {
9035 __kmp_hard_pause();
9036 return 0;
9037 }
9038 } else {
9039 // error message about invalid level
9040 return 1;
9041 }
9042}
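__kmp_pause_resource() implements the OpenMP 5.0 omp_pause_resource()/omp_pause_resource_all() routines, returning 0 on success and 1 for the invalid transitions listed above. A minimal sketch:

#include <omp.h>
#include <stdio.h>

int main(void) {
  #pragma omp parallel
  { /* spin up the thread pool */ }

  // Soft pause: workers go to sleep, but runtime state is kept and the
  // next parallel region resumes them (__kmp_resume_if_soft_paused).
  int rc = omp_pause_resource_all(omp_pause_soft);
  printf("soft pause: %s\n", rc == 0 ? "ok" : "rejected");

  #pragma omp parallel
  { /* workers are woken up again here */ }

  // Hard pause: shuts the runtime down; it re-initializes lazily the next
  // time an OpenMP construct or API call is used.
  rc = omp_pause_resource_all(omp_pause_hard);
  printf("hard pause: %s\n", rc == 0 ? "ok" : "rejected");
  return 0;
}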
9043
9044void __kmp_omp_display_env(int verbose) {
9045 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
9046 if (__kmp_init_serial == 0)
9047 __kmp_do_serial_initialize();
9048 __kmp_display_env_impl(!verbose, verbose);
9049 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
9050}
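This is the backend for OMP_DISPLAY_ENV and, assuming the OpenMP 5.1 omp_display_env(int) entry point is available in this runtime, can also be triggered programmatically:

#include <omp.h>

int main(void) {
  // Non-zero asks for the verbose listing (implementation-specific
  // variables included), mirroring __kmp_display_env_impl() above.
  omp_display_env(1);
  return 0;
}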
9051
9052// The team size is changing, so distributed barrier must be modified
9053void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
9054 int new_nthreads) {
 9055  KMP_DEBUG_ASSERT(__kmp_barrier_release_pattern[bs_forkjoin_barrier] ==
 9056                   bp_dist_bar)if (!(__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)) { __kmp_debug_assert("__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar", "openmp/runtime/src/kmp_runtime.cpp", 9056); }
;
9057 kmp_info_t **other_threads = team->t.t_threads;
9058
9059 // We want all the workers to stop waiting on the barrier while we adjust the
9060 // size of the team.
9061 for (int f = 1; f < old_nthreads; ++f) {
9062 KMP_DEBUG_ASSERT(other_threads[f] != NULL)if (!(other_threads[f] != __null)) { __kmp_debug_assert("other_threads[f] != __null"
, "openmp/runtime/src/kmp_runtime.cpp", 9062); }
;
9063 // Ignore threads that are already inactive or not present in the team
9064 if (team->t.t_threads[f]->th.th_used_in_team.load() == 0) {
9065 // teams construct causes thread_limit to get passed in, and some of
9066 // those could be inactive; just ignore them
9067 continue;
9068 }
9069 // If thread is transitioning still to in_use state, wait for it
9070 if (team->t.t_threads[f]->th.th_used_in_team.load() == 3) {
9071 while (team->t.t_threads[f]->th.th_used_in_team.load() == 3)
9072 KMP_CPU_PAUSE()__kmp_x86_pause();
9073 }
9074 // The thread should be in_use now
9075 KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 1)if (!(team->t.t_threads[f]->th.th_used_in_team.load() ==
1)) { __kmp_debug_assert("team->t.t_threads[f]->th.th_used_in_team.load() == 1"
, "openmp/runtime/src/kmp_runtime.cpp", 9075); }
;
9076 // Transition to unused state
9077 team->t.t_threads[f]->th.th_used_in_team.store(2);
9078 KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 2)if (!(team->t.t_threads[f]->th.th_used_in_team.load() ==
2)) { __kmp_debug_assert("team->t.t_threads[f]->th.th_used_in_team.load() == 2"
, "openmp/runtime/src/kmp_runtime.cpp", 9078); }
;
9079 }
9080 // Release all the workers
9081 team->t.b->go_release();
9082
9083 KMP_MFENCE()if (__builtin_expect(!!(!__kmp_cpuinfo.initialized), 0)) { __kmp_query_cpuid
(&__kmp_cpuinfo); } if (__kmp_cpuinfo.flags.sse2) { __sync_synchronize
(); }
;
9084
9085 // Workers should see transition status 2 and move to 0; but may need to be
9086 // woken up first
9087 int count = old_nthreads - 1;
9088 while (count > 0) {
9089 count = old_nthreads - 1;
9090 for (int f = 1; f < old_nthreads; ++f) {
9091 if (other_threads[f]->th.th_used_in_team.load() != 0) {
9092 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME(2147483647)) { // Wake up the workers
 9093          kmp_atomic_flag_64<> *flag = (kmp_atomic_flag_64<> *)CCAST(
 9094              void *, other_threads[f]->th.th_sleep_loc)const_cast<void *>(other_threads[f]->th.th_sleep_loc)
;
9095 __kmp_atomic_resume_64(other_threads[f]->th.th_info.ds.ds_gtid, flag);
9096 }
9097 } else {
9098 KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 0)if (!(team->t.t_threads[f]->th.th_used_in_team.load() ==
0)) { __kmp_debug_assert("team->t.t_threads[f]->th.th_used_in_team.load() == 0"
, "openmp/runtime/src/kmp_runtime.cpp", 9098); }
;
9099 count--;
9100 }
9101 }
9102 }
9103 // Now update the barrier size
9104 team->t.b->update_num_threads(new_nthreads);
9105 team->t.b->go_reset();
9106}
9107
9108void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads) {
9109 // Add the threads back to the team
9110 KMP_DEBUG_ASSERT(team)if (!(team)) { __kmp_debug_assert("team", "openmp/runtime/src/kmp_runtime.cpp"
, 9110); }
;
9111 // Threads were paused and pointed at th_used_in_team temporarily during a
9112 // resize of the team. We're going to set th_used_in_team to 3 to indicate to
9113 // the thread that it should transition itself back into the team. Then, if
9114 // blocktime isn't infinite, the thread could be sleeping, so we send a resume
9115 // to wake it up.
9116 for (int f = 1; f < new_nthreads; ++f) {
9117 KMP_DEBUG_ASSERT(team->t.t_threads[f])if (!(team->t.t_threads[f])) { __kmp_debug_assert("team->t.t_threads[f]"
, "openmp/runtime/src/kmp_runtime.cpp", 9117); }
;
 9118    KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team), 0,
 9119                                3)__sync_bool_compare_and_swap((volatile kmp_uint32 *)(&(team->t.t_threads[f]->th.th_used_in_team)), (kmp_uint32)(0), (kmp_uint32)(3))
;
9120 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME(2147483647)) { // Wake up sleeping threads
9121 __kmp_resume_32(team->t.t_threads[f]->th.th_info.ds.ds_gtid,
9122 (kmp_flag_32<false, false> *)NULL__null);
9123 }
9124 }
9125 // The threads should be transitioning to the team; when they are done, they
9126 // should have set th_used_in_team to 1. This loop forces master to wait until
9127 // all threads have moved into the team and are waiting in the barrier.
9128 int count = new_nthreads - 1;
9129 while (count > 0) {
9130 count = new_nthreads - 1;
9131 for (int f = 1; f < new_nthreads; ++f) {
9132 if (team->t.t_threads[f]->th.th_used_in_team.load() == 1) {
9133 count--;
9134 }
9135 }
9136 }
9137}
9138
9139// Globals and functions for hidden helper task
9140kmp_info_t **__kmp_hidden_helper_threads;
9141kmp_info_t *__kmp_hidden_helper_main_thread;
9142std::atomic<kmp_int32> __kmp_unexecuted_hidden_helper_tasks;
9143#if KMP_OS_LINUX1
9144kmp_int32 __kmp_hidden_helper_threads_num = 8;
9145kmp_int32 __kmp_enable_hidden_helper = TRUE(!0);
9146#else
9147kmp_int32 __kmp_hidden_helper_threads_num = 0;
9148kmp_int32 __kmp_enable_hidden_helper = FALSE0;
9149#endif
9150
9151namespace {
9152std::atomic<kmp_int32> __kmp_hit_hidden_helper_threads_num;
9153
9154void __kmp_hidden_helper_wrapper_fn(int *gtid, int *, ...) {
 9155  // This is an explicit synchronization on all hidden helper threads in case
 9156  // a regular thread pushes a hidden helper task to a hidden helper thread
 9157  // that has not been woken up even once since being released by the main
 9158  // thread after the team was created.
9159 KMP_ATOMIC_INC(&__kmp_hit_hidden_helper_threads_num)(&__kmp_hit_hidden_helper_threads_num)->fetch_add(1, std
::memory_order_acq_rel)
;
9160 while (KMP_ATOMIC_LD_ACQ(&__kmp_hit_hidden_helper_threads_num)(&__kmp_hit_hidden_helper_threads_num)->load(std::memory_order_acquire
)
!=
9161 __kmp_hidden_helper_threads_num)
9162 ;
9163
9164 // If main thread, then wait for signal
9165 if (__kmpc_master(nullptr, *gtid)) {
9166 // First, unset the initial state and release the initial thread
9167 TCW_4(__kmp_init_hidden_helper_threads, FALSE)(__kmp_init_hidden_helper_threads) = (0);
9168 __kmp_hidden_helper_initz_release();
9169 __kmp_hidden_helper_main_thread_wait();
9170 // Now wake up all worker threads
9171 for (int i = 1; i < __kmp_hit_hidden_helper_threads_num; ++i) {
9172 __kmp_hidden_helper_worker_thread_signal();
9173 }
9174 }
9175}
9176} // namespace
9177
9178void __kmp_hidden_helper_threads_initz_routine() {
9179 // Create a new root for hidden helper team/threads
9180 const int gtid = __kmp_register_root(TRUE(!0));
9181 __kmp_hidden_helper_main_thread = __kmp_threads[gtid];
9182 __kmp_hidden_helper_threads = &__kmp_threads[gtid];
9183 __kmp_hidden_helper_main_thread->th.th_set_nproc =
9184 __kmp_hidden_helper_threads_num;
9185
9186 KMP_ATOMIC_ST_REL(&__kmp_hit_hidden_helper_threads_num, 0)(&__kmp_hit_hidden_helper_threads_num)->store(0, std::
memory_order_release)
;
9187
9188 __kmpc_fork_call(nullptr, 0, __kmp_hidden_helper_wrapper_fn);
9189
9190 // Set the initialization flag to FALSE
9191 TCW_SYNC_4(__kmp_init_hidden_helper, FALSE)(__kmp_init_hidden_helper) = (0);
9192
9193 __kmp_hidden_helper_threads_deinitz_release();
9194}
9195
9196/* Nesting Mode:
9197 Set via KMP_NESTING_MODE, which takes an integer.
9198 Note: we skip duplicate topology levels, and skip levels with only
9199 one entity.
9200 KMP_NESTING_MODE=0 is the default, and doesn't use nesting mode.
9201 KMP_NESTING_MODE=1 sets as many nesting levels as there are distinct levels
9202 in the topology, and initializes the number of threads at each of those
9203 levels to the number of entities at each level, respectively, below the
9204 entity at the parent level.
9205 KMP_NESTING_MODE=N, where N>1, attempts to create up to N nesting levels,
9206 but starts with nesting OFF -- max-active-levels-var is 1 -- and requires
9207 the user to turn nesting on explicitly. This is an even more experimental
9208 option to this experimental feature, and may change or go away in the
9209 future.
9210*/
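As an illustration of what KMP_NESTING_MODE=1 enables (assuming a machine whose topology exposes at least two usable levels, e.g. sockets and cores), nested parallel regions pick up the per-level thread counts initialized by the functions below:

#include <omp.h>
#include <stdio.h>

// Run with: KMP_NESTING_MODE=1 ./a.out
int main(void) {
  #pragma omp parallel // outer level sized from the top topology level
  {
    #pragma omp parallel // inner level sized from the next topology level
    {
      #pragma omp single
      printf("level %d: %d threads under outer thread %d\n", omp_get_level(),
             omp_get_num_threads(), omp_get_ancestor_thread_num(1));
    }
  }
  return 0;
}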
9211
9212// Allocate space to store nesting levels
9213void __kmp_init_nesting_mode() {
9214 int levels = KMP_HW_LAST;
9215 __kmp_nesting_mode_nlevels = levels;
9216 __kmp_nesting_nth_level = (int *)KMP_INTERNAL_MALLOC(levels * sizeof(int))malloc(levels * sizeof(int));
9217 for (int i = 0; i < levels; ++i)
9218 __kmp_nesting_nth_level[i] = 0;
9219 if (__kmp_nested_nth.size < levels) {
9220 __kmp_nested_nth.nth =
9221 (int *)KMP_INTERNAL_REALLOC(__kmp_nested_nth.nth, levels * sizeof(int))realloc((__kmp_nested_nth.nth), (levels * sizeof(int)));
9222 __kmp_nested_nth.size = levels;
9223 }
9224}
9225
9226// Set # threads for top levels of nesting; must be called after topology set
9227void __kmp_set_nesting_mode_threads() {
9228 kmp_info_t *thread = __kmp_threads[__kmp_entry_gtid()__kmp_get_global_thread_id_reg()];
9229
9230 if (__kmp_nesting_mode == 1)
9231 __kmp_nesting_mode_nlevels = KMP_MAX_ACTIVE_LEVELS_LIMIT2147483647;
9232 else if (__kmp_nesting_mode > 1)
9233 __kmp_nesting_mode_nlevels = __kmp_nesting_mode;
9234
9235 if (__kmp_topology) { // use topology info
9236 int loc, hw_level;
9237 for (loc = 0, hw_level = 0; hw_level < __kmp_topology->get_depth() &&
9238 loc < __kmp_nesting_mode_nlevels;
9239 loc++, hw_level++) {
9240 __kmp_nesting_nth_level[loc] = __kmp_topology->get_ratio(hw_level);
9241 if (__kmp_nesting_nth_level[loc] == 1)
9242 loc--;
9243 }
9244 // Make sure all cores are used
9245 if (__kmp_nesting_mode > 1 && loc > 1) {
9246 int core_level = __kmp_topology->get_level(KMP_HW_CORE);
9247 int num_cores = __kmp_topology->get_count(core_level);
9248 int upper_levels = 1;
9249 for (int level = 0; level < loc - 1; ++level)
9250 upper_levels *= __kmp_nesting_nth_level[level];
9251 if (upper_levels * __kmp_nesting_nth_level[loc - 1] < num_cores)
9252 __kmp_nesting_nth_level[loc - 1] =
9253 num_cores / __kmp_nesting_nth_level[loc - 2];
9254 }
9255 __kmp_nesting_mode_nlevels = loc;
9256 __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
9257 } else { // no topology info available; provide a reasonable guesstimation
9258 if (__kmp_avail_proc >= 4) {
9259 __kmp_nesting_nth_level[0] = __kmp_avail_proc / 2;
9260 __kmp_nesting_nth_level[1] = 2;
9261 __kmp_nesting_mode_nlevels = 2;
9262 } else {
9263 __kmp_nesting_nth_level[0] = __kmp_avail_proc;
9264 __kmp_nesting_mode_nlevels = 1;
9265 }
9266 __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
9267 }
9268 for (int i = 0; i < __kmp_nesting_mode_nlevels; ++i) {
9269 __kmp_nested_nth.nth[i] = __kmp_nesting_nth_level[i];
9270 }
9271 set__nproc(thread, __kmp_nesting_nth_level[0])(((thread)->th.th_current_task->td_icvs.nproc) = (__kmp_nesting_nth_level
[0]))
;
9272 if (__kmp_nesting_mode > 1 && __kmp_nesting_mode_nlevels > __kmp_nesting_mode)
9273 __kmp_nesting_mode_nlevels = __kmp_nesting_mode;
9274 if (get__max_active_levels(thread)((thread)->th.th_current_task->td_icvs.max_active_levels
)
> 1) {
9275 // if max levels was set, set nesting mode levels to same
9276 __kmp_nesting_mode_nlevels = get__max_active_levels(thread)((thread)->th.th_current_task->td_icvs.max_active_levels
)
;
9277 }
9278 if (__kmp_nesting_mode == 1) // turn on nesting for this case only
9279 set__max_active_levels(thread, __kmp_nesting_mode_nlevels)(((thread)->th.th_current_task->td_icvs.max_active_levels
) = (__kmp_nesting_mode_nlevels))
;
9280}
9281
9282// Empty symbols to export (see exports_so.txt) when feature is disabled
9283extern "C" {
9284#if !KMP_STATS_ENABLED0
9285void __kmp_reset_stats() {}
9286#endif
9287#if !USE_DEBUGGER0
9288int __kmp_omp_debug_struct_info = FALSE0;
9289int __kmp_debugging = FALSE0;
9290#endif
9291#if !USE_ITT_BUILD1 || !USE_ITT_NOTIFY1
9292void __kmp_itt_fini_ittlib() {}
9293void __kmp_itt_init_ittlib() {}
9294#endif
9295}
9296
9297// end of file