Bug Summary

File: build/source/openmp/runtime/src/kmp_runtime.cpp
Warning: line 2569, column 34
Dereference of null pointer

Annotated Source Code


clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name kmp_runtime.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -resource-dir /usr/lib/llvm-16/lib/clang/16 -I projects/openmp/runtime/src -I /build/source/openmp/runtime/src -I include -I /build/source/llvm/include -I /build/source/openmp/runtime/src/i18n -I /build/source/openmp/runtime/src/include -I /build/source/openmp/runtime/src/thirdparty/ittnotify -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -D omp_EXPORTS -D _FORTIFY_SOURCE=2 -D NDEBUG -D _GNU_SOURCE -D _REENTRANT -D _FORTIFY_SOURCE=2 -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-16/lib/clang/16/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fmacro-prefix-map=/build/source/= -fcoverage-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fcoverage-prefix-map=/build/source/= -source-date-epoch 1673561342 -O2 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -Wno-misleading-indentation -Wno-extra -Wno-pedantic -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-frame-address -Wno-strict-aliasing -Wno-stringop-truncation -Wno-switch -Wno-uninitialized -Wno-cast-qual -std=c++17 -fdeprecated-macro -fdebug-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/= -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fno-rtti -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2023-01-13-042150-16221-1 -x c++ /build/source/openmp/runtime/src/kmp_runtime.cpp
1/*
2 * kmp_runtime.cpp -- KPTS runtime support library
3 */
4
5//===----------------------------------------------------------------------===//
6//
7// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8// See https://llvm.org/LICENSE.txt for license information.
9// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10//
11//===----------------------------------------------------------------------===//
12
13#include "kmp.h"
14#include "kmp_affinity.h"
15#include "kmp_atomic.h"
16#include "kmp_environment.h"
17#include "kmp_error.h"
18#include "kmp_i18n.h"
19#include "kmp_io.h"
20#include "kmp_itt.h"
21#include "kmp_settings.h"
22#include "kmp_stats.h"
23#include "kmp_str.h"
24#include "kmp_wait_release.h"
25#include "kmp_wrapper_getpid.h"
26#include "kmp_dispatch.h"
27#if KMP_USE_HIER_SCHED
28#include "kmp_dispatch_hier.h"
29#endif
30
31#if OMPT_SUPPORT
32#include "ompt-specific.h"
33#endif
34#if OMPD_SUPPORT
35#include "ompd-specific.h"
36#endif
37
38#if OMP_PROFILING_SUPPORT
39#include "llvm/Support/TimeProfiler.h"
40static char *ProfileTraceFile = nullptr;
41#endif
42
43/* these are temporary issues to be dealt with */
44#define KMP_USE_PRCTL 0
45
46#if KMP_OS_WINDOWS
47#include <process.h>
48#endif
49
50#if KMP_OS_WINDOWS
51// windows does not need include files as it doesn't use shared memory
52#else
53#include <sys/mman.h>
54#include <sys/stat.h>
55#include <fcntl.h>
56#define SHM_SIZE 1024
57#endif
58
59#if defined(KMP_GOMP_COMPAT)
60char const __kmp_version_alt_comp[] =
61 KMP_VERSION_PREFIX "alternative compiler support: yes";
62#endif /* defined(KMP_GOMP_COMPAT) */
63
64char const __kmp_version_omp_api[] =
65 KMP_VERSION_PREFIX "API version: 5.0 (201611)";
66
67#ifdef KMP_DEBUG
68char const __kmp_version_lock[] =
69 KMP_VERSION_PREFIX "lock type: run time selectable";
70#endif /* KMP_DEBUG */
71
72#define KMP_MIN(x, y) ((x) < (y) ? (x) : (y))
73
74/* ------------------------------------------------------------------------ */
75
76#if KMP_USE_MONITOR
77kmp_info_t __kmp_monitor;
78#endif
79
80/* Forward declarations */
81
82void __kmp_cleanup(void);
83
84static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *, int tid,
85 int gtid);
86static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
87 kmp_internal_control_t *new_icvs,
88 ident_t *loc);
89#if KMP_AFFINITY_SUPPORTED
90static void __kmp_partition_places(kmp_team_t *team,
91 int update_master_only = 0);
92#endif
93static void __kmp_do_serial_initialize(void);
94void __kmp_fork_barrier(int gtid, int tid);
95void __kmp_join_barrier(int gtid);
96void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
97 kmp_internal_control_t *new_icvs, ident_t *loc);
98
99#ifdef USE_LOAD_BALANCE
100static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc);
101#endif
102
103static int __kmp_expand_threads(int nNeed);
104#if KMP_OS_WINDOWS
105static int __kmp_unregister_root_other_thread(int gtid);
106#endif
107static void __kmp_reap_thread(kmp_info_t *thread, int is_root);
108kmp_info_t *__kmp_thread_pool_insert_pt = NULL;
109
110void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
111 int new_nthreads);
112void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads);
113
114/* Calculate the identifier of the current thread */
115/* fast (and somewhat portable) way to get unique identifier of executing
116 thread. Returns KMP_GTID_DNE if we haven't been assigned a gtid. */
117int __kmp_get_global_thread_id() {
118 int i;
119 kmp_info_t **other_threads;
120 size_t stack_data;
121 char *stack_addr;
122 size_t stack_size;
123 char *stack_base;
124
125 KA_TRACE(
126 1000,
127 ("*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
128 __kmp_nth, __kmp_all_nth));
129
130 /* JPH - to handle the case where __kmpc_end(0) is called immediately prior to
131 a parallel region, made it return KMP_GTID_DNE to force serial_initialize
132 by caller. Had to handle KMP_GTID_DNE at all call-sites, or else guarantee
133 __kmp_init_gtid for this to work. */
134
135 if (!TCR_4(__kmp_init_gtid))
136 return KMP_GTID_DNE;
137
138#ifdef KMP_TDATA_GTID
139 if (TCR_4(__kmp_gtid_mode) >= 3) {
140 KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using TDATA\n"))if (kmp_a_debug >= 1000) { __kmp_debug_printf ("*** __kmp_get_global_thread_id: using TDATA\n"
); }
;
141 return __kmp_gtid;
142 }
143#endif
144 if (TCR_4(__kmp_gtid_mode) >= 2) {
145 KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using keyed TLS\n"))if (kmp_a_debug >= 1000) { __kmp_debug_printf ("*** __kmp_get_global_thread_id: using keyed TLS\n"
); }
;
146 return __kmp_gtid_get_specific();
147 }
148 KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using internal alg.\n"))if (kmp_a_debug >= 1000) { __kmp_debug_printf ("*** __kmp_get_global_thread_id: using internal alg.\n"
); }
;
149
150 stack_addr = (char *)&stack_data;
151 other_threads = __kmp_threads;
152
153 /* ATT: The code below is a source of potential bugs due to unsynchronized
154 access to __kmp_threads array. For example:
155 1. Current thread loads other_threads[i] to thr and checks it, it is
156 non-NULL.
157 2. Current thread is suspended by OS.
158 3. Another thread unregisters and finishes (debug versions of free()
159 may fill memory with something like 0xEF).
160 4. Current thread is resumed.
161 5. Current thread reads junk from *thr.
162 TODO: Fix it. --ln */
163
164 for (i = 0; i < __kmp_threads_capacity; i++) {
165
166 kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
167 if (!thr)
168 continue;
169
170 stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
171 stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);
172
173 /* stack grows down -- search through all of the active threads */
174
175 if (stack_addr <= stack_base) {
176 size_t stack_diff = stack_base - stack_addr;
177
178 if (stack_diff <= stack_size) {
179 /* The only way we can be closer than the allocated */
180 /* stack size is if we are running on this thread. */
181 KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == i);
182 return i;
183 }
184 }
185 }
186
187 /* get specific to try and determine our gtid */
188 KA_TRACE(1000,
189 ("*** __kmp_get_global_thread_id: internal alg. failed to find "
190 "thread, using TLS\n"));
191 i = __kmp_gtid_get_specific();
192
193 /*fprintf( stderr, "=== %d\n", i ); */ /* GROO */
194
195 /* if we havn't been assigned a gtid, then return code */
196 if (i < 0)
197 return i;
198
199 /* dynamically updated stack window for uber threads to avoid get_specific
200 call */
201 if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)) {
202 KMP_FATAL(StackOverflow, i);
203 }
204
205 stack_base = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
206 if (stack_addr > stack_base) {
207 TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
208 TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
209 other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr -
210 stack_base);
211 } else {
212 TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
213 stack_base - stack_addr);
214 }
215
216 /* Reprint stack bounds for ubermaster since they have been refined */
217 if (__kmp_storage_map) {
218 char *stack_end = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
219 char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
220 __kmp_print_storage_map_gtid(i, stack_beg, stack_end,
221 other_threads[i]->th.th_info.ds.ds_stacksize,
222 "th_%d stack (refinement)", i);
223 }
224 return i;
225}
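The lookup above resolves a thread's gtid by checking whether the address of a local variable lies inside one of the registered thread stacks: stacks grow downward, so the distance from the stack base down to the probe address must not exceed the stack size. A minimal stand-alone sketch of that idea follows; the StackRange struct and find_owner function are hypothetical illustrations, not the runtime's own types.

    #include <cstddef>

    // Hypothetical registry entry: base is the highest address of a stack that
    // grows downward, size is its extent.
    struct StackRange {
      char *base;
      std::size_t size;
    };

    // Return the index whose stack contains addr, or -1 if none does.
    // Mirrors the "base - addr must be within size" test used above.
    int find_owner(const StackRange *ranges, int count, const char *addr) {
      for (int i = 0; i < count; ++i) {
        if (!ranges[i].base)
          continue; // slot not in use
        if (addr <= ranges[i].base) {
          std::size_t diff = static_cast<std::size_t>(ranges[i].base - addr);
          if (diff <= ranges[i].size)
            return i; // addr lies inside this thread's stack
        }
      }
      return -1; // the real runtime then falls back to TLS
    }

A caller would probe with the address of a local, e.g. char probe; int id = find_owner(table, n, &probe);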
226
227int __kmp_get_global_thread_id_reg() {
228 int gtid;
229
230 if (!__kmp_init_serial) {
231 gtid = KMP_GTID_DNE;
232 } else
233#ifdef KMP_TDATA_GTID
234 if (TCR_4(__kmp_gtid_mode) >= 3) {
235 KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using TDATA\n"));
236 gtid = __kmp_gtid;
237 } else
238#endif
239 if (TCR_4(__kmp_gtid_mode) >= 2) {
240 KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using keyed TLS\n"));
241 gtid = __kmp_gtid_get_specific();
242 } else {
243 KA_TRACE(1000,
244 ("*** __kmp_get_global_thread_id_reg: using internal alg.\n"));
245 gtid = __kmp_get_global_thread_id();
246 }
247
248 /* we must be a new uber master sibling thread */
249 if (gtid == KMP_GTID_DNE) {
250 KA_TRACE(10,
251 ("__kmp_get_global_thread_id_reg: Encountered new root thread. "
252 "Registering a new gtid.\n"));
253 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
254 if (!__kmp_init_serial) {
255 __kmp_do_serial_initialize();
256 gtid = __kmp_gtid_get_specific();
257 } else {
258 gtid = __kmp_register_root(FALSE);
259 }
260 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
261 /*__kmp_printf( "+++ %d\n", gtid ); */ /* GROO */
262 }
263
264 KMP_DEBUG_ASSERT(gtid >= 0);
265
266 return gtid;
267}
268
269/* caller must hold forkjoin_lock */
270void __kmp_check_stack_overlap(kmp_info_t *th) {
271 int f;
272 char *stack_beg = NULL;
273 char *stack_end = NULL;
274 int gtid;
275
276 KA_TRACE(10, ("__kmp_check_stack_overlap: called\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_check_stack_overlap: called\n"
); }
;
277 if (__kmp_storage_map) {
278 stack_end = (char *)th->th.th_info.ds.ds_stackbase;
279 stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
280
281 gtid = __kmp_gtid_from_thread(th);
282
283 if (gtid == KMP_GTID_MONITOR) {
284 __kmp_print_storage_map_gtid(
285 gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
286 "th_%s stack (%s)", "mon",
287 (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
288 } else {
289 __kmp_print_storage_map_gtid(
290 gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
291 "th_%d stack (%s)", gtid,
292 (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
293 }
294 }
295
296 /* No point in checking ubermaster threads since they use refinement and
297 * cannot overlap */
298 gtid = __kmp_gtid_from_thread(th);
299 if (__kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) {
300 KA_TRACE(10,
301 ("__kmp_check_stack_overlap: performing extensive checking\n"));
302 if (stack_beg == NULL) {
303 stack_end = (char *)th->th.th_info.ds.ds_stackbase;
304 stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
305 }
306
307 for (f = 0; f < __kmp_threads_capacity; f++) {
308 kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);
309
310 if (f_th && f_th != th) {
311 char *other_stack_end =
312 (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
313 char *other_stack_beg =
314 other_stack_end - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
315 if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
316 (stack_end > other_stack_beg && stack_end < other_stack_end)) {
317
318 /* Print the other stack values before the abort */
319 if (__kmp_storage_map)
320 __kmp_print_storage_map_gtid(
321 -1, other_stack_beg, other_stack_end,
322 (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
323 "th_%d stack (overlapped)", __kmp_gtid_from_thread(f_th));
324
325 __kmp_fatal(KMP_MSG(StackOverlap), KMP_HNT(ChangeStackLimit),
326 __kmp_msg_null);
327 }
328 }
329 }
330 }
331 KA_TRACE(10, ("__kmp_check_stack_overlap: returning\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_check_stack_overlap: returning\n"
); }
;
332}
333
334/* ------------------------------------------------------------------------ */
335
336void __kmp_infinite_loop(void) {
337 static int done = FALSE;
338
339 while (!done) {
340 KMP_YIELD(TRUE);
341 }
342}
343
344#define MAX_MESSAGE 512
345
346void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, size_t size,
347 char const *format, ...) {
348 char buffer[MAX_MESSAGE];
349 va_list ap;
350
351 va_start(ap, format);
352 KMP_SNPRINTF(buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1,
353 p2, (unsigned long)size, format);
354 __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
355 __kmp_vprintf(kmp_err, buffer, ap);
356#if KMP_PRINT_DATA_PLACEMENT
357 int node;
358 if (gtid >= 0) {
359 if (p1 <= p2 && (char *)p2 - (char *)p1 == size) {
360 if (__kmp_storage_map_verbose) {
361 node = __kmp_get_host_node(p1);
362 if (node < 0) /* doesn't work, so don't try this next time */
363 __kmp_storage_map_verbose = FALSE;
364 else {
365 char *last;
366 int lastNode;
367 int localProc = __kmp_get_cpu_from_gtid(gtid);
368
369 const int page_size = KMP_GET_PAGE_SIZE();
370
371 p1 = (void *)((size_t)p1 & ~((size_t)page_size - 1));
372 p2 = (void *)(((size_t)p2 - 1) & ~((size_t)page_size - 1));
373 if (localProc >= 0)
374 __kmp_printf_no_lock(" GTID %d localNode %d\n", gtid,
375 localProc >> 1);
376 else
377 __kmp_printf_no_lock(" GTID %d\n", gtid);
378#if KMP_USE_PRCTL
379 /* The more elaborate format is disabled for now because of the prctl
380 * hanging bug. */
381 do {
382 last = p1;
383 lastNode = node;
384 /* This loop collates adjacent pages with the same host node. */
385 do {
386 (char *)p1 += page_size;
387 } while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
388 __kmp_printf_no_lock(" %p-%p memNode %d\n", last, (char *)p1 - 1,
389 lastNode);
390 } while (p1 <= p2);
391#else
392 __kmp_printf_no_lock(" %p-%p memNode %d\n", p1,
393 (char *)p1 + (page_size - 1),
394 __kmp_get_host_node(p1));
395 if (p1 < p2) {
396 __kmp_printf_no_lock(" %p-%p memNode %d\n", p2,
397 (char *)p2 + (page_size - 1),
398 __kmp_get_host_node(p2));
399 }
400#endif
401 }
402 }
403 } else
404 __kmp_printf_no_lock(" %s\n", KMP_I18N_STR(StorageMapWarning)__kmp_i18n_catgets(kmp_i18n_str_StorageMapWarning));
405 }
406#endif /* KMP_PRINT_DATA_PLACEMENT */
407 __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
408}
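The data-placement branch above masks addresses down to a page boundary with the usual power-of-two trick, (size_t)p & ~((size_t)page_size - 1). A tiny stand-alone illustration of that arithmetic; page_floor and page_ceil are hypothetical helper names, and the round-up variant is included only for contrast, it is not used in the code above.

    #include <cassert>
    #include <cstddef>

    // page_size must be a power of two for the masking to be valid.
    inline std::size_t page_floor(std::size_t addr, std::size_t page_size) {
      return addr & ~(page_size - 1);
    }
    inline std::size_t page_ceil(std::size_t addr, std::size_t page_size) {
      return (addr + page_size - 1) & ~(page_size - 1);
    }

    int main() {
      const std::size_t page = 4096;
      assert(page_floor(0x12345, page) == 0x12000); // round down to page start
      assert(page_ceil(0x12345, page) == 0x13000);  // round up to next page
      return 0;
    }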
409
410void __kmp_warn(char const *format, ...) {
411 char buffer[MAX_MESSAGE];
412 va_list ap;
413
414 if (__kmp_generate_warnings == kmp_warnings_off) {
415 return;
416 }
417
418 va_start(ap, format);
419
420 KMP_SNPRINTF(buffer, sizeof(buffer), "OMP warning: %s\n", format);
421 __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
422 __kmp_vprintf(kmp_err, buffer, ap);
423 __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
424
425 va_end(ap);
426}
427
428void __kmp_abort_process() {
429 // Later threads may stall here, but that's ok because abort() will kill them.
430 __kmp_acquire_bootstrap_lock(&__kmp_exit_lock);
431
432 if (__kmp_debug_buf) {
433 __kmp_dump_debug_buffer();
434 }
435
436 if (KMP_OS_WINDOWS) {
437 // Let other threads know of abnormal termination and prevent deadlock
438 // if abort happened during library initialization or shutdown
439 __kmp_global.g.g_abort = SIGABRT;
440
441 /* On Windows* OS by default abort() causes pop-up error box, which stalls
442 nightly testing. Unfortunately, we cannot reliably suppress pop-up error
443 boxes. _set_abort_behavior() works well, but this function is not
444 available in VS7 (this is not problem for DLL, but it is a problem for
445 static OpenMP RTL). SetErrorMode (and so, timelimit utility) does not
446 help, at least in some versions of MS C RTL.
447
448 It seems following sequence is the only way to simulate abort() and
449 avoid pop-up error box. */
450 raise(SIGABRT);
451 _exit(3); // Just in case, if signal ignored, exit anyway.
452 } else {
453 __kmp_unregister_library();
454 abort();
455 }
456
457 __kmp_infinite_loop();
458 __kmp_release_bootstrap_lock(&__kmp_exit_lock);
459
460} // __kmp_abort_process
461
462void __kmp_abort_thread(void) {
463 // TODO: Eliminate g_abort global variable and this function.
464 // In case of abort just call abort(), it will kill all the threads.
465 __kmp_infinite_loop();
466} // __kmp_abort_thread
467
468/* Print out the storage map for the major kmp_info_t thread data structures
469 that are allocated together. */
470
471static void __kmp_print_thread_storage_map(kmp_info_t *thr, int gtid) {
472 __kmp_print_storage_map_gtid(gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d",
473 gtid);
474
475 __kmp_print_storage_map_gtid(gtid, &thr->th.th_info, &thr->th.th_team,
476 sizeof(kmp_desc_t), "th_%d.th_info", gtid);
477
478 __kmp_print_storage_map_gtid(gtid, &thr->th.th_local, &thr->th.th_pri_head,
479 sizeof(kmp_local_t), "th_%d.th_local", gtid);
480
481 __kmp_print_storage_map_gtid(
482 gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
483 sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid);
484
485 __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_plain_barrier],
486 &thr->th.th_bar[bs_plain_barrier + 1],
487 sizeof(kmp_balign_t), "th_%d.th_bar[plain]",
488 gtid);
489
490 __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_forkjoin_barrier],
491 &thr->th.th_bar[bs_forkjoin_barrier + 1],
492 sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]",
493 gtid);
494
495#if KMP_FAST_REDUCTION_BARRIER
496 __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_reduction_barrier],
497 &thr->th.th_bar[bs_reduction_barrier + 1],
498 sizeof(kmp_balign_t), "th_%d.th_bar[reduction]",
499 gtid);
500#endif // KMP_FAST_REDUCTION_BARRIER
501}
502
503/* Print out the storage map for the major kmp_team_t team data structures
504 that are allocated together. */
505
506static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team,
507 int team_id, int num_thr) {
508 int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
509 __kmp_print_storage_map_gtid(-1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
510 header, team_id);
511
512 __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0],
513 &team->t.t_bar[bs_last_barrier],
514 sizeof(kmp_balign_team_t) * bs_last_barrier,
515 "%s_%d.t_bar", header, team_id);
516
517 __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_plain_barrier],
518 &team->t.t_bar[bs_plain_barrier + 1],
519 sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]",
520 header, team_id);
521
522 __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_forkjoin_barrier],
523 &team->t.t_bar[bs_forkjoin_barrier + 1],
524 sizeof(kmp_balign_team_t),
525 "%s_%d.t_bar[forkjoin]", header, team_id);
526
527#if KMP_FAST_REDUCTION_BARRIER
528 __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_reduction_barrier],
529 &team->t.t_bar[bs_reduction_barrier + 1],
530 sizeof(kmp_balign_team_t),
531 "%s_%d.t_bar[reduction]", header, team_id);
532#endif // KMP_FAST_REDUCTION_BARRIER
533
534 __kmp_print_storage_map_gtid(
535 -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
536 sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id);
537
538 __kmp_print_storage_map_gtid(
539 -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
540 sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id);
541
542 __kmp_print_storage_map_gtid(-1, &team->t.t_disp_buffer[0],
543 &team->t.t_disp_buffer[num_disp_buff],
544 sizeof(dispatch_shared_info_t) * num_disp_buff,
545 "%s_%d.t_disp_buffer", header, team_id);
546}
547
548static void __kmp_init_allocator() {
549 __kmp_init_memkind();
550 __kmp_init_target_mem();
551}
552static void __kmp_fini_allocator() { __kmp_fini_memkind(); }
553
554/* ------------------------------------------------------------------------ */
555
556#if KMP_DYNAMIC_LIB
557#if KMP_OS_WINDOWS
558
559BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) {
560 //__kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
561
562 switch (fdwReason) {
563
564 case DLL_PROCESS_ATTACH:
565 KA_TRACE(10, ("DllMain: PROCESS_ATTACH\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("DllMain: PROCESS_ATTACH\n"
); }
;
566
567 return TRUE;
568
569 case DLL_PROCESS_DETACH:
570 KA_TRACE(10, ("DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific()))if (kmp_a_debug >= 10) { __kmp_debug_printf ("DllMain: PROCESS_DETACH T#%d\n"
, __kmp_gtid_get_specific()); }
;
571
572 // According to Windows* documentation for DllMain entry point:
573 // for DLL_PROCESS_DETACH, lpReserved is used for telling the difference:
574 // lpReserved == NULL when FreeLibrary() is called,
575 // lpReserved != NULL when the process is terminated.
576 // When FreeLibrary() is called, worker threads remain alive. So the
577 // runtime's state is consistent and executing proper shutdown is OK.
578 // When the process is terminated, worker threads have exited or been
579 // forcefully terminated by the OS and only the shutdown thread remains.
580 // This can leave the runtime in an inconsistent state.
581 // Hence, only attempt proper cleanup when FreeLibrary() is called.
582 // Otherwise, rely on OS to reclaim resources.
583 if (lpReserved == NULL)
584 __kmp_internal_end_library(__kmp_gtid_get_specific());
585
586 return TRUE;
587
588 case DLL_THREAD_ATTACH:
589 KA_TRACE(10, ("DllMain: THREAD_ATTACH\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("DllMain: THREAD_ATTACH\n"
); }
;
590
591 /* if we want to register new siblings all the time here call
592 * __kmp_get_gtid(); */
593 return TRUE;
594
595 case DLL_THREAD_DETACH:
596 KA_TRACE(10, ("DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific()))if (kmp_a_debug >= 10) { __kmp_debug_printf ("DllMain: THREAD_DETACH T#%d\n"
, __kmp_gtid_get_specific()); }
;
597
598 __kmp_internal_end_thread(__kmp_gtid_get_specific());
599 return TRUE;
600 }
601
602 return TRUE;
603}
604
605#endif /* KMP_OS_WINDOWS */
606#endif /* KMP_DYNAMIC_LIB */
607
608/* __kmp_parallel_deo -- Wait until it's our turn. */
609void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
610 int gtid = *gtid_ref;
611#ifdef BUILD_PARALLEL_ORDERED
612 kmp_team_t *team = __kmp_team_from_gtid(gtid);
613#endif /* BUILD_PARALLEL_ORDERED */
614
615 if (__kmp_env_consistency_check) {
616 if (__kmp_threads[gtid]->th.th_root->r.r_active)
617#if KMP_USE_DYNAMIC_LOCK
618 __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL, 0);
619#else
620 __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL);
621#endif
622 }
623#ifdef BUILD_PARALLEL_ORDERED
624 if (!team->t.t_serialized) {
625 KMP_MB();
626 KMP_WAIT(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid), KMP_EQ,
627 NULL);
628 KMP_MB();
629 }
630#endif /* BUILD_PARALLEL_ORDERED */
631}
632
633/* __kmp_parallel_dxo -- Signal the next task. */
634void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
635 int gtid = *gtid_ref;
636#ifdef BUILD_PARALLEL_ORDERED
637 int tid = __kmp_tid_from_gtid(gtid);
638 kmp_team_t *team = __kmp_team_from_gtid(gtid);
639#endif /* BUILD_PARALLEL_ORDERED */
640
641 if (__kmp_env_consistency_check) {
642 if (__kmp_threads[gtid]->th.th_root->r.r_active)
643 __kmp_pop_sync(gtid, ct_ordered_in_parallel, loc_ref);
644 }
645#ifdef BUILD_PARALLEL_ORDERED
646 if (!team->t.t_serialized) {
647 KMP_MB(); /* Flush all pending memory write invalidates. */
648
649 /* use the tid of the next thread in this team */
650 /* TODO replace with general release procedure */
651 team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc);
652
653 KMP_MB(); /* Flush all pending memory write invalidates. */
654 }
655#endif /* BUILD_PARALLEL_ORDERED */
656}
657
658/* ------------------------------------------------------------------------ */
659/* The BARRIER for a SINGLE process section is always explicit */
660
661int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws) {
662 int status;
663 kmp_info_t *th;
664 kmp_team_t *team;
665
666 if (!TCR_4(__kmp_init_parallel))
667 __kmp_parallel_initialize();
668 __kmp_resume_if_soft_paused();
669
670 th = __kmp_threads[gtid];
671 team = th->th.th_team;
672 status = 0;
673
674 th->th.th_ident = id_ref;
675
676 if (team->t.t_serialized) {
677 status = 1;
678 } else {
679 kmp_int32 old_this = th->th.th_local.this_construct;
680
681 ++th->th.th_local.this_construct;
682 /* try to set team count to thread count--success means thread got the
683 single block */
684 /* TODO: Should this be acquire or release? */
685 if (team->t.t_construct == old_this) {
686 status = __kmp_atomic_compare_store_acq(&team->t.t_construct, old_this,
687 th->th.th_local.this_construct);
688 }
689#if USE_ITT_BUILD
690 if (__itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
691 KMP_MASTER_GTID(gtid) && th->th.th_teams_microtask == NULL &&
692 team->t.t_active_level == 1) {
693 // Only report metadata by primary thread of active team at level 1
694 __kmp_itt_metadata_single(id_ref);
695 }
696#endif /* USE_ITT_BUILD */
697 }
698
699 if (__kmp_env_consistency_check) {
700 if (status && push_ws) {
701 __kmp_push_workshare(gtid, ct_psingle, id_ref);
702 } else {
703 __kmp_check_workshare(gtid, ct_psingle, id_ref);
704 }
705 }
706#if USE_ITT_BUILD
707 if (status) {
708 __kmp_itt_single_start(gtid);
709 }
710#endif /* USE_ITT_BUILD */
711 return status;
712}
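In the non-serialized branch of __kmp_enter_single above, the thread that wins the single region is the one whose atomic compare-and-store advances team->t.t_construct from its previous per-thread construct count to the new one. A compact stand-alone sketch of that first-to-CAS-wins pattern, using std::atomic rather than the runtime's primitives; all names here (team_construct, enter_single) are illustrative, not the runtime's.

    #include <atomic>
    #include <cstdio>
    #include <thread>
    #include <vector>

    // Shared count of single regions that have already been claimed.
    std::atomic<int> team_construct{0};

    // Each thread keeps its own count of single regions it has reached; the
    // first thread to push the shared counter forward executes the region.
    bool enter_single(int &my_construct) {
      int expected = my_construct; // value before this region
      ++my_construct;              // this thread has now reached the region
      return team_construct.compare_exchange_strong(expected, my_construct,
                                                    std::memory_order_acquire);
    }

    int main() {
      std::vector<std::thread> pool;
      for (int t = 0; t < 4; ++t)
        pool.emplace_back([t] {
          int my_construct = 0;
          if (enter_single(my_construct)) // exactly one thread succeeds
            std::printf("thread %d executes the single region\n", t);
        });
      for (auto &th : pool)
        th.join();
      return 0;
    }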
713
714void __kmp_exit_single(int gtid) {
715#if USE_ITT_BUILD
716 __kmp_itt_single_end(gtid);
717#endif /* USE_ITT_BUILD */
718 if (__kmp_env_consistency_check)
719 __kmp_pop_workshare(gtid, ct_psingle, NULL);
720}
721
722/* determine if we can go parallel or must use a serialized parallel region and
723 * how many threads we can use
724 * set_nproc is the number of threads requested for the team
725 * returns 0 if we should serialize or only use one thread,
726 * otherwise the number of threads to use
727 * The forkjoin lock is held by the caller. */
728static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
729 int master_tid, int set_nthreads,
730 int enter_teams) {
731 int capacity;
732 int new_nthreads;
733 KMP_DEBUG_ASSERT(__kmp_init_serial);
734 KMP_DEBUG_ASSERT(root && parent_team);
735 kmp_info_t *this_thr = parent_team->t.t_threads[master_tid];
736
737 // If dyn-var is set, dynamically adjust the number of desired threads,
738 // according to the method specified by dynamic_mode.
739 new_nthreads = set_nthreads;
740 if (!get__dynamic_2(parent_team, master_tid)) {
741 ;
742 }
743#ifdef USE_LOAD_BALANCE
744 else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) {
745 new_nthreads = __kmp_load_balance_nproc(root, set_nthreads);
746 if (new_nthreads == 1) {
747 KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d load balance reduced "
"reservation to 1 thread\n", master_tid); }
748 "reservation to 1 thread\n",if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d load balance reduced "
"reservation to 1 thread\n", master_tid); }
749 master_tid))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d load balance reduced "
"reservation to 1 thread\n", master_tid); }
;
750 return 1;
751 }
752 if (new_nthreads < set_nthreads) {
753 KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d load balance reduced "
"reservation to %d threads\n", master_tid, new_nthreads); }
754 "reservation to %d threads\n",if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d load balance reduced "
"reservation to %d threads\n", master_tid, new_nthreads); }
755 master_tid, new_nthreads))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d load balance reduced "
"reservation to %d threads\n", master_tid, new_nthreads); }
;
756 }
757 }
758#endif /* USE_LOAD_BALANCE */
759 else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) {
760 new_nthreads = __kmp_avail_proc - __kmp_nth +
761 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
762 if (new_nthreads <= 1) {
763 KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d thread limit reduced "
"reservation to 1 thread\n", master_tid); }
764 "reservation to 1 thread\n",if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d thread limit reduced "
"reservation to 1 thread\n", master_tid); }
765 master_tid))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d thread limit reduced "
"reservation to 1 thread\n", master_tid); }
;
766 return 1;
767 }
768 if (new_nthreads < set_nthreads) {
769 KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d thread limit reduced "
"reservation to %d threads\n", master_tid, new_nthreads); }
770 "reservation to %d threads\n",if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d thread limit reduced "
"reservation to %d threads\n", master_tid, new_nthreads); }
771 master_tid, new_nthreads))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d thread limit reduced "
"reservation to %d threads\n", master_tid, new_nthreads); }
;
772 } else {
773 new_nthreads = set_nthreads;
774 }
775 } else if (__kmp_global.g.g_dynamic_mode == dynamic_random) {
776 if (set_nthreads > 2) {
777 new_nthreads = __kmp_get_random(parent_team->t.t_threads[master_tid]);
778 new_nthreads = (new_nthreads % set_nthreads) + 1;
779 if (new_nthreads == 1) {
780 KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d dynamic random reduced "
"reservation to 1 thread\n", master_tid); }
781 "reservation to 1 thread\n",if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d dynamic random reduced "
"reservation to 1 thread\n", master_tid); }
782 master_tid))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d dynamic random reduced "
"reservation to 1 thread\n", master_tid); }
;
783 return 1;
784 }
785 if (new_nthreads < set_nthreads) {
786 KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d dynamic random reduced "
"reservation to %d threads\n", master_tid, new_nthreads); }
787 "reservation to %d threads\n",if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d dynamic random reduced "
"reservation to %d threads\n", master_tid, new_nthreads); }
788 master_tid, new_nthreads))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d dynamic random reduced "
"reservation to %d threads\n", master_tid, new_nthreads); }
;
789 }
790 }
791 } else {
792 KMP_ASSERT(0);
793 }
794
795 // Respect KMP_ALL_THREADS/KMP_DEVICE_THREAD_LIMIT.
796 if (__kmp_nth + new_nthreads -
797 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
798 __kmp_max_nth) {
799 int tl_nthreads = __kmp_max_nth - __kmp_nth +
800 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
801 if (tl_nthreads <= 0) {
802 tl_nthreads = 1;
803 }
804
805 // If dyn-var is false, emit a 1-time warning.
806 if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
807 __kmp_reserve_warn = 1;
808 __kmp_msg(kmp_ms_warning,
809 KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
810 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
811 }
812 if (tl_nthreads == 1) {
813 KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "
"reduced reservation to 1 thread\n", master_tid); }
814 "reduced reservation to 1 thread\n",if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "
"reduced reservation to 1 thread\n", master_tid); }
815 master_tid))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "
"reduced reservation to 1 thread\n", master_tid); }
;
816 return 1;
817 }
818 KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "
"reservation to %d threads\n", master_tid, tl_nthreads); }
819 "reservation to %d threads\n",if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "
"reservation to %d threads\n", master_tid, tl_nthreads); }
820 master_tid, tl_nthreads))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "
"reservation to %d threads\n", master_tid, tl_nthreads); }
;
821 new_nthreads = tl_nthreads;
822 }
823
824 // Respect OMP_THREAD_LIMIT
825 int cg_nthreads = this_thr->th.th_cg_roots->cg_nthreads;
826 int max_cg_threads = this_thr->th.th_cg_roots->cg_thread_limit;
827 if (cg_nthreads + new_nthreads -
828 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
829 max_cg_threads) {
830 int tl_nthreads = max_cg_threads - cg_nthreads +
831 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
832 if (tl_nthreads <= 0) {
833 tl_nthreads = 1;
834 }
835
836 // If dyn-var is false, emit a 1-time warning.
837 if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
838 __kmp_reserve_warn = 1;
839 __kmp_msg(kmp_ms_warning,
840 KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
841 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
842 }
843 if (tl_nthreads == 1) {
844 KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "
"reduced reservation to 1 thread\n", master_tid); }
845 "reduced reservation to 1 thread\n",if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "
"reduced reservation to 1 thread\n", master_tid); }
846 master_tid))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "
"reduced reservation to 1 thread\n", master_tid); }
;
847 return 1;
848 }
849 KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "
"reservation to %d threads\n", master_tid, tl_nthreads); }
850 "reservation to %d threads\n",if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "
"reservation to %d threads\n", master_tid, tl_nthreads); }
851 master_tid, tl_nthreads))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "
"reservation to %d threads\n", master_tid, tl_nthreads); }
;
852 new_nthreads = tl_nthreads;
853 }
854
855 // Check if the threads array is large enough, or needs expanding.
856 // See comment in __kmp_register_root() about the adjustment if
857 // __kmp_threads[0] == NULL.
858 capacity = __kmp_threads_capacity;
859 if (TCR_PTR(__kmp_threads[0]) == NULL) {
860 --capacity;
861 }
862 // If it is not for initializing the hidden helper team, we need to take
863 // __kmp_hidden_helper_threads_num out of the capacity because it is included
864 // in __kmp_threads_capacity.
865 if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
866 capacity -= __kmp_hidden_helper_threads_num;
867 }
868 if (__kmp_nth + new_nthreads -
869 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
870 capacity) {
871 // Expand the threads array.
872 int slotsRequired = __kmp_nth + new_nthreads -
873 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) -
874 capacity;
875 int slotsAdded = __kmp_expand_threads(slotsRequired);
876 if (slotsAdded < slotsRequired) {
877 // The threads array was not expanded enough.
878 new_nthreads -= (slotsRequired - slotsAdded);
879 KMP_ASSERT(new_nthreads >= 1);
880
881 // If dyn-var is false, emit a 1-time warning.
882 if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
883 __kmp_reserve_warn = 1;
884 if (__kmp_tp_cached) {
885 __kmp_msg(kmp_ms_warning,
886 KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
887 KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
888 KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
889 } else {
890 __kmp_msg(kmp_ms_warning,
891 KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
892 KMP_HNT(SystemLimitOnThreads), __kmp_msg_null);
893 }
894 }
895 }
896 }
897
898#ifdef KMP_DEBUG
899 if (new_nthreads == 1) {
900 KC_TRACE(10,
901 ("__kmp_reserve_threads: T#%d serializing team after reclaiming "
902 "dead roots and rechecking; requested %d threads\n",
903 __kmp_get_gtid(), set_nthreads));
904 } else {
905 KC_TRACE(10, ("__kmp_reserve_threads: T#%d allocating %d threads; requested"if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d allocating %d threads; requested"
" %d threads\n", __kmp_get_global_thread_id(), new_nthreads,
set_nthreads); }
906 " %d threads\n",if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d allocating %d threads; requested"
" %d threads\n", __kmp_get_global_thread_id(), new_nthreads,
set_nthreads); }
907 __kmp_get_gtid(), new_nthreads, set_nthreads))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d allocating %d threads; requested"
" %d threads\n", __kmp_get_global_thread_id(), new_nthreads,
set_nthreads); }
;
908 }
909#endif // KMP_DEBUG
910 return new_nthreads;
911}
912
913/* Allocate threads from the thread pool and assign them to the new team. We are
914 assured that there are enough threads available, because we checked on that
915 earlier within critical section forkjoin */
916static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
917 kmp_info_t *master_th, int master_gtid,
918 int fork_teams_workers) {
919 int i;
920 int use_hot_team;
921
922 KA_TRACE(10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_fork_team_threads: new_nprocs = %d\n"
, team->t.t_nproc); }
;
923 KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid());
924 KMP_MB();
925
926 /* first, let's setup the primary thread */
927 master_th->th.th_info.ds.ds_tid = 0;
928 master_th->th.th_team = team;
929 master_th->th.th_team_nproc = team->t.t_nproc;
930 master_th->th.th_team_master = master_th;
931 master_th->th.th_team_serialized = FALSE;
932 master_th->th.th_dispatch = &team->t.t_dispatch[0];
933
934/* make sure we are not the optimized hot team */
935#if KMP_NESTED_HOT_TEAMS
936 use_hot_team = 0;
937 kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
938 if (hot_teams) { // hot teams array is not allocated if
939 // KMP_HOT_TEAMS_MAX_LEVEL=0
940 int level = team->t.t_active_level - 1; // index in array of hot teams
941 if (master_th->th.th_teams_microtask) { // are we inside the teams?
942 if (master_th->th.th_teams_size.nteams > 1) {
943 ++level; // level was not increased in teams construct for
944 // team_of_masters
945 }
946 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
947 master_th->th.th_teams_level == team->t.t_level) {
948 ++level; // level was not increased in teams construct for
949 // team_of_workers before the parallel
950 } // team->t.t_level will be increased inside parallel
951 }
952 if (level < __kmp_hot_teams_max_level) {
953 if (hot_teams[level].hot_team) {
954 // hot team has already been allocated for given level
955 KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
956 use_hot_team = 1; // the team is ready to use
957 } else {
958 use_hot_team = 0; // AC: threads are not allocated yet
959 hot_teams[level].hot_team = team; // remember new hot team
960 hot_teams[level].hot_team_nth = team->t.t_nproc;
961 }
962 } else {
963 use_hot_team = 0;
964 }
965 }
966#else
967 use_hot_team = team == root->r.r_hot_team;
968#endif
969 if (!use_hot_team) {
970
971 /* install the primary thread */
972 team->t.t_threads[0] = master_th;
973 __kmp_initialize_info(master_th, team, 0, master_gtid);
974
975 /* now, install the worker threads */
976 for (i = 1; i < team->t.t_nproc; i++) {
977
978 /* fork or reallocate a new thread and install it in team */
979 kmp_info_t *thr = __kmp_allocate_thread(root, team, i);
980 team->t.t_threads[i] = thr;
981 KMP_DEBUG_ASSERT(thr);
982 KMP_DEBUG_ASSERT(thr->th.th_team == team);
983 /* align team and thread arrived states */
984 KA_TRACE(20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
"T#%d(%d:%d) join =%llu, plain=%llu\n", __kmp_gtid_from_tid(
0, team), team->t.t_id, 0, __kmp_gtid_from_tid(i, team), team
->t.t_id, i, team->t.t_bar[bs_forkjoin_barrier].b_arrived
, team->t.t_bar[bs_plain_barrier].b_arrived); }
985 "T#%d(%d:%d) join =%llu, plain=%llu\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
"T#%d(%d:%d) join =%llu, plain=%llu\n", __kmp_gtid_from_tid(
0, team), team->t.t_id, 0, __kmp_gtid_from_tid(i, team), team
->t.t_id, i, team->t.t_bar[bs_forkjoin_barrier].b_arrived
, team->t.t_bar[bs_plain_barrier].b_arrived); }
986 __kmp_gtid_from_tid(0, team), team->t.t_id, 0,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
"T#%d(%d:%d) join =%llu, plain=%llu\n", __kmp_gtid_from_tid(
0, team), team->t.t_id, 0, __kmp_gtid_from_tid(i, team), team
->t.t_id, i, team->t.t_bar[bs_forkjoin_barrier].b_arrived
, team->t.t_bar[bs_plain_barrier].b_arrived); }
987 __kmp_gtid_from_tid(i, team), team->t.t_id, i,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
"T#%d(%d:%d) join =%llu, plain=%llu\n", __kmp_gtid_from_tid(
0, team), team->t.t_id, 0, __kmp_gtid_from_tid(i, team), team
->t.t_id, i, team->t.t_bar[bs_forkjoin_barrier].b_arrived
, team->t.t_bar[bs_plain_barrier].b_arrived); }
988 team->t.t_bar[bs_forkjoin_barrier].b_arrived,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
"T#%d(%d:%d) join =%llu, plain=%llu\n", __kmp_gtid_from_tid(
0, team), team->t.t_id, 0, __kmp_gtid_from_tid(i, team), team
->t.t_id, i, team->t.t_bar[bs_forkjoin_barrier].b_arrived
, team->t.t_bar[bs_plain_barrier].b_arrived); }
989 team->t.t_bar[bs_plain_barrier].b_arrived))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
"T#%d(%d:%d) join =%llu, plain=%llu\n", __kmp_gtid_from_tid(
0, team), team->t.t_id, 0, __kmp_gtid_from_tid(i, team), team
->t.t_id, i, team->t.t_bar[bs_forkjoin_barrier].b_arrived
, team->t.t_bar[bs_plain_barrier].b_arrived); }
;
990 thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
991 thr->th.th_teams_level = master_th->th.th_teams_level;
992 thr->th.th_teams_size = master_th->th.th_teams_size;
993 { // Initialize threads' barrier data.
994 int b;
995 kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar;
996 for (b = 0; b < bs_last_barrier; ++b) {
997 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
998 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
999#if USE_DEBUGGER
1000 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
1001#endif
1002 }
1003 }
1004 }
1005
1006#if KMP_AFFINITY_SUPPORTED
1007 // Do not partition the places list for teams construct workers who
1008 // haven't actually been forked to do real work yet. This partitioning
1009 // will take place in the parallel region nested within the teams construct.
1010 if (!fork_teams_workers) {
1011 __kmp_partition_places(team);
1012 }
1013#endif
1014 }
1015
1016 if (__kmp_display_affinity && team->t.t_display_affinity != 1) {
1017 for (i = 0; i < team->t.t_nproc; i++) {
1018 kmp_info_t *thr = team->t.t_threads[i];
1019 if (thr->th.th_prev_num_threads != team->t.t_nproc ||
1020 thr->th.th_prev_level != team->t.t_level) {
1021 team->t.t_display_affinity = 1;
1022 break;
1023 }
1024 }
1025 }
1026
1027 KMP_MB();
1028}
1029
1030#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1031// Propagate any changes to the floating point control registers out to the team
1032// We try to avoid unnecessary writes to the relevant cache line in the team
1033// structure, so we don't make changes unless they are needed.
1034inline static void propagateFPControl(kmp_team_t *team) {
1035 if (__kmp_inherit_fp_control) {
1036 kmp_int16 x87_fpu_control_word;
1037 kmp_uint32 mxcsr;
1038
1039 // Get primary thread's values of FPU control flags (both X87 and vector)
1040 __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
1041 __kmp_store_mxcsr(&mxcsr);
1042 mxcsr &= KMP_X86_MXCSR_MASK;
1043
1044 // There is no point looking at t_fp_control_saved here.
1045 // If it is TRUE, we still have to update the values if they are different
1046 // from those we now have. If it is FALSE we didn't save anything yet, but
1047 // our objective is the same. We have to ensure that the values in the team
1048 // are the same as those we have.
1049 // So, this code achieves what we need whether or not t_fp_control_saved is
1050 // true. By checking whether the value needs updating we avoid unnecessary
1051 // writes that would put the cache-line into a written state, causing all
1052 // threads in the team to have to read it again.
1053 KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
1054 KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);
1055 // Although we don't use this value, other code in the runtime wants to know
1056 // whether it should restore them. So we must ensure it is correct.
1057 KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
1058 } else {
1059 // Similarly here. Don't write to this cache-line in the team structure
1060 // unless we have to.
1061 KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);
1062 }
1063}
1064
1065// Do the opposite, setting the hardware registers to the updated values from
1066// the team.
1067inline static void updateHWFPControl(kmp_team_t *team) {
1068 if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) {
1069 // Only reset the fp control regs if they have been changed in the team.
1070 // the parallel region that we are exiting.
1071 kmp_int16 x87_fpu_control_word;
1072 kmp_uint32 mxcsr;
1073 __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
1074 __kmp_store_mxcsr(&mxcsr);
1075 mxcsr &= KMP_X86_MXCSR_MASK;
1076
1077 if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) {
1078 __kmp_clear_x87_fpu_status_word();
1079 __kmp_load_x87_fpu_control_word(&team->t.t_x87_fpu_control_word);
1080 }
1081
1082 if (team->t.t_mxcsr != mxcsr) {
1083 __kmp_load_mxcsr(&team->t.t_mxcsr);
1084 }
1085 }
1086}
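
Editor's note: both helpers above mask MXCSR with KMP_X86_MXCSR_MASK (0xffffffc0 in this build) before comparing. The low six MXCSR bits are the sticky exception status flags, so without the mask two threads with identical rounding/denormal control could still appear to differ. A hedged, standalone illustration:

// Editor's sketch (not part of kmp_runtime.cpp).
#include <cstdio>

int main() {
  const unsigned MXCSR_CONTROL_MASK = 0xffffffc0u; // value of KMP_X86_MXCSR_MASK above
  unsigned primary = 0x1f80u | 0x1u; // default control bits + invalid-op status flag
  unsigned worker = 0x1f80u;         // default control bits, clean status
  std::printf("raw values differ:   %d\n", primary != worker); // 1
  std::printf("control bits differ: %d\n",
              (primary & MXCSR_CONTROL_MASK) != (worker & MXCSR_CONTROL_MASK)); // 0
  return 0;
}
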
1087#else
1088#define propagateFPControl(x) ((void)0)
1089#define updateHWFPControl(x) ((void)0)
1090#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1091
1092static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team,
1093 int realloc); // forward declaration
1094
1095/* Run a parallel region that has been serialized, so runs only in a team of the
1096 single primary thread. */
1097void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
1098 kmp_info_t *this_thr;
1099 kmp_team_t *serial_team;
1100
1101 KC_TRACE(10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid));
1102
1103 /* Skip all this code for autopar serialized loops since it results in
1104 unacceptable overhead */
1105 if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
1106 return;
1107
1108 if (!TCR_4(__kmp_init_parallel))
1109 __kmp_parallel_initialize();
1110 __kmp_resume_if_soft_paused();
1111
1112 this_thr = __kmp_threads[global_tid];
1113 serial_team = this_thr->th.th_serial_team;
1114
1115 /* utilize the serialized team held by this thread */
1116 KMP_DEBUG_ASSERT(serial_team);
1117 KMP_MB();
1118
1119 if (__kmp_tasking_mode != tskm_immediate_exec) {
1120 KMP_DEBUG_ASSERT(
1121 this_thr->th.th_task_team ==
1122 this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
1123 KMP_DEBUG_ASSERT(serial_team->t.t_task_team[this_thr->th.th_task_state] ==
1124 NULL);
1125 KA_TRACE(20, ("__kmpc_serialized_parallel: T#%d pushing task_team %p / "
1126 "team %p, new task_team = NULL\n",
1127 global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
1128 this_thr->th.th_task_team = NULL;
1129 }
1130
1131 kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
1132 if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
1133 proc_bind = proc_bind_false;
1134 } else if (proc_bind == proc_bind_default) {
1135 // No proc_bind clause was specified, so use the current value
1136 // of proc-bind-var for this parallel region.
1137 proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
1138 }
1139 // Reset for next parallel region
1140 this_thr->th.th_set_proc_bind = proc_bind_default;
1141
1142#if OMPT_SUPPORT
1143 ompt_data_t ompt_parallel_data = ompt_data_none;
1144 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1145 if (ompt_enabled.enabled &&
1146 this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
1147
1148 ompt_task_info_t *parent_task_info;
1149 parent_task_info = OMPT_CUR_TASK_INFO(this_thr);
1150
1151 parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1152 if (ompt_enabled.ompt_callback_parallel_begin) {
1153 int team_size = 1;
1154
1155 ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
1156 &(parent_task_info->task_data), &(parent_task_info->frame),
1157 &ompt_parallel_data, team_size,
1158 ompt_parallel_invoker_program | ompt_parallel_team, codeptr);
1159 }
1160 }
1161#endif // OMPT_SUPPORT
1162
1163 if (this_thr->th.th_team != serial_team) {
1164 // Nested level will be an index in the nested nthreads array
1165 int level = this_thr->th.th_team->t.t_level;
1166
1167 if (serial_team->t.t_serialized) {
1168 /* this serial team was already used
1169 TODO increase performance by making these locks more specific */
1170 kmp_team_t *new_team;
1171
1172 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
1173
1174 new_team =
1175 __kmp_allocate_team(this_thr->th.th_root, 1, 1,
1176#if OMPT_SUPPORT
1177 ompt_parallel_data,
1178#endif
1179 proc_bind, &this_thr->th.th_current_task->td_icvs,
1180 0 USE_NESTED_HOT_ARG(NULL));
1181 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
1182 KMP_ASSERT(new_team);
1183
1184 /* setup new serialized team and install it */
1185 new_team->t.t_threads[0] = this_thr;
1186 new_team->t.t_parent = this_thr->th.th_team;
1187 serial_team = new_team;
1188 this_thr->th.th_serial_team = serial_team;
1189
1190 KF_TRACE(
1191 10,
1192 ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
1193 global_tid, serial_team));
1194
1195 /* TODO the above breaks the requirement that if we run out of resources,
1196 then we can still guarantee that serialized teams are ok, since we may
1197 need to allocate a new one */
1198 } else {
1199 KF_TRACE(
1200 10,
1201 ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
1202 global_tid, serial_team));
1203 }
1204
1205 /* we have to initialize this serial team */
1206 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
1207 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
1208 KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team);
1209 serial_team->t.t_ident = loc;
1210 serial_team->t.t_serialized = 1;
1211 serial_team->t.t_nproc = 1;
1212 serial_team->t.t_parent = this_thr->th.th_team;
1213 serial_team->t.t_sched.sched = this_thr->th.th_team->t.t_sched.sched;
1214 this_thr->th.th_team = serial_team;
1215 serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;
1216
1217 KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d curtask=%p\n", global_tid,
1218 this_thr->th.th_current_task));
1219 KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1);
1220 this_thr->th.th_current_task->td_flags.executing = 0;
1221
1222 __kmp_push_current_task_to_thread(this_thr, serial_team, 0);
1223
1224 /* TODO: GEH: do ICVs work for nested serialized teams? Don't we need an
1225 implicit task for each serialized task represented by
1226 team->t.t_serialized? */
1227 copy_icvs(&this_thr->th.th_current_task->td_icvs,
1228 &this_thr->th.th_current_task->td_parent->td_icvs);
1229
1230 // Thread value exists in the nested nthreads array for the next nested
1231 // level
1232 if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
1233 this_thr->th.th_current_task->td_icvs.nproc =
1234 __kmp_nested_nth.nth[level + 1];
1235 }
1236
1237 if (__kmp_nested_proc_bind.used &&
1238 (level + 1 < __kmp_nested_proc_bind.used)) {
1239 this_thr->th.th_current_task->td_icvs.proc_bind =
1240 __kmp_nested_proc_bind.bind_types[level + 1];
1241 }
1242
1243#if USE_DEBUGGER
1244 serial_team->t.t_pkfn = (microtask_t)(~0); // For the debugger.
1245#endif
1246 this_thr->th.th_info.ds.ds_tid = 0;
1247
1248 /* set thread cache values */
1249 this_thr->th.th_team_nproc = 1;
1250 this_thr->th.th_team_master = this_thr;
1251 this_thr->th.th_team_serialized = 1;
1252
1253 serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
1254 serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;
1255 serial_team->t.t_def_allocator = this_thr->th.th_def_allocator; // save
1256
1257 propagateFPControl(serial_team);
1258
1259 /* check if we need to allocate dispatch buffers stack */
1260 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
1261 if (!serial_team->t.t_dispatch->th_disp_buffer) {
1262 serial_team->t.t_dispatch->th_disp_buffer =
1263 (dispatch_private_info_t *)__kmp_allocate(
1264 sizeof(dispatch_private_info_t));
1265 }
1266 this_thr->th.th_dispatch = serial_team->t.t_dispatch;
1267
1268 KMP_MB();
1269
1270 } else {
1271 /* this serialized team is already being used,
1272 * that's fine, just add another nested level */
1273 KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
1274 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
1275 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
1276 ++serial_team->t.t_serialized;
1277 this_thr->th.th_team_serialized = serial_team->t.t_serialized;
1278
1279 // Nested level will be an index in the nested nthreads array
1280 int level = this_thr->th.th_team->t.t_level;
1281 // Thread value exists in the nested nthreads array for the next nested
1282 // level
1283 if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
1284 this_thr->th.th_current_task->td_icvs.nproc =
1285 __kmp_nested_nth.nth[level + 1];
1286 }
1287 serial_team->t.t_level++;
1288 KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d increasing nesting level "
1289 "of serial team %p to %d\n",
1290 global_tid, serial_team, serial_team->t.t_level));
1291
1292 /* allocate/push dispatch buffers stack */
1293 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
1294 {
1295 dispatch_private_info_t *disp_buffer =
1296 (dispatch_private_info_t *)__kmp_allocate(
1297 sizeof(dispatch_private_info_t));
1298 disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
1299 serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
1300 }
1301 this_thr->th.th_dispatch = serial_team->t.t_dispatch;
1302
1303 KMP_MB();
1304 }
1305 KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq);
1306
1307 // Perform the display affinity functionality for
1308 // serialized parallel regions
1309 if (__kmp_display_affinity) {
1310 if (this_thr->th.th_prev_level != serial_team->t.t_level ||
1311 this_thr->th.th_prev_num_threads != 1) {
1312 // NULL means use the affinity-format-var ICV
1313 __kmp_aux_display_affinity(global_tid, NULL);
1314 this_thr->th.th_prev_level = serial_team->t.t_level;
1315 this_thr->th.th_prev_num_threads = 1;
1316 }
1317 }
1318
1319 if (__kmp_env_consistency_check)
1320 __kmp_push_parallel(global_tid, NULL);
1321#if OMPT_SUPPORT
1322 serial_team->t.ompt_team_info.master_return_address = codeptr;
1323 if (ompt_enabled.enabled &&
1324 this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
1325 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
1326 OMPT_GET_FRAME_ADDRESS(0);
1327
1328 ompt_lw_taskteam_t lw_taskteam;
1329 __ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid,
1330 &ompt_parallel_data, codeptr);
1331
1332 __ompt_lw_taskteam_link(&lw_taskteam, this_thr, 1);
1333 // don't use lw_taskteam after linking. content was swapped
1334
1335 /* OMPT implicit task begin */
1336 if (ompt_enabled.ompt_callback_implicit_task) {
1337 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1338 ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr),
1339 OMPT_CUR_TASK_DATA(this_thr), 1, __kmp_tid_from_gtid(global_tid),
1340 ompt_task_implicit); // TODO: Can this be ompt_task_initial?
1341 OMPT_CUR_TASK_INFO(this_thr)->thread_num =
1342 __kmp_tid_from_gtid(global_tid);
1343 }
1344
1345 /* OMPT state */
1346 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
1347 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
1348 OMPT_GET_FRAME_ADDRESS(0);
1349 }
1350#endif
1351}
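
Editor's note: a user-level view of the path above, given as a hedged OpenMP example rather than runtime internals. When a nested parallel ends up with a team of one (here because max-active-levels is 1), the region is serialized on the encountering thread, yet the nesting level still increases, which matches the t_level/t_serialized bookkeeping in __kmp_serialized_parallel.

// Editor's sketch: compile with -fopenmp; output shown for this runtime's defaults.
#include <cstdio>
#include <omp.h>

int main() {
  omp_set_max_active_levels(1); // the inner region below becomes inactive (serialized)
  #pragma omp parallel num_threads(2)
  {
    #pragma omp parallel num_threads(4) // runs on the single encountering thread
    {
      #pragma omp single
      std::printf("inner level=%d team size=%d\n",
                  omp_get_level(), omp_get_num_threads());
      // expected per outer thread: "inner level=2 team size=1"
    }
  }
  return 0;
}
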
1352
1353// Test if this fork is for a team closely nested in a teams construct
1354static inline bool __kmp_is_fork_in_teams(kmp_info_t *master_th,
1355 microtask_t microtask, int level,
1356 int teams_level, kmp_va_list ap) {
1357 return (master_th->th.th_teams_microtask && ap &&
1358 microtask != (microtask_t)__kmp_teams_master && level == teams_level);
1359}
1360
1361// Test if this fork is for the teams construct, i.e. to form the outer league
1362// of teams
1363static inline bool __kmp_is_entering_teams(int active_level, int level,
1364 int teams_level, kmp_va_list ap) {
1365 return ((ap == NULL && active_level == 0) ||
1366 (ap && teams_level > 0 && teams_level == level));
1367}
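
Editor's note: in user code, the two predicates above separate these cases (a hedged illustration, not runtime code): the fork that creates the league of teams satisfies __kmp_is_entering_teams, while a parallel closely nested inside each team satisfies __kmp_is_fork_in_teams and takes the hot-team fast path in __kmp_fork_in_teams below.

// Editor's sketch: compile with -fopenmp (host teams requires OpenMP 5.x support).
#include <cstdio>
#include <omp.h>

int main() {
  #pragma omp teams num_teams(2) thread_limit(4) // outer league: "entering teams" fork
  {
    #pragma omp parallel num_threads(2) // closely nested parallel: "fork in teams" path
    {
      #pragma omp single
      std::printf("team %d of %d, team size %d\n", omp_get_team_num(),
                  omp_get_num_teams(), omp_get_num_threads());
    }
  }
  return 0;
}
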
1368
1369// AC: This is start of parallel that is nested inside teams construct.
1370// The team is actual (hot), all workers are ready at the fork barrier.
1371// No lock needed to initialize the team a bit, then free workers.
1372static inline int
1373__kmp_fork_in_teams(ident_t *loc, int gtid, kmp_team_t *parent_team,
1374 kmp_int32 argc, kmp_info_t *master_th, kmp_root_t *root,
1375 enum fork_context_e call_context, microtask_t microtask,
1376 launch_t invoker, int master_set_numthreads, int level,
1377#if OMPT_SUPPORT1
1378 ompt_data_t ompt_parallel_data, void *return_address,
1379#endif
1380 kmp_va_list ap) {
1381 void **argv;
1382 int i;
1383
1384 parent_team->t.t_ident = loc;
1385 __kmp_alloc_argv_entries(argc, parent_team, TRUE);
1386 parent_team->t.t_argc = argc;
1387 argv = (void **)parent_team->t.t_argv;
1388 for (i = argc - 1; i >= 0; --i) {
1389 *argv++ = va_arg(kmp_va_deref(ap), void *);
1390 }
1391 // Increment our nested depth levels, but not increase the serialization
1392 if (parent_team == master_th->th.th_serial_team) {
1393 // AC: we are in serialized parallel
1394 __kmpc_serialized_parallel(loc, gtid);
1395 KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1);
1396
1397 if (call_context == fork_context_gnu) {
1398 // AC: need to decrement t_serialized for enquiry functions to work
1399 // correctly, will restore at join time
1400 parent_team->t.t_serialized--;
1401 return TRUE;
1402 }
1403
1404#if OMPD_SUPPORT1
1405 parent_team->t.t_pkfn = microtask;
1406#endif
1407
1408#if OMPT_SUPPORT1
1409 void *dummy;
1410 void **exit_frame_p;
1411 ompt_data_t *implicit_task_data;
1412 ompt_lw_taskteam_t lw_taskteam;
1413
1414 if (ompt_enabled.enabled) {
1415 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1416 &ompt_parallel_data, return_address);
1417 exit_frame_p = &(lw_taskteam.ompt_task_info.frame.exit_frame.ptr);
1418
1419 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
1420 // Don't use lw_taskteam after linking. Content was swapped.
1421
1422 /* OMPT implicit task begin */
1423 implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
1424 if (ompt_enabled.ompt_callback_implicit_task) {
1425 OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
1426 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1427 ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), implicit_task_data,
1428 1, OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
1429 }
1430
1431 /* OMPT state */
1432 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1433 } else {
1434 exit_frame_p = &dummy;
1435 }
1436#endif
1437
1438 // AC: need to decrement t_serialized for enquiry functions to work
1439 // correctly, will restore at join time
1440 parent_team->t.t_serialized--;
1441
1442 {
1443 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1444 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1445 __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
1446#if OMPT_SUPPORT1
1447 ,
1448 exit_frame_p
1449#endif
1450 );
1451 }
1452
1453#if OMPT_SUPPORT1
1454 if (ompt_enabled.enabled) {
1455 *exit_frame_p = NULL;
1456 OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = ompt_data_none;
1457 if (ompt_enabled.ompt_callback_implicit_task) {
1458 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1459 ompt_scope_end, NULL, implicit_task_data, 1,
1460 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
1461 }
1462 ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
1463 __ompt_lw_taskteam_unlink(master_th);
1464 if (ompt_enabled.ompt_callback_parallel_end) {
1465 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
1466 &ompt_parallel_data, OMPT_CUR_TASK_DATA(master_th),
1467 OMPT_INVOKER(call_context) | ompt_parallel_team, return_address);
1468 }
1469 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1470 }
1471#endif
1472 return TRUE;
1473 }
1474
1475 parent_team->t.t_pkfn = microtask;
1476 parent_team->t.t_invoke = invoker;
1477 KMP_ATOMIC_INC(&root->r.r_in_parallel);
1478 parent_team->t.t_active_level++;
1479 parent_team->t.t_level++;
1480 parent_team->t.t_def_allocator = master_th->th.th_def_allocator; // save
1481
1482 // If the threads allocated to the team are less than the thread limit, update
1483 // the thread limit here. th_teams_size.nth is specific to this team nested
1484 // in a teams construct, the team is fully created, and we're about to do
1485 // the actual fork. Best to do this here so that the subsequent uses below
1486 // and in the join have the correct value.
1487 master_th->th.th_teams_size.nth = parent_team->t.t_nproc;
1488
1489#if OMPT_SUPPORT1
1490 if (ompt_enabled.enabled) {
1491 ompt_lw_taskteam_t lw_taskteam;
1492 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, &ompt_parallel_data,
1493 return_address);
1494 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 1, true);
1495 }
1496#endif
1497
1498 /* Change number of threads in the team if requested */
1499 if (master_set_numthreads) { // The parallel has num_threads clause
1500 if (master_set_numthreads <= master_th->th.th_teams_size.nth) {
1501 // AC: only can reduce number of threads dynamically, can't increase
1502 kmp_info_t **other_threads = parent_team->t.t_threads;
1503 // NOTE: if using distributed barrier, we need to run this code block
1504 // even when the team size appears not to have changed from the max.
1505 int old_proc = master_th->th.th_teams_size.nth;
1506 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
1507 __kmp_resize_dist_barrier(parent_team, old_proc, master_set_numthreads);
1508 __kmp_add_threads_to_team(parent_team, master_set_numthreads);
1509 }
1510 parent_team->t.t_nproc = master_set_numthreads;
1511 for (i = 0; i < master_set_numthreads; ++i) {
1512 other_threads[i]->th.th_team_nproc = master_set_numthreads;
1513 }
1514 }
1515 // Keep extra threads hot in the team for possible next parallels
1516 master_th->th.th_set_nproc = 0;
1517 }
1518
1519#if USE_DEBUGGER
1520 if (__kmp_debugging) { // Let debugger override number of threads.
1521 int nth = __kmp_omp_num_threads(loc);
1522 if (nth > 0) { // 0 means debugger doesn't want to change num threads
1523 master_set_numthreads = nth;
1524 }
1525 }
1526#endif
1527
1528 // Figure out the proc_bind policy for the nested parallel within teams
1529 kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
1530 // proc_bind_default means don't update
1531 kmp_proc_bind_t proc_bind_icv = proc_bind_default;
1532 if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
1533 proc_bind = proc_bind_false;
1534 } else {
1535 // No proc_bind clause specified; use current proc-bind-var
1536 if (proc_bind == proc_bind_default) {
1537 proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
1538 }
1539 /* else: The proc_bind policy was specified explicitly on parallel clause.
1540 This overrides proc-bind-var for this parallel region, but does not
1541 change proc-bind-var. */
1542 // Figure the value of proc-bind-var for the child threads.
1543 if ((level + 1 < __kmp_nested_proc_bind.used) &&
1544 (__kmp_nested_proc_bind.bind_types[level + 1] !=
1545 master_th->th.th_current_task->td_icvs.proc_bind)) {
1546 proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
1547 }
1548 }
1549 KMP_CHECK_UPDATE(parent_team->t.t_proc_bind, proc_bind);
1550 // Need to change the bind-var ICV to correct value for each implicit task
1551 if (proc_bind_icv != proc_bind_default &&
1552 master_th->th.th_current_task->td_icvs.proc_bind != proc_bind_icv) {
1553 kmp_info_t **other_threads = parent_team->t.t_threads;
1554 for (i = 0; i < master_th->th.th_team_nproc; ++i) {
1555 other_threads[i]->th.th_current_task->td_icvs.proc_bind = proc_bind_icv;
1556 }
1557 }
1558 // Reset for next parallel region
1559 master_th->th.th_set_proc_bind = proc_bind_default;
1560
1561#if USE_ITT_BUILD && USE_ITT_NOTIFY
1562 if (((__itt_frame_submit_v3_ptr && __itt_get_timestamp_ptr) ||
1563 KMP_ITT_DEBUG) &&
1564 __kmp_forkjoin_frames_mode == 3 &&
1565 parent_team->t.t_active_level == 1 // only report frames at level 1
1566 && master_th->th.th_teams_size.nteams == 1) {
1567 kmp_uint64 tmp_time = __itt_get_timestamp();
1568 master_th->th.th_frame_time = tmp_time;
1569 parent_team->t.t_region_time = tmp_time;
1570 }
1571 if (__itt_stack_caller_create_ptr) {
1572 KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);
1573 // create new stack stitching id before entering fork barrier
1574 parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();
1575 }
1576#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1577#if KMP_AFFINITY_SUPPORTED1
1578 __kmp_partition_places(parent_team);
1579#endif
1580
1581 KF_TRACE(10, ("__kmp_fork_in_teams: before internal fork: root=%p, team=%p, "
1582 "master_th=%p, gtid=%d\n",
1583 root, parent_team, master_th, gtid));
1584 __kmp_internal_fork(loc, gtid, parent_team);
1585 KF_TRACE(10, ("__kmp_fork_in_teams: after internal fork: root=%p, team=%p, "
1586 "master_th=%p, gtid=%d\n",
1587 root, parent_team, master_th, gtid));
1588
1589 if (call_context == fork_context_gnu)
1590 return TRUE;
1591
1592 /* Invoke microtask for PRIMARY thread */
1593 KA_TRACE(20, ("__kmp_fork_in_teams: T#%d(%d:0) invoke microtask = %p\n", gtid,
1594 parent_team->t.t_id, parent_team->t.t_pkfn));
1595
1596 if (!parent_team->t.t_invoke(gtid)) {
1597 KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread");
1598 }
1599 KA_TRACE(20, ("__kmp_fork_in_teams: T#%d(%d:0) done microtask = %p\n", gtid,
1600 parent_team->t.t_id, parent_team->t.t_pkfn));
1601 KMP_MB(); /* Flush all pending memory write invalidates. */
1602
1603 KA_TRACE(20, ("__kmp_fork_in_teams: parallel exit T#%d\n", gtid));
1604
1605 return TRUE;
1606}
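
Editor's note: the num_threads handling above can only shrink the hot team; it never grows it past the teams thread limit cached in master_th->th.th_teams_size.nth. A minimal sketch of that clamp, using a hypothetical helper name:

// Editor's sketch (not part of kmp_runtime.cpp).
#include <algorithm>
#include <cstdio>

// Hypothetical helper mirroring the clamp in __kmp_fork_in_teams.
static int resolve_nested_team_size(int teams_thread_limit, int num_threads_clause) {
  if (num_threads_clause <= 0) // no num_threads clause on the nested parallel
    return teams_thread_limit;
  return std::min(num_threads_clause, teams_thread_limit);
}

int main() {
  std::printf("%d\n", resolve_nested_team_size(8, 4));  // clause shrinks the team -> 4
  std::printf("%d\n", resolve_nested_team_size(8, 16)); // cannot grow -> stays 8
  return 0;
}
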
1607
1608// Create a serialized parallel region
1609static inline int
1610__kmp_serial_fork_call(ident_t *loc, int gtid, enum fork_context_e call_context,
1611 kmp_int32 argc, microtask_t microtask, launch_t invoker,
1612 kmp_info_t *master_th, kmp_team_t *parent_team,
1613#if OMPT_SUPPORT1
1614 ompt_data_t *ompt_parallel_data, void **return_address,
1615 ompt_data_t **parent_task_data,
1616#endif
1617 kmp_va_list ap) {
1618 kmp_team_t *team;
1619 int i;
1620 void **argv;
1621
1622/* josh todo: hypothetical question: what do we do for OS X*? */
1623#if KMP_OS_LINUX && \
1624 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
1625 void *args[argc];
1626#else
1627 void **args = (void **)KMP_ALLOCA(argc * sizeof(void *));
1628#endif /* KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || \
1629 KMP_ARCH_AARCH64) */
1630
1631 KA_TRACE(
1632 20, ("__kmp_serial_fork_call: T#%d serializing parallel region\n", gtid));
1633
1634 __kmpc_serialized_parallel(loc, gtid);
1635
1636#if OMPD_SUPPORT1
1637 master_th->th.th_serial_team->t.t_pkfn = microtask;
1638#endif
1639
1640 if (call_context == fork_context_intel) {
1641 /* TODO this sucks, use the compiler itself to pass args! :) */
1642 master_th->th.th_serial_team->t.t_ident = loc;
1643 if (!ap) {
1644 // revert change made in __kmpc_serialized_parallel()
1645 master_th->th.th_serial_team->t.t_level--;
1646// Get args from parent team for teams construct
1647
1648#if OMPT_SUPPORT1
1649 void *dummy;
1650 void **exit_frame_p;
1651 ompt_task_info_t *task_info;
1652 ompt_lw_taskteam_t lw_taskteam;
1653
1654 if (ompt_enabled.enabled) {
1655 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1656 ompt_parallel_data, *return_address);
1657
1658 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
1659 // don't use lw_taskteam after linking. content was swapped
1660 task_info = OMPT_CUR_TASK_INFO(master_th);
1661 exit_frame_p = &(task_info->frame.exit_frame.ptr);
1662 if (ompt_enabled.ompt_callback_implicit_task) {
1663 OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
1664 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1665 ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
1666 &(task_info->task_data), 1,
1667 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
1668 }
1669
1670 /* OMPT state */
1671 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1672 } else {
1673 exit_frame_p = &dummy;
1674 }
1675#endif
1676
1677 {
1678 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1679 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1680 __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
1681#if OMPT_SUPPORT1
1682 ,
1683 exit_frame_p
1684#endif
1685 );
1686 }
1687
1688#if OMPT_SUPPORT1
1689 if (ompt_enabled.enabled) {
1690 *exit_frame_p = NULL;
1691 if (ompt_enabled.ompt_callback_implicit_task) {
1692 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1693 ompt_scope_end, NULL, &(task_info->task_data), 1,
1694 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
1695 }
1696 *ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
1697 __ompt_lw_taskteam_unlink(master_th);
1698 if (ompt_enabled.ompt_callback_parallel_end) {
1699 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
1700 ompt_parallel_data, *parent_task_data,
1701 OMPT_INVOKER(call_context) | ompt_parallel_team, *return_address);
1702 }
1703 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1704 }
1705#endif
1706 } else if (microtask == (microtask_t)__kmp_teams_master) {
1707 KMP_DEBUG_ASSERT(master_th->th.th_team == master_th->th.th_serial_team);
1708 team = master_th->th.th_team;
1709 // team->t.t_pkfn = microtask;
1710 team->t.t_invoke = invoker;
1711 __kmp_alloc_argv_entries(argc, team, TRUE);
1712 team->t.t_argc = argc;
1713 argv = (void **)team->t.t_argv;
1714 if (ap) {
1715 for (i = argc - 1; i >= 0; --i)
1716 *argv++ = va_arg(kmp_va_deref(ap), void *);
1717 } else {
1718 for (i = 0; i < argc; ++i)
1719 // Get args from parent team for teams construct
1720 argv[i] = parent_team->t.t_argv[i];
1721 }
1722 // AC: revert change made in __kmpc_serialized_parallel()
1723 // because initial code in teams should have level=0
1724 team->t.t_level--;
1725 // AC: call special invoker for outer "parallel" of teams construct
1726 invoker(gtid);
1727#if OMPT_SUPPORT
1728 if (ompt_enabled.enabled) {
1729 ompt_task_info_t *task_info = OMPT_CUR_TASK_INFO(master_th);
1730 if (ompt_enabled.ompt_callback_implicit_task) {
1731 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1732 ompt_scope_end, NULL, &(task_info->task_data), 0,
1733 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_initial);
1734 }
1735 if (ompt_enabled.ompt_callback_parallel_end) {
1736 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
1737 ompt_parallel_data, *parent_task_data,
1738 OMPT_INVOKER(call_context) | ompt_parallel_league,
1739 *return_address);
1740 }
1741 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1742 }
1743#endif
1744 } else {
1745 argv = args;
1746 for (i = argc - 1; i >= 0; --i)
1747 *argv++ = va_arg(kmp_va_deref(ap), void *);
1748 KMP_MB();
1749
1750#if OMPT_SUPPORT1
1751 void *dummy;
1752 void **exit_frame_p;
1753 ompt_task_info_t *task_info;
1754 ompt_lw_taskteam_t lw_taskteam;
1755 ompt_data_t *implicit_task_data;
1756
1757 if (ompt_enabled.enabled) {
1758 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1759 ompt_parallel_data, *return_address);
1760 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
1761 // don't use lw_taskteam after linking. content was swapped
1762 task_info = OMPT_CUR_TASK_INFO(master_th);
1763 exit_frame_p = &(task_info->frame.exit_frame.ptr);
1764
1765 /* OMPT implicit task begin */
1766 implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
1767 if (ompt_enabled.ompt_callback_implicit_task) {
1768 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1769 ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
1770 implicit_task_data, 1, __kmp_tid_from_gtid(gtid),
1771 ompt_task_implicit);
1772 OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
1773 }
1774
1775 /* OMPT state */
1776 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1777 } else {
1778 exit_frame_p = &dummy;
1779 }
1780#endif
1781
1782 {
1783 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1784 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1785 __kmp_invoke_microtask(microtask, gtid, 0, argc, args
1786#if OMPT_SUPPORT1
1787 ,
1788 exit_frame_p
1789#endif
1790 );
1791 }
1792
1793#if OMPT_SUPPORT1
1794 if (ompt_enabled.enabled) {
1795 *exit_frame_p = NULL;
1796 if (ompt_enabled.ompt_callback_implicit_task) {
1797 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1798 ompt_scope_end, NULL, &(task_info->task_data), 1,
1799 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
1800 }
1801
1802 *ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
1803 __ompt_lw_taskteam_unlink(master_th);
1804 if (ompt_enabled.ompt_callback_parallel_end) {
1805 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
1806 ompt_parallel_data, *parent_task_data,
1807 OMPT_INVOKER(call_context) | ompt_parallel_team, *return_address);
1808 }
1809 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1810 }
1811#endif
1812 }
1813 } else if (call_context == fork_context_gnu) {
1814#if OMPT_SUPPORT1
1815 if (ompt_enabled.enabled) {
1816 ompt_lw_taskteam_t lwt;
1817 __ompt_lw_taskteam_init(&lwt, master_th, gtid, ompt_parallel_data,
1818 *return_address);
1819
1820 lwt.ompt_task_info.frame.exit_frame = ompt_data_none;
1821 __ompt_lw_taskteam_link(&lwt, master_th, 1);
1822 }
1823// don't use lw_taskteam after linking. content was swapped
1824#endif
1825
1826 // we were called from GNU native code
1827 KA_TRACE(20, ("__kmp_serial_fork_call: T#%d serial exit\n", gtid));
1828 return FALSE;
1829 } else {
1830 KMP_ASSERT2(call_context < fork_context_last,
1831 "__kmp_serial_fork_call: unknown fork_context parameter");
1832 }
1833
1834 KA_TRACE(20, ("__kmp_serial_fork_call: T#%d serial exit\n", gtid));
1835 KMP_MB();
1836 return FALSE;
1837}
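
Editor's note: in the serialized branches above, the variadic outlined-body arguments are copied out of a va_list into a flat argv array before __kmp_invoke_microtask is called. A hedged, self-contained sketch of that marshalling; the names and the fixed-size buffer are illustrative only (the runtime sizes the buffer with alloca()/a VLA and uses the real microtask_t signature):

// Editor's sketch (not part of kmp_runtime.cpp).
#include <cstdarg>
#include <cstdio>

typedef void (*microtask_sketch_t)(int *gtid, int *tid, void **argv);

static void invoke_microtask_sketch(microtask_sketch_t fn, int gtid, int argc, ...) {
  void *args[8]; // illustrative fixed size; argc must be <= 8 here
  va_list ap;
  va_start(ap, argc);
  for (int i = 0; i < argc; ++i) // copy each shared-variable pointer in call order
    args[i] = va_arg(ap, void *);
  va_end(ap);
  int tid = 0;
  fn(&gtid, &tid, args);
}

static void body(int *gtid, int *tid, void **argv) {
  std::printf("gtid=%d tid=%d shared=%d\n", *gtid, *tid,
              *static_cast<int *>(argv[0]));
}

int main() {
  int shared = 42;
  invoke_microtask_sketch(body, 0, 1, static_cast<void *>(&shared));
  return 0;
}
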
1838
1839/* most of the work for a fork */
1840/* return true if we really went parallel, false if serialized */
1841int __kmp_fork_call(ident_t *loc, int gtid,
1842 enum fork_context_e call_context, // Intel, GNU, ...
1843 kmp_int32 argc, microtask_t microtask, launch_t invoker,
1844 kmp_va_list ap) {
1845 void **argv;
1846 int i;
1847 int master_tid;
1848 int master_this_cons;
1849 kmp_team_t *team;
1850 kmp_team_t *parent_team;
1851 kmp_info_t *master_th;
1852 kmp_root_t *root;
1853 int nthreads;
1854 int master_active;
1855 int master_set_numthreads;
1856 int level;
1857 int active_level;
1858 int teams_level;
1859#if KMP_NESTED_HOT_TEAMS1
1860 kmp_hot_team_ptr_t **p_hot_teams;
1861#endif
1862 { // KMP_TIME_BLOCK
1863 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call);
1864 KMP_COUNT_VALUE(OMP_PARALLEL_args, argc);
1865
1866 KA_TRACE(20, ("__kmp_fork_call: enter T#%d\n", gtid));
1867 if (__kmp_stkpadding > 0 && __kmp_root[gtid] != NULL) {
1868 /* Some systems prefer the stack for the root thread(s) to start with */
1869 /* some gap from the parent stack to prevent false sharing. */
1870 void *dummy = KMP_ALLOCA(__kmp_stkpadding);
1871 /* These 2 lines below are so this does not get optimized out */
1872 if (__kmp_stkpadding > KMP_MAX_STKPADDING)
1873 __kmp_stkpadding += (short)((kmp_int64)dummy);
1874 }
1875
1876 /* initialize if needed */
1877 KMP_DEBUG_ASSERT(
1878 __kmp_init_serial); // AC: potentially unsafe, not in sync with shutdown
1879 if (!TCR_4(__kmp_init_parallel))
1880 __kmp_parallel_initialize();
1881 __kmp_resume_if_soft_paused();
1882
1883 /* setup current data */
1884 // AC: potentially unsafe, not in sync with library shutdown,
1885 // __kmp_threads can be freed
1886 master_th = __kmp_threads[gtid];
1887
1888 parent_team = master_th->th.th_team;
1889 master_tid = master_th->th.th_info.ds.ds_tid;
1890 master_this_cons = master_th->th.th_local.this_construct;
1891 root = master_th->th.th_root;
1892 master_active = root->r.r_active;
1893 master_set_numthreads = master_th->th.th_set_nproc;
1894
1895#if OMPT_SUPPORT
1896 ompt_data_t ompt_parallel_data = ompt_data_none;
1897 ompt_data_t *parent_task_data;
1898 ompt_frame_t *ompt_frame;
1899 void *return_address = NULL;
1900
1901 if (ompt_enabled.enabled) {
1902 __ompt_get_task_info_internal(0, NULL, &parent_task_data, &ompt_frame,
1903 NULL, NULL);
1904 return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);
1905 }
1906#endif
1907
1908 // Assign affinity to root thread if it hasn't happened yet
1909 __kmp_assign_root_init_mask();
1910
1911 // Nested level will be an index in the nested nthreads array
1912 level = parent_team->t.t_level;
1913 // used to launch non-serial teams even if nested is not allowed
1914 active_level = parent_team->t.t_active_level;
1915 // needed to check nesting inside the teams
1916 teams_level = master_th->th.th_teams_level;
1917#if KMP_NESTED_HOT_TEAMS
1918 p_hot_teams = &master_th->th.th_hot_teams;
1919 if (*p_hot_teams == NULL && __kmp_hot_teams_max_level > 0) {
1920 *p_hot_teams = (kmp_hot_team_ptr_t *)__kmp_allocate(
1921 sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
1922 (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
1923 // it is either actual or not needed (when active_level > 0)
1924 (*p_hot_teams)[0].hot_team_nth = 1;
1925 }
1926#endif
1927
1928#if OMPT_SUPPORT1
1929 if (ompt_enabled.enabled) {
1930 if (ompt_enabled.ompt_callback_parallel_begin) {
1931 int team_size = master_set_numthreads
1932 ? master_set_numthreads
1933 : get__nproc_2(parent_team, master_tid);
1934 int flags = OMPT_INVOKER(call_context) |
1935 ((microtask == (microtask_t)__kmp_teams_master)
1936 ? ompt_parallel_league
1937 : ompt_parallel_team);
1938 ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
1939 parent_task_data, ompt_frame, &ompt_parallel_data, team_size, flags,
1940 return_address);
1941 }
1942 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1943 }
1944#endif
1945
1946 master_th->th.th_ident = loc;
1947
1948 // Parallel closely nested in teams construct:
1949 if (__kmp_is_fork_in_teams(master_th, microtask, level, teams_level, ap)) {
1950 return __kmp_fork_in_teams(loc, gtid, parent_team, argc, master_th, root,
1951 call_context, microtask, invoker,
1952 master_set_numthreads, level,
1953#if OMPT_SUPPORT1
1954 ompt_parallel_data, return_address,
1955#endif
1956 ap);
1957 } // End parallel closely nested in teams construct
1958
1959#if KMP_DEBUG1
1960 if (__kmp_tasking_mode != tskm_immediate_exec) {
1961 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
1962 parent_team->t.t_task_team[master_th->th.th_task_state]);
1963 }
1964#endif
1965
1966 // Need this to happen before we determine the number of threads, not while
1967 // we are allocating the team
1968 //__kmp_push_current_task_to_thread(master_th, parent_team, 0);
1969
1970 // Determine the number of threads
1971 int enter_teams =
1972 __kmp_is_entering_teams(active_level, level, teams_level, ap);
1973 if ((!enter_teams &&
1974 (parent_team->t.t_active_level >=
1975 master_th->th.th_current_task->td_icvs.max_active_levels)) ||
1976 (__kmp_library == library_serial)) {
1977 KC_TRACE(10, ("__kmp_fork_call: T#%d serializing team\n", gtid));
1978 nthreads = 1;
1979 } else {
1980 nthreads = master_set_numthreads
1981 ? master_set_numthreads
1982 // TODO: get nproc directly from current task
1983 : get__nproc_2(parent_team, master_tid);
1984 // Check if we need to take forkjoin lock? (no need for serialized
1985 // parallel out of teams construct).
1986 if (nthreads > 1) {
1987 /* determine how many new threads we can use */
1988 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
1989 /* AC: If we execute teams from parallel region (on host), then teams
1990 should be created but each can only have 1 thread if nesting is
1991 disabled. If teams called from serial region, then teams and their
1992 threads should be created regardless of the nesting setting. */
1993 nthreads = __kmp_reserve_threads(root, parent_team, master_tid,
1994 nthreads, enter_teams);
1995 if (nthreads == 1) {
1996 // Free lock for single thread execution here; for multi-thread
1997 // execution it will be freed later after team of threads created
1998 // and initialized
1999 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2000 }
2001 }
2002 }
2003 KMP_DEBUG_ASSERT(nthreads > 0);
2004
2005 // If we temporarily changed the set number of threads then restore it now
2006 master_th->th.th_set_nproc = 0;
2007
2008 if (nthreads == 1) {
2009 return __kmp_serial_fork_call(loc, gtid, call_context, argc, microtask,
2010 invoker, master_th, parent_team,
2011#if OMPT_SUPPORT1
2012 &ompt_parallel_data, &return_address,
2013 &parent_task_data,
2014#endif
2015 ap);
2016 } // if (nthreads == 1)
2017
2018 // GEH: only modify the executing flag in the case when not serialized
2019 // serialized case is handled in kmpc_serialized_parallel
2020 KF_TRACE(10, ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
2021 "curtask=%p, curtask_max_aclevel=%d\n",
2022 parent_team->t.t_active_level, master_th,
2023 master_th->th.th_current_task,
2024 master_th->th.th_current_task->td_icvs.max_active_levels));
2025 // TODO: GEH - cannot do this assertion because root thread not set up as
2026 // executing
2027 // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 1 );
2028 master_th->th.th_current_task->td_flags.executing = 0;
2029
2030 if (!master_th->th.th_teams_microtask || level > teams_level) {
2031 /* Increment our nested depth level */
2032 KMP_ATOMIC_INC(&root->r.r_in_parallel);
2033 }
2034
2035 // See if we need to make a copy of the ICVs.
2036 int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
2037 if ((level + 1 < __kmp_nested_nth.used) &&
2038 (__kmp_nested_nth.nth[level + 1] != nthreads_icv)) {
2039 nthreads_icv = __kmp_nested_nth.nth[level + 1];
2040 } else {
2041 nthreads_icv = 0; // don't update
2042 }
2043
2044 // Figure out the proc_bind_policy for the new team.
2045 kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
2046 // proc_bind_default means don't update
2047 kmp_proc_bind_t proc_bind_icv = proc_bind_default;
2048 if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
2049 proc_bind = proc_bind_false;
2050 } else {
2051 // No proc_bind clause specified; use current proc-bind-var for this
2052 // parallel region
2053 if (proc_bind == proc_bind_default) {
2054 proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
2055 }
2056 // Have teams construct take proc_bind value from KMP_TEAMS_PROC_BIND
2057 if (master_th->th.th_teams_microtask &&
2058 microtask == (microtask_t)__kmp_teams_master) {
2059 proc_bind = __kmp_teams_proc_bind;
2060 }
2061 /* else: The proc_bind policy was specified explicitly on parallel clause.
2062 This overrides proc-bind-var for this parallel region, but does not
2063 change proc-bind-var. */
2064 // Figure the value of proc-bind-var for the child threads.
2065 if ((level + 1 < __kmp_nested_proc_bind.used) &&
2066 (__kmp_nested_proc_bind.bind_types[level + 1] !=
2067 master_th->th.th_current_task->td_icvs.proc_bind)) {
2068 // Do not modify the proc bind icv for the two teams construct forks
2069 // They just let the proc bind icv pass through
2070 if (!master_th->th.th_teams_microtask ||
2071 !(microtask == (microtask_t)__kmp_teams_master || ap == NULL))
2072 proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
2073 }
2074 }
2075
2076 // Reset for next parallel region
2077 master_th->th.th_set_proc_bind = proc_bind_default;
2078
2079 if ((nthreads_icv > 0) || (proc_bind_icv != proc_bind_default)) {
2080 kmp_internal_control_t new_icvs;
2081 copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
2082 new_icvs.next = NULL;
2083 if (nthreads_icv > 0) {
2084 new_icvs.nproc = nthreads_icv;
2085 }
2086 if (proc_bind_icv != proc_bind_default) {
2087 new_icvs.proc_bind = proc_bind_icv;
2088 }
2089
2090 /* allocate a new parallel team */
2091 KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
2092 team = __kmp_allocate_team(root, nthreads, nthreads,
2093#if OMPT_SUPPORT1
2094 ompt_parallel_data,
2095#endif
2096 proc_bind, &new_icvs,
2097 argc USE_NESTED_HOT_ARG(master_th), master_th);
2098 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
2099 copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs, &new_icvs);
2100 } else {
2101 /* allocate a new parallel team */
2102 KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
2103 team = __kmp_allocate_team(root, nthreads, nthreads,
2104#if OMPT_SUPPORT1
2105 ompt_parallel_data,
2106#endif
2107 proc_bind,
2108 &master_th->th.th_current_task->td_icvs,
2109 argc USE_NESTED_HOT_ARG(master_th), master_th);
2110 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
2111 copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs,
2112 &master_th->th.th_current_task->td_icvs);
2113 }
2114 KF_TRACE(
2115 10, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team));
2116
2117 /* setup the new team */
2118 KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
2119 KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
2120 KMP_CHECK_UPDATE(team->t.t_ident, loc);
2121 KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
2122 KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
2123#if OMPT_SUPPORT
2124 KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.master_return_address,
2125 return_address);
2126#endif
2127 KMP_CHECK_UPDATE(team->t.t_invoke, invoker); // TODO move to root, maybe
2128 // TODO: parent_team->t.t_level == INT_MAX ???
2129 if (!master_th->th.th_teams_microtask || level > teams_level) {
2130 int new_level = parent_team->t.t_level + 1;
2131 KMP_CHECK_UPDATE(team->t.t_level, new_level)if ((team->t.t_level) != (new_level)) (team->t.t_level)
= (new_level)
;
2132 new_level = parent_team->t.t_active_level + 1;
2133 KMP_CHECK_UPDATE(team->t.t_active_level, new_level)if ((team->t.t_active_level) != (new_level)) (team->t.t_active_level
) = (new_level)
;
2134 } else {
2135 // AC: Do not increase parallel level at start of the teams construct
2136 int new_level = parent_team->t.t_level;
2137 KMP_CHECK_UPDATE(team->t.t_level, new_level)if ((team->t.t_level) != (new_level)) (team->t.t_level)
= (new_level)
;
2138 new_level = parent_team->t.t_active_level;
2139 KMP_CHECK_UPDATE(team->t.t_active_level, new_level)if ((team->t.t_active_level) != (new_level)) (team->t.t_active_level
) = (new_level)
;
2140 }
2141 kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid)((parent_team)->t.t_threads[(master_tid)]->th.th_current_task
->td_icvs.sched)
;
2142 // set primary thread's schedule as new run-time schedule
2143 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched)if ((team->t.t_sched.sched) != (new_sched.sched)) (team->
t.t_sched.sched) = (new_sched.sched)
;
2144
2145 KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq)if ((team->t.t_cancel_request) != (cancel_noreq)) (team->
t.t_cancel_request) = (cancel_noreq)
;
2146 KMP_CHECK_UPDATE(team->t.t_def_allocator, master_th->th.th_def_allocator)if ((team->t.t_def_allocator) != (master_th->th.th_def_allocator
)) (team->t.t_def_allocator) = (master_th->th.th_def_allocator
)
;
2147
2148 // Update the floating point rounding in the team if required.
2149 propagateFPControl(team);
2150#if OMPD_SUPPORT1
2151 if (ompd_state & OMPD_ENABLE_BP0x1)
2152 ompd_bp_parallel_begin();
2153#endif
2154
2155 if (__kmp_tasking_mode != tskm_immediate_exec) {
2156 // Set primary thread's task team to team's task team. Unless this is hot
2157 // team, it should be NULL.
2158 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==if (!(master_th->th.th_task_team == parent_team->t.t_task_team
[master_th->th.th_task_state])) { __kmp_debug_assert("master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]"
, "openmp/runtime/src/kmp_runtime.cpp", 2159); }
2159 parent_team->t.t_task_team[master_th->th.th_task_state])if (!(master_th->th.th_task_team == parent_team->t.t_task_team
[master_th->th.th_task_state])) { __kmp_debug_assert("master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]"
, "openmp/runtime/src/kmp_runtime.cpp", 2159); }
;
2160 KA_TRACE(20, ("__kmp_fork_call: Primary T#%d pushing task_team %p / team "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_call: Primary T#%d pushing task_team %p / team "
"%p, new task_team %p / team %p\n", __kmp_gtid_from_thread(master_th
), master_th->th.th_task_team, parent_team, team->t.t_task_team
[master_th->th.th_task_state], team); }
2161 "%p, new task_team %p / team %p\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_call: Primary T#%d pushing task_team %p / team "
"%p, new task_team %p / team %p\n", __kmp_gtid_from_thread(master_th
), master_th->th.th_task_team, parent_team, team->t.t_task_team
[master_th->th.th_task_state], team); }
2162 __kmp_gtid_from_thread(master_th),if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_call: Primary T#%d pushing task_team %p / team "
"%p, new task_team %p / team %p\n", __kmp_gtid_from_thread(master_th
), master_th->th.th_task_team, parent_team, team->t.t_task_team
[master_th->th.th_task_state], team); }
2163 master_th->th.th_task_team, parent_team,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_call: Primary T#%d pushing task_team %p / team "
"%p, new task_team %p / team %p\n", __kmp_gtid_from_thread(master_th
), master_th->th.th_task_team, parent_team, team->t.t_task_team
[master_th->th.th_task_state], team); }
2164 team->t.t_task_team[master_th->th.th_task_state], team))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_call: Primary T#%d pushing task_team %p / team "
"%p, new task_team %p / team %p\n", __kmp_gtid_from_thread(master_th
), master_th->th.th_task_team, parent_team, team->t.t_task_team
[master_th->th.th_task_state], team); }
;
2165
2166 if (active_level || master_th->th.th_task_team) {
2167 // Take a memo of primary thread's task_state
2168 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack)if (!(master_th->th.th_task_state_memo_stack)) { __kmp_debug_assert
("master_th->th.th_task_state_memo_stack", "openmp/runtime/src/kmp_runtime.cpp"
, 2168); }
;
2169 if (master_th->th.th_task_state_top >=
2170 master_th->th.th_task_state_stack_sz) { // increase size
2171 kmp_uint32 new_size = 2 * master_th->th.th_task_state_stack_sz;
2172 kmp_uint8 *old_stack, *new_stack;
2173 kmp_uint32 i;
2174 new_stack = (kmp_uint8 *)__kmp_allocate(new_size)___kmp_allocate((new_size), "openmp/runtime/src/kmp_runtime.cpp"
, 2174)
;
2175 for (i = 0; i < master_th->th.th_task_state_stack_sz; ++i) {
2176 new_stack[i] = master_th->th.th_task_state_memo_stack[i];
2177 }
2178 for (i = master_th->th.th_task_state_stack_sz; i < new_size;
2179 ++i) { // zero-init rest of stack
2180 new_stack[i] = 0;
2181 }
2182 old_stack = master_th->th.th_task_state_memo_stack;
2183 master_th->th.th_task_state_memo_stack = new_stack;
2184 master_th->th.th_task_state_stack_sz = new_size;
2185 __kmp_free(old_stack)___kmp_free((old_stack), "openmp/runtime/src/kmp_runtime.cpp"
, 2185)
;
2186 }
2187 // Store primary thread's task_state on stack
2188 master_th->th
2189 .th_task_state_memo_stack[master_th->th.th_task_state_top] =
2190 master_th->th.th_task_state;
2191 master_th->th.th_task_state_top++;
2192#if KMP_NESTED_HOT_TEAMS1
2193 if (master_th->th.th_hot_teams &&
2194 active_level < __kmp_hot_teams_max_level &&
2195 team == master_th->th.th_hot_teams[active_level].hot_team) {
2196 // Restore primary thread's nested state if nested hot team
2197 master_th->th.th_task_state =
2198 master_th->th
2199 .th_task_state_memo_stack[master_th->th.th_task_state_top];
2200 } else {
2201#endif
2202 master_th->th.th_task_state = 0;
2203#if KMP_NESTED_HOT_TEAMS1
2204 }
2205#endif
2206 }
2207#if !KMP_NESTED_HOT_TEAMS1
2208 KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) ||if (!((master_th->th.th_task_team == __null) || (team == root
->r.r_hot_team))) { __kmp_debug_assert("(master_th->th.th_task_team == __null) || (team == root->r.r_hot_team)"
, "openmp/runtime/src/kmp_runtime.cpp", 2209); }
2209 (team == root->r.r_hot_team))if (!((master_th->th.th_task_team == __null) || (team == root
->r.r_hot_team))) { __kmp_debug_assert("(master_th->th.th_task_team == __null) || (team == root->r.r_hot_team)"
, "openmp/runtime/src/kmp_runtime.cpp", 2209); }
;
2210#endif
2211 }
2212
2213 KA_TRACE(if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n"
, gtid, parent_team->t.t_id, team->t.t_master_tid, team
->t.t_id, team->t.t_nproc); }
2214 20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n"
, gtid, parent_team->t.t_id, team->t.t_master_tid, team
->t.t_id, team->t.t_nproc); }
2215 ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n"
, gtid, parent_team->t.t_id, team->t.t_master_tid, team
->t.t_id, team->t.t_nproc); }
2216 gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n"
, gtid, parent_team->t.t_id, team->t.t_master_tid, team
->t.t_id, team->t.t_nproc); }
2217 team->t.t_nproc))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n"
, gtid, parent_team->t.t_id, team->t.t_master_tid, team
->t.t_id, team->t.t_nproc); }
;
2218 KMP_DEBUG_ASSERT(team != root->r.r_hot_team ||if (!(team != root->r.r_hot_team || (team->t.t_master_tid
== 0 && (team->t.t_parent == root->r.r_root_team
|| team->t.t_parent->t.t_serialized)))) { __kmp_debug_assert
("team != root->r.r_hot_team || (team->t.t_master_tid == 0 && (team->t.t_parent == root->r.r_root_team || team->t.t_parent->t.t_serialized))"
, "openmp/runtime/src/kmp_runtime.cpp", 2221); }
2219 (team->t.t_master_tid == 0 &&if (!(team != root->r.r_hot_team || (team->t.t_master_tid
== 0 && (team->t.t_parent == root->r.r_root_team
|| team->t.t_parent->t.t_serialized)))) { __kmp_debug_assert
("team != root->r.r_hot_team || (team->t.t_master_tid == 0 && (team->t.t_parent == root->r.r_root_team || team->t.t_parent->t.t_serialized))"
, "openmp/runtime/src/kmp_runtime.cpp", 2221); }
2220 (team->t.t_parent == root->r.r_root_team ||if (!(team != root->r.r_hot_team || (team->t.t_master_tid
== 0 && (team->t.t_parent == root->r.r_root_team
|| team->t.t_parent->t.t_serialized)))) { __kmp_debug_assert
("team != root->r.r_hot_team || (team->t.t_master_tid == 0 && (team->t.t_parent == root->r.r_root_team || team->t.t_parent->t.t_serialized))"
, "openmp/runtime/src/kmp_runtime.cpp", 2221); }
2221 team->t.t_parent->t.t_serialized)))if (!(team != root->r.r_hot_team || (team->t.t_master_tid
== 0 && (team->t.t_parent == root->r.r_root_team
|| team->t.t_parent->t.t_serialized)))) { __kmp_debug_assert
("team != root->r.r_hot_team || (team->t.t_master_tid == 0 && (team->t.t_parent == root->r.r_root_team || team->t.t_parent->t.t_serialized))"
, "openmp/runtime/src/kmp_runtime.cpp", 2221); }
;
2222 KMP_MB();
2223
2224 /* now, setup the arguments */
2225 argv = (void **)team->t.t_argv;
2226 if (ap) {
2227 for (i = argc - 1; i >= 0; --i) {
2228 void *new_argv = va_arg(kmp_va_deref(ap), void *)__builtin_va_arg((*(ap)), void *);
2229 KMP_CHECK_UPDATE(*argv, new_argv)if ((*argv) != (new_argv)) (*argv) = (new_argv);
2230 argv++;
2231 }
2232 } else {
2233 for (i = 0; i < argc; ++i) {
2234 // Get args from parent team for teams construct
2235 KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i])if ((argv[i]) != (team->t.t_parent->t.t_argv[i])) (argv
[i]) = (team->t.t_parent->t.t_argv[i])
;
2236 }
2237 }
2238
2239 /* now actually fork the threads */
2240 KMP_CHECK_UPDATE(team->t.t_master_active, master_active)if ((team->t.t_master_active) != (master_active)) (team->
t.t_master_active) = (master_active)
;
2241 if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong
2242 root->r.r_active = TRUE(!0);
2243
2244 __kmp_fork_team_threads(root, team, master_th, gtid, !ap);
2245 __kmp_setup_icv_copy(team, nthreads,
2246 &master_th->th.th_current_task->td_icvs, loc);
2247
2248#if OMPT_SUPPORT1
2249 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
2250#endif
2251
2252 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2253
2254#if USE_ITT_BUILD1
2255 if (team->t.t_active_level == 1 // only report frames at level 1
2256 && !master_th->th.th_teams_microtask) { // not in teams construct
2257#if USE_ITT_NOTIFY1
2258 if ((__itt_frame_submit_v3_ptr__kmp_itt_frame_submit_v3_ptr__3_0 || KMP_ITT_DEBUG0) &&
2259 (__kmp_forkjoin_frames_mode == 3 ||
2260 __kmp_forkjoin_frames_mode == 1)) {
2261 kmp_uint64 tmp_time = 0;
2262 if (__itt_get_timestamp_ptr__kmp_itt_get_timestamp_ptr__3_0)
2263 tmp_time = __itt_get_timestamp(!__kmp_itt_get_timestamp_ptr__3_0) ? 0 : __kmp_itt_get_timestamp_ptr__3_0();
2264 // Internal fork - report frame begin
2265 master_th->th.th_frame_time = tmp_time;
2266 if (__kmp_forkjoin_frames_mode == 3)
2267 team->t.t_region_time = tmp_time;
2268 } else
2269// only one notification scheme (either "submit" or "forking/joined", not both)
2270#endif /* USE_ITT_NOTIFY */
2271 if ((__itt_frame_begin_v3_ptr__kmp_itt_frame_begin_v3_ptr__3_0 || KMP_ITT_DEBUG0) &&
2272 __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) {
2273 // Mark start of "parallel" region for Intel(R) VTune(TM) analyzer.
2274 __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
2275 }
2276 }
2277#endif /* USE_ITT_BUILD */
2278
2279 /* now go on and do the work */
2280 KMP_DEBUG_ASSERT(team == __kmp_threads[gtid]->th.th_team)if (!(team == __kmp_threads[gtid]->th.th_team)) { __kmp_debug_assert
("team == __kmp_threads[gtid]->th.th_team", "openmp/runtime/src/kmp_runtime.cpp"
, 2280); }
;
2281 KMP_MB();
2282 KF_TRACE(10,if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n"
, root, team, master_th, gtid); }
2283 ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n"
, root, team, master_th, gtid); }
2284 root, team, master_th, gtid))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n"
, root, team, master_th, gtid); }
;
2285
2286#if USE_ITT_BUILD1
2287 if (__itt_stack_caller_create_ptr__kmp_itt_stack_caller_create_ptr__3_0) {
2288 // create new stack stitching id before entering fork barrier
2289 if (!enter_teams) {
2290 KMP_DEBUG_ASSERT(team->t.t_stack_id == NULL)if (!(team->t.t_stack_id == __null)) { __kmp_debug_assert(
"team->t.t_stack_id == __null", "openmp/runtime/src/kmp_runtime.cpp"
, 2290); }
;
2291 team->t.t_stack_id = __kmp_itt_stack_caller_create();
2292 } else if (parent_team->t.t_serialized) {
2293 // keep stack stitching id in the serialized parent_team;
2294 // current team will be used for parallel inside the teams;
2295 // if parent_team is active, then it already keeps stack stitching id
2296 // for the league of teams
2297 KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL)if (!(parent_team->t.t_stack_id == __null)) { __kmp_debug_assert
("parent_team->t.t_stack_id == __null", "openmp/runtime/src/kmp_runtime.cpp"
, 2297); }
;
2298 parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();
2299 }
2300 }
2301#endif /* USE_ITT_BUILD */
2302
2303 // AC: skip __kmp_internal_fork at teams construct, let only primary
2304 // threads execute
2305 if (ap) {
2306 __kmp_internal_fork(loc, gtid, team);
2307 KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, "if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_internal_fork : after : root=%p, team=%p, "
"master_th=%p, gtid=%d\n", root, team, master_th, gtid); }
2308 "master_th=%p, gtid=%d\n",if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_internal_fork : after : root=%p, team=%p, "
"master_th=%p, gtid=%d\n", root, team, master_th, gtid); }
2309 root, team, master_th, gtid))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_internal_fork : after : root=%p, team=%p, "
"master_th=%p, gtid=%d\n", root, team, master_th, gtid); }
;
2310 }
2311
2312 if (call_context == fork_context_gnu) {
2313 KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_call: parallel exit T#%d\n"
, gtid); }
;
2314 return TRUE(!0);
2315 }
2316
2317 /* Invoke microtask for PRIMARY thread */
2318 KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n"
, gtid, team->t.t_id, team->t.t_pkfn); }
2319 team->t.t_id, team->t.t_pkfn))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n"
, gtid, team->t.t_id, team->t.t_pkfn); }
;
2320 } // END of timer KMP_fork_call block
2321
2322#if KMP_STATS_ENABLED0
2323 // If beginning a teams construct, then change thread state
2324 stats_state_e previous_state = KMP_GET_THREAD_STATE()((void)0);
2325 if (!ap) {
2326 KMP_SET_THREAD_STATE(stats_state_e::TEAMS_REGION)((void)0);
2327 }
2328#endif
2329
2330 if (!team->t.t_invoke(gtid)) {
2331 KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread")if (!(0)) { __kmp_debug_assert(("cannot invoke microtask for PRIMARY thread"
), "openmp/runtime/src/kmp_runtime.cpp", 2331); }
;
2332 }
2333
2334#if KMP_STATS_ENABLED0
2335 // If was beginning of a teams construct, then reset thread state
2336 if (!ap) {
2337 KMP_SET_THREAD_STATE(previous_state)((void)0);
2338 }
2339#endif
2340
2341 KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n"
, gtid, team->t.t_id, team->t.t_pkfn); }
2342 team->t.t_id, team->t.t_pkfn))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n"
, gtid, team->t.t_id, team->t.t_pkfn); }
;
2343 KMP_MB(); /* Flush all pending memory write invalidates. */
2344
2345 KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_call: parallel exit T#%d\n"
, gtid); }
;
2346#if OMPT_SUPPORT1
2347 if (ompt_enabled.enabled) {
2348 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2349 }
2350#endif
2351
2352 return TRUE(!0);
2353}
2354
2355#if OMPT_SUPPORT1
2356static inline void __kmp_join_restore_state(kmp_info_t *thread,
2357 kmp_team_t *team) {
2358 // restore state outside the region
2359 thread->th.ompt_thread_info.state =
2360 ((team->t.t_serialized) ? ompt_state_work_serial
2361 : ompt_state_work_parallel);
2362}
2363
2364static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread,
2365 kmp_team_t *team, ompt_data_t *parallel_data,
2366 int flags, void *codeptr) {
2367 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2368 if (ompt_enabled.ompt_callback_parallel_end) {
2369 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)ompt_callback_parallel_end_callback(
2370 parallel_data, &(task_info->task_data), flags, codeptr);
2371 }
2372
2373 task_info->frame.enter_frame = ompt_data_none{0};
2374 __kmp_join_restore_state(thread, team);
2375}
2376#endif
2377
2378void __kmp_join_call(ident_t *loc, int gtid
2379#if OMPT_SUPPORT1
2380 ,
2381 enum fork_context_e fork_context
2382#endif
2383 ,
2384 int exit_teams) {
2385 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call)((void)0);
2386 kmp_team_t *team;
2387 kmp_team_t *parent_team;
2388 kmp_info_t *master_th;
2389 kmp_root_t *root;
2390 int master_active;
2391
2392 KA_TRACE(20, ("__kmp_join_call: enter T#%d\n", gtid))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_join_call: enter T#%d\n"
, gtid); }
;
1. Assuming 'kmp_a_debug' is < 20
2. Taking false branch
2393
2394 /* setup current data */
2395 master_th = __kmp_threads[gtid];
2396 root = master_th->th.th_root;
2397 team = master_th->th.th_team;
2398 parent_team = team->t.t_parent;
2399
2400 master_th->th.th_ident = loc;
2401
2402#if OMPT_SUPPORT1
2403 void *team_microtask = (void *)team->t.t_pkfn;
2404 // For GOMP interface with serialized parallel, need the
2405 // __kmpc_end_serialized_parallel to call hooks for OMPT end-implicit-task
2406 // and end-parallel events.
2407 if (ompt_enabled.enabled &&
3. Assuming field 'enabled' is 0
2408 !(team->t.t_serialized && fork_context == fork_context_gnu)) {
2409 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2410 }
2411#endif
2412
2413#if KMP_DEBUG1
2414 if (__kmp_tasking_mode != tskm_immediate_exec && !exit_teams) {
4. Assuming '__kmp_tasking_mode' is equal to tskm_immediate_exec
2415 KA_TRACE(20, ("__kmp_join_call: T#%d, old team = %p old task_team = %p, "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_join_call: T#%d, old team = %p old task_team = %p, "
"th_task_team = %p\n", __kmp_gtid_from_thread(master_th), team
, team->t.t_task_team[master_th->th.th_task_state], master_th
->th.th_task_team); }
2416 "th_task_team = %p\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_join_call: T#%d, old team = %p old task_team = %p, "
"th_task_team = %p\n", __kmp_gtid_from_thread(master_th), team
, team->t.t_task_team[master_th->th.th_task_state], master_th
->th.th_task_team); }
2417 __kmp_gtid_from_thread(master_th), team,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_join_call: T#%d, old team = %p old task_team = %p, "
"th_task_team = %p\n", __kmp_gtid_from_thread(master_th), team
, team->t.t_task_team[master_th->th.th_task_state], master_th
->th.th_task_team); }
2418 team->t.t_task_team[master_th->th.th_task_state],if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_join_call: T#%d, old team = %p old task_team = %p, "
"th_task_team = %p\n", __kmp_gtid_from_thread(master_th), team
, team->t.t_task_team[master_th->th.th_task_state], master_th
->th.th_task_team); }
2419 master_th->th.th_task_team))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_join_call: T#%d, old team = %p old task_team = %p, "
"th_task_team = %p\n", __kmp_gtid_from_thread(master_th), team
, team->t.t_task_team[master_th->th.th_task_state], master_th
->th.th_task_team); }
;
2420 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==if (!(master_th->th.th_task_team == team->t.t_task_team
[master_th->th.th_task_state])) { __kmp_debug_assert("master_th->th.th_task_team == team->t.t_task_team[master_th->th.th_task_state]"
, "openmp/runtime/src/kmp_runtime.cpp", 2421); }
2421 team->t.t_task_team[master_th->th.th_task_state])if (!(master_th->th.th_task_team == team->t.t_task_team
[master_th->th.th_task_state])) { __kmp_debug_assert("master_th->th.th_task_team == team->t.t_task_team[master_th->th.th_task_state]"
, "openmp/runtime/src/kmp_runtime.cpp", 2421); }
;
2422 }
2423#endif
2424
2425 if (team->t.t_serialized) {
5. Assuming field 't_serialized' is 0
6. Taking false branch
2426 if (master_th->th.th_teams_microtask) {
2427 // We are in teams construct
2428 int level = team->t.t_level;
2429 int tlevel = master_th->th.th_teams_level;
2430 if (level == tlevel) {
2431 // AC: we haven't incremented it earlier at start of teams construct,
2432 // so do it here - at the end of teams construct
2433 team->t.t_level++;
2434 } else if (level == tlevel + 1) {
2435 // AC: we are exiting parallel inside teams, need to increment
2436 // serialization in order to restore it in the next call to
2437 // __kmpc_end_serialized_parallel
2438 team->t.t_serialized++;
2439 }
2440 }
2441 __kmpc_end_serialized_parallel(loc, gtid);
2442
2443#if OMPT_SUPPORT1
2444 if (ompt_enabled.enabled) {
2445 if (fork_context == fork_context_gnu) {
2446 __ompt_lw_taskteam_unlink(master_th);
2447 }
2448 __kmp_join_restore_state(master_th, parent_team);
2449 }
2450#endif
2451
2452 return;
2453 }
2454
2455 master_active = team->t.t_master_active;
2456
2457 if (!exit_teams) {
7. Assuming 'exit_teams' is 0
8. Taking true branch
2458 // AC: No barrier for internal teams at exit from teams construct.
2459 // But there is barrier for external team (league).
2460 __kmp_internal_join(loc, gtid, team);
2461#if USE_ITT_BUILD1
2462 if (__itt_stack_caller_create_ptr__kmp_itt_stack_caller_create_ptr__3_0) {
9. Assuming '__kmp_itt_stack_caller_create_ptr__3_0' is null
10. Taking false branch
2463 KMP_DEBUG_ASSERT(team->t.t_stack_id != NULL)if (!(team->t.t_stack_id != __null)) { __kmp_debug_assert(
"team->t.t_stack_id != __null", "openmp/runtime/src/kmp_runtime.cpp"
, 2463); }
;
2464 // destroy the stack stitching id after join barrier
2465 __kmp_itt_stack_caller_destroy((__itt_caller)team->t.t_stack_id);
2466 team->t.t_stack_id = NULL__null;
2467 }
2468#endif
2469 } else {
2470 master_th->th.th_task_state =
2471 0; // AC: no tasking in teams (out of any parallel)
2472#if USE_ITT_BUILD1
2473 if (__itt_stack_caller_create_ptr__kmp_itt_stack_caller_create_ptr__3_0 && parent_team->t.t_serialized) {
2474 KMP_DEBUG_ASSERT(parent_team->t.t_stack_id != NULL)if (!(parent_team->t.t_stack_id != __null)) { __kmp_debug_assert
("parent_team->t.t_stack_id != __null", "openmp/runtime/src/kmp_runtime.cpp"
, 2474); }
;
2475 // destroy the stack stitching id on exit from the teams construct
2476 // if parent_team is active, then the id will be destroyed later on
2477 // by master of the league of teams
2478 __kmp_itt_stack_caller_destroy((__itt_caller)parent_team->t.t_stack_id);
2479 parent_team->t.t_stack_id = NULL__null;
2480 }
2481#endif
2482
2483 if (team->t.t_nproc > 1 &&
2484 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
2485 team->t.b->update_num_threads(team->t.t_nproc);
2486 __kmp_add_threads_to_team(team, team->t.t_nproc);
2487 }
2488 }
2489
2490 KMP_MB();
2491
2492#if OMPT_SUPPORT1
2493 ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data);
2494 void *codeptr = team->t.ompt_team_info.master_return_address;
2495#endif
2496
2497#if USE_ITT_BUILD1
2498 // Mark end of "parallel" region for Intel(R) VTune(TM) analyzer.
2499 if (team->t.t_active_level == 1 &&
11. Assuming field 't_active_level' is not equal to 1
2500 (!master_th->th.th_teams_microtask || /* not in teams construct */
2501 master_th->th.th_teams_size.nteams == 1)) {
2502 master_th->th.th_ident = loc;
2503 // only one notification scheme (either "submit" or "forking/joined", not
2504 // both)
2505 if ((__itt_frame_submit_v3_ptr__kmp_itt_frame_submit_v3_ptr__3_0 || KMP_ITT_DEBUG0) &&
2506 __kmp_forkjoin_frames_mode == 3)
2507 __kmp_itt_frame_submit(gtid, team->t.t_region_time,
2508 master_th->th.th_frame_time, 0, loc,
2509 master_th->th.th_team_nproc, 1);
2510 else if ((__itt_frame_end_v3_ptr__kmp_itt_frame_end_v3_ptr__3_0 || KMP_ITT_DEBUG0) &&
2511 !__kmp_forkjoin_frames_mode && __kmp_forkjoin_frames)
2512 __kmp_itt_region_joined(gtid);
2513 } // active_level == 1
2514#endif /* USE_ITT_BUILD */
2515
2516#if KMP_AFFINITY_SUPPORTED1
2517   if (!exit_teams) {
11.1. 'exit_teams' is 0
12. Taking true branch
2518 // Restore master thread's partition.
2519 master_th->th.th_first_place = team->t.t_first_place;
2520 master_th->th.th_last_place = team->t.t_last_place;
2521 }
2522#endif // KMP_AFFINITY_SUPPORTED
2523
2524   if (master_th->th.th_teams_microtask && !exit_teams &&
13. Assuming field 'th_teams_microtask' is non-null
13.1. 'exit_teams' is 0
2525       team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
14. Assuming field 't_pkfn' is not equal to __kmp_teams_master
2526       team->t.t_level == master_th->th.th_teams_level + 1) {
15. Assuming the condition is true
16. Taking true branch
2527// AC: We need to leave the team structure intact at the end of parallel
2528// inside the teams construct, so that at the next parallel same (hot) team
2529// works, only adjust nesting levels
2530#if OMPT_SUPPORT1
2531 ompt_data_t ompt_parallel_data = ompt_data_none{0};
2532 if (ompt_enabled.enabled) {
17. Assuming field 'enabled' is 0
18. Taking false branch
2533 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2534 if (ompt_enabled.ompt_callback_implicit_task) {
2535 int ompt_team_size = team->t.t_nproc;
2536 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)ompt_callback_implicit_task_callback(
2537 ompt_scope_end, NULL__null, &(task_info->task_data), ompt_team_size,
2538 OMPT_CUR_TASK_INFO(master_th)(&(master_th->th.th_current_task->ompt_task_info))->thread_num, ompt_task_implicit);
2539 }
2540 task_info->frame.exit_frame = ompt_data_none{0};
2541 task_info->task_data = ompt_data_none{0};
2542 ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th)(&(master_th->th.th_team->t.ompt_team_info.parallel_data
))
;
2543 __ompt_lw_taskteam_unlink(master_th);
2544 }
2545#endif
2546 /* Decrement our nested depth level */
2547 team->t.t_level--;
2548 team->t.t_active_level--;
2549 KMP_ATOMIC_DEC(&root->r.r_in_parallel)(&root->r.r_in_parallel)->fetch_sub(1, std::memory_order_acq_rel
)
;
2550
2551 // Restore number of threads in the team if needed. This code relies on
2552 // the proper adjustment of th_teams_size.nth after the fork in
2553 // __kmp_teams_master on each teams primary thread in the case that
2554 // __kmp_reserve_threads reduced it.
2555 if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) {
19. Assuming field 'th_team_nproc' is < field 'nth'
20. Taking true branch
2556 int old_num = master_th->th.th_team_nproc;
2557 int new_num = master_th->th.th_teams_size.nth;
2558 kmp_info_t **other_threads = team->t.t_threads;
2559 team->t.t_nproc = new_num;
2560 for (int i = 0; i < old_num; ++i) {
21. Assuming 'i' is < 'old_num'
22. Loop condition is true. Entering loop body
23. Assuming 'i' is >= 'old_num'
24. Loop condition is false. Execution continues on line 2564
2561 other_threads[i]->th.th_team_nproc = new_num;
2562 }
2563 // Adjust states of non-used threads of the team
2564 for (int i = old_num; i < new_num; ++i) {
25. Assuming 'i' is < 'new_num'
26. Loop condition is true. Entering loop body
2565 // Re-initialize thread's barrier data.
2566 KMP_DEBUG_ASSERT(other_threads[i])if (!(other_threads[i])) { __kmp_debug_assert("other_threads[i]"
, "openmp/runtime/src/kmp_runtime.cpp", 2566); }
;
27. Assuming the condition is true
28. Taking true branch
2567 kmp_balign_t *balign = other_threads[i]->th.th_bar;
29. 'balign' initialized to a null pointer value
2568 for (int b = 0; b < bs_last_barrier; ++b) {
30. Loop condition is true. Entering loop body
2569 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
31. Dereference of null pointer
2570 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG)if (!(balign[b].bb.wait_flag != 2)) { __kmp_debug_assert("balign[b].bb.wait_flag != 2"
, "openmp/runtime/src/kmp_runtime.cpp", 2570); }
;
2571#if USE_DEBUGGER0
2572 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
2573#endif
2574 }
2575 if (__kmp_tasking_mode != tskm_immediate_exec) {
2576 // Synchronize thread's task state
2577 other_threads[i]->th.th_task_state = master_th->th.th_task_state;
2578 }
2579 }
2580 }
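The path above is the core of this report: at step 27 the analyzer assumes the condition inside KMP_DEBUG_ASSERT(other_threads[i]) holds, i.e. that other_threads[i] is NULL, so the balign pointer taken from other_threads[i]->th.th_bar on line 2567 is modeled as a null-based pointer, and the store to balign[b].bb.b_arrived on line 2569 is flagged as a null dereference. A minimal defensive sketch, illustrative only under that assumption and not necessarily the appropriate runtime fix:

      // Hypothetical guard, assuming a NULL slot can actually occur here:
      if (other_threads[i] == NULL)
        continue; // thread slot not initialized; skip barrier re-initialization
      kmp_balign_t *balign = other_threads[i]->th.th_bar;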
2581
2582#if OMPT_SUPPORT1
2583 if (ompt_enabled.enabled) {
2584 __kmp_join_ompt(gtid, master_th, parent_team, &ompt_parallel_data,
2585 OMPT_INVOKER(fork_context)((fork_context == fork_context_gnu) ? ompt_parallel_invoker_program
: ompt_parallel_invoker_runtime)
| ompt_parallel_team, codeptr);
2586 }
2587#endif
2588
2589 return;
2590 }
2591
2592 /* do cleanup and restore the parent team */
2593 master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
2594 master_th->th.th_local.this_construct = team->t.t_master_this_cons;
2595
2596 master_th->th.th_dispatch = &parent_team->t.t_dispatch[team->t.t_master_tid];
2597
2598 /* jc: The following lock has instructions with REL and ACQ semantics,
2599 separating the parallel user code called in this parallel region
2600 from the serial user code called after this function returns. */
2601 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2602
2603 if (!master_th->th.th_teams_microtask ||
2604 team->t.t_level > master_th->th.th_teams_level) {
2605 /* Decrement our nested depth level */
2606 KMP_ATOMIC_DEC(&root->r.r_in_parallel)(&root->r.r_in_parallel)->fetch_sub(1, std::memory_order_acq_rel
)
;
2607 }
2608 KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0)if (!(root->r.r_in_parallel >= 0)) { __kmp_debug_assert
("root->r.r_in_parallel >= 0", "openmp/runtime/src/kmp_runtime.cpp"
, 2608); }
;
2609
2610#if OMPT_SUPPORT1
2611 if (ompt_enabled.enabled) {
2612 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2613 if (ompt_enabled.ompt_callback_implicit_task) {
2614 int flags = (team_microtask == (void *)__kmp_teams_master)
2615 ? ompt_task_initial
2616 : ompt_task_implicit;
2617 int ompt_team_size = (flags == ompt_task_initial) ? 0 : team->t.t_nproc;
2618 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)ompt_callback_implicit_task_callback(
2619 ompt_scope_end, NULL__null, &(task_info->task_data), ompt_team_size,
2620 OMPT_CUR_TASK_INFO(master_th)(&(master_th->th.th_current_task->ompt_task_info))->thread_num, flags);
2621 }
2622 task_info->frame.exit_frame = ompt_data_none{0};
2623 task_info->task_data = ompt_data_none{0};
2624 }
2625#endif
2626
2627 KF_TRACE(10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 0,if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n"
, 0, master_th, team); }
2628 master_th, team))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n"
, 0, master_th, team); }
;
2629 __kmp_pop_current_task_from_thread(master_th);
2630
2631 master_th->th.th_def_allocator = team->t.t_def_allocator;
2632
2633#if OMPD_SUPPORT1
2634 if (ompd_state & OMPD_ENABLE_BP0x1)
2635 ompd_bp_parallel_end();
2636#endif
2637 updateHWFPControl(team);
2638
2639 if (root->r.r_active != master_active)
2640 root->r.r_active = master_active;
2641
2642 __kmp_free_team(root, team USE_NESTED_HOT_ARG(, master_th
2643 master_th), master_th); // this will free worker threads
2644
2645 /* this race was fun to find. make sure the following is in the critical
2646 region otherwise assertions may fail occasionally since the old team may be
2647 reallocated and the hierarchy appears inconsistent. it is actually safe to
2648 run and won't cause any bugs, but will cause those assertion failures. it's
2649 only one deref&assign so might as well put this in the critical region */
2650 master_th->th.th_team = parent_team;
2651 master_th->th.th_team_nproc = parent_team->t.t_nproc;
2652 master_th->th.th_team_master = parent_team->t.t_threads[0];
2653 master_th->th.th_team_serialized = parent_team->t.t_serialized;
2654
2655 /* restore serialized team, if need be */
2656 if (parent_team->t.t_serialized &&
2657 parent_team != master_th->th.th_serial_team &&
2658 parent_team != root->r.r_root_team) {
2659 __kmp_free_team(root,
2660 master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL), __null);
2661 master_th->th.th_serial_team = parent_team;
2662 }
2663
2664 if (__kmp_tasking_mode != tskm_immediate_exec) {
2665 if (master_th->th.th_task_state_top >
2666 0) { // Restore task state from memo stack
2667 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack)if (!(master_th->th.th_task_state_memo_stack)) { __kmp_debug_assert
("master_th->th.th_task_state_memo_stack", "openmp/runtime/src/kmp_runtime.cpp"
, 2667); }
;
2668 // Remember primary thread's state if we re-use this nested hot team
2669 master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] =
2670 master_th->th.th_task_state;
2671 --master_th->th.th_task_state_top; // pop
2672 // Now restore state at this level
2673 master_th->th.th_task_state =
2674 master_th->th
2675 .th_task_state_memo_stack[master_th->th.th_task_state_top];
2676 }
2677 // Copy the task team from the parent team to the primary thread
2678 master_th->th.th_task_team =
2679 parent_team->t.t_task_team[master_th->th.th_task_state];
2680 KA_TRACE(20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_join_call: Primary T#%d restoring task_team %p, team %p\n"
, __kmp_gtid_from_thread(master_th), master_th->th.th_task_team
, parent_team); }
2681 ("__kmp_join_call: Primary T#%d restoring task_team %p, team %p\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_join_call: Primary T#%d restoring task_team %p, team %p\n"
, __kmp_gtid_from_thread(master_th), master_th->th.th_task_team
, parent_team); }
2682 __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_join_call: Primary T#%d restoring task_team %p, team %p\n"
, __kmp_gtid_from_thread(master_th), master_th->th.th_task_team
, parent_team); }
2683 parent_team))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_join_call: Primary T#%d restoring task_team %p, team %p\n"
, __kmp_gtid_from_thread(master_th), master_th->th.th_task_team
, parent_team); }
;
2684 }
2685
2686 // TODO: GEH - cannot do this assertion because root thread not set up as
2687 // executing
2688 // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 0 );
2689 master_th->th.th_current_task->td_flags.executing = 1;
2690
2691 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2692
2693#if KMP_AFFINITY_SUPPORTED1
2694 if (master_th->th.th_team->t.t_level == 0 && __kmp_affinity.flags.reset) {
2695 __kmp_reset_root_init_mask(gtid);
2696 }
2697#endif
2698#if OMPT_SUPPORT1
2699 int flags =
2700 OMPT_INVOKER(fork_context)((fork_context == fork_context_gnu) ? ompt_parallel_invoker_program
: ompt_parallel_invoker_runtime)
|
2701 ((team_microtask == (void *)__kmp_teams_master) ? ompt_parallel_league
2702 : ompt_parallel_team);
2703 if (ompt_enabled.enabled) {
2704 __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, flags,
2705 codeptr);
2706 }
2707#endif
2708
2709 KMP_MB();
2710 KA_TRACE(20, ("__kmp_join_call: exit T#%d\n", gtid))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_join_call: exit T#%d\n"
, gtid); }
;
2711}
2712
2713/* Check whether we should push an internal control record onto the
2714 serial team stack. If so, do it. */
2715void __kmp_save_internal_controls(kmp_info_t *thread) {
2716
2717 if (thread->th.th_team != thread->th.th_serial_team) {
2718 return;
2719 }
2720 if (thread->th.th_team->t.t_serialized > 1) {
2721 int push = 0;
2722
2723 if (thread->th.th_team->t.t_control_stack_top == NULL__null) {
2724 push = 1;
2725 } else {
2726 if (thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
2727 thread->th.th_team->t.t_serialized) {
2728 push = 1;
2729 }
2730 }
2731 if (push) { /* push a record on the serial team's stack */
2732 kmp_internal_control_t *control =
2733 (kmp_internal_control_t *)__kmp_allocate(___kmp_allocate((sizeof(kmp_internal_control_t)), "openmp/runtime/src/kmp_runtime.cpp"
, 2734)
2734 sizeof(kmp_internal_control_t))___kmp_allocate((sizeof(kmp_internal_control_t)), "openmp/runtime/src/kmp_runtime.cpp"
, 2734)
;
2735
2736 copy_icvs(control, &thread->th.th_current_task->td_icvs);
2737
2738 control->serial_nesting_level = thread->th.th_team->t.t_serialized;
2739
2740 control->next = thread->th.th_team->t.t_control_stack_top;
2741 thread->th.th_team->t.t_control_stack_top = control;
2742 }
2743 }
2744}
2745
2746/* Changes set_nproc */
2747void __kmp_set_num_threads(int new_nth, int gtid) {
2748 kmp_info_t *thread;
2749 kmp_root_t *root;
2750
2751 KF_TRACE(10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_set_num_threads: new __kmp_nth = %d\n"
, new_nth); }
;
2752 KMP_DEBUG_ASSERT(__kmp_init_serial)if (!(__kmp_init_serial)) { __kmp_debug_assert("__kmp_init_serial"
, "openmp/runtime/src/kmp_runtime.cpp", 2752); }
;
2753
2754 if (new_nth < 1)
2755 new_nth = 1;
2756 else if (new_nth > __kmp_max_nth)
2757 new_nth = __kmp_max_nth;
2758
2759 KMP_COUNT_VALUE(OMP_set_numthreads, new_nth)((void)0);
2760 thread = __kmp_threads[gtid];
2761 if (thread->th.th_current_task->td_icvs.nproc == new_nth)
2762 return; // nothing to do
2763
2764 __kmp_save_internal_controls(thread);
2765
2766 set__nproc(thread, new_nth)(((thread)->th.th_current_task->td_icvs.nproc) = (new_nth
))
;
2767
2768 // If this omp_set_num_threads() call will cause the hot team size to be
2769 // reduced (in the absence of a num_threads clause), then reduce it now,
2770 // rather than waiting for the next parallel region.
2771 root = thread->th.th_root;
2772 if (__kmp_init_parallel && (!root->r.r_active) &&
2773 (root->r.r_hot_team->t.t_nproc > new_nth)
2774#if KMP_NESTED_HOT_TEAMS1
2775 && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
2776#endif
2777 ) {
2778 kmp_team_t *hot_team = root->r.r_hot_team;
2779 int f;
2780
2781 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2782
2783 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
2784 __kmp_resize_dist_barrier(hot_team, hot_team->t.t_nproc, new_nth);
2785 }
2786 // Release the extra threads we don't need any more.
2787 for (f = new_nth; f < hot_team->t.t_nproc; f++) {
2788 KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL)if (!(hot_team->t.t_threads[f] != __null)) { __kmp_debug_assert
("hot_team->t.t_threads[f] != __null", "openmp/runtime/src/kmp_runtime.cpp"
, 2788); }
;
2789 if (__kmp_tasking_mode != tskm_immediate_exec) {
2790 // When decreasing team size, threads no longer in the team should unref
2791 // task team.
2792 hot_team->t.t_threads[f]->th.th_task_team = NULL__null;
2793 }
2794 __kmp_free_thread(hot_team->t.t_threads[f]);
2795 hot_team->t.t_threads[f] = NULL__null;
2796 }
2797 hot_team->t.t_nproc = new_nth;
2798#if KMP_NESTED_HOT_TEAMS1
2799 if (thread->th.th_hot_teams) {
2800 KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team)if (!(hot_team == thread->th.th_hot_teams[0].hot_team)) { __kmp_debug_assert
("hot_team == thread->th.th_hot_teams[0].hot_team", "openmp/runtime/src/kmp_runtime.cpp"
, 2800); }
;
2801 thread->th.th_hot_teams[0].hot_team_nth = new_nth;
2802 }
2803#endif
2804
2805 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
2806 hot_team->t.b->update_num_threads(new_nth);
2807 __kmp_add_threads_to_team(hot_team, new_nth);
2808 }
2809
2810 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2811
2812 // Update the t_nproc field in the threads that are still active.
2813 for (f = 0; f < new_nth; f++) {
2814 KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL)if (!(hot_team->t.t_threads[f] != __null)) { __kmp_debug_assert
("hot_team->t.t_threads[f] != __null", "openmp/runtime/src/kmp_runtime.cpp"
, 2814); }
;
2815 hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
2816 }
2817 // Special flag in case omp_set_num_threads() call
2818 hot_team->t.t_size_changed = -1;
2819 }
2820}
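For orientation, the routine above is the runtime-internal path that typically backs omp_set_num_threads(); a minimal user-level sketch that exercises it (standard OpenMP API only, nothing runtime-internal):

    #include <omp.h>
    #include <cstdio>

    int main() {
      omp_set_num_threads(4); // expected to funnel into __kmp_set_num_threads()
      #pragma omp parallel
      {
        std::printf("thread %d of %d\n", omp_get_thread_num(), omp_get_num_threads());
      }
      return 0;
    }

As the code above shows, if the call shrinks the thread count, an over-sized hot team is trimmed immediately rather than at the next parallel region.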
2821
2822/* Changes max_active_levels */
2823void __kmp_set_max_active_levels(int gtid, int max_active_levels) {
2824 kmp_info_t *thread;
2825
2826 KF_TRACE(10, ("__kmp_set_max_active_levels: new max_active_levels for thread "if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_set_max_active_levels: new max_active_levels for thread "
"%d = (%d)\n", gtid, max_active_levels); }
2827 "%d = (%d)\n",if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_set_max_active_levels: new max_active_levels for thread "
"%d = (%d)\n", gtid, max_active_levels); }
2828 gtid, max_active_levels))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_set_max_active_levels: new max_active_levels for thread "
"%d = (%d)\n", gtid, max_active_levels); }
;
2829 KMP_DEBUG_ASSERT(__kmp_init_serial)if (!(__kmp_init_serial)) { __kmp_debug_assert("__kmp_init_serial"
, "openmp/runtime/src/kmp_runtime.cpp", 2829); }
;
2830
2831 // validate max_active_levels
2832 if (max_active_levels < 0) {
2833 KMP_WARNING(ActiveLevelsNegative, max_active_levels)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_ActiveLevelsNegative
, max_active_levels), __kmp_msg_null)
;
2834 // We ignore this call if the user has specified a negative value.
2835 // The current setting won't be changed. The last valid setting will be
2836 // used. A warning will be issued (if warnings are allowed as controlled by
2837 // the KMP_WARNINGS env var).
2838 KF_TRACE(10, ("__kmp_set_max_active_levels: the call is ignored: new "if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_set_max_active_levels: the call is ignored: new "
"max_active_levels for thread %d = (%d)\n", gtid, max_active_levels
); }
2839 "max_active_levels for thread %d = (%d)\n",if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_set_max_active_levels: the call is ignored: new "
"max_active_levels for thread %d = (%d)\n", gtid, max_active_levels
); }
2840 gtid, max_active_levels))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_set_max_active_levels: the call is ignored: new "
"max_active_levels for thread %d = (%d)\n", gtid, max_active_levels
); }
;
2841 return;
2842 }
2843 if (max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT2147483647) {
2844 // it's OK, the max_active_levels is within the valid range: [ 0;
2845 // KMP_MAX_ACTIVE_LEVELS_LIMIT ]
2846 // We allow a zero value. (implementation defined behavior)
2847 } else {
2848 KMP_WARNING(ActiveLevelsExceedLimit, max_active_levels,__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_ActiveLevelsExceedLimit
, max_active_levels, 2147483647), __kmp_msg_null)
2849 KMP_MAX_ACTIVE_LEVELS_LIMIT)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_ActiveLevelsExceedLimit
, max_active_levels, 2147483647), __kmp_msg_null)
;
2850 max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT2147483647;
2851 // Current upper limit is MAX_INT. (implementation defined behavior)
2852 // If the input exceeds the upper limit, we correct the input to be the
2853 // upper limit. (implementation defined behavior)
2854 // Actually, the flow should never get here until we use MAX_INT limit.
2855 }
2856 KF_TRACE(10, ("__kmp_set_max_active_levels: after validation: new "if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_set_max_active_levels: after validation: new "
"max_active_levels for thread %d = (%d)\n", gtid, max_active_levels
); }
2857 "max_active_levels for thread %d = (%d)\n",if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_set_max_active_levels: after validation: new "
"max_active_levels for thread %d = (%d)\n", gtid, max_active_levels
); }
2858 gtid, max_active_levels))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_set_max_active_levels: after validation: new "
"max_active_levels for thread %d = (%d)\n", gtid, max_active_levels
); }
;
2859
2860 thread = __kmp_threads[gtid];
2861
2862 __kmp_save_internal_controls(thread);
2863
2864 set__max_active_levels(thread, max_active_levels)(((thread)->th.th_current_task->td_icvs.max_active_levels
) = (max_active_levels))
;
2865}
2866
2867/* Gets max_active_levels */
2868int __kmp_get_max_active_levels(int gtid) {
2869 kmp_info_t *thread;
2870
2871 KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d\n", gtid))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_get_max_active_levels: thread %d\n"
, gtid); }
;
2872 KMP_DEBUG_ASSERT(__kmp_init_serial)if (!(__kmp_init_serial)) { __kmp_debug_assert("__kmp_init_serial"
, "openmp/runtime/src/kmp_runtime.cpp", 2872); }
;
2873
2874 thread = __kmp_threads[gtid];
2875 KMP_DEBUG_ASSERT(thread->th.th_current_task)if (!(thread->th.th_current_task)) { __kmp_debug_assert("thread->th.th_current_task"
, "openmp/runtime/src/kmp_runtime.cpp", 2875); }
;
2876 KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d, curtask=%p, "if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_get_max_active_levels: thread %d, curtask=%p, "
"curtask_maxaclevel=%d\n", gtid, thread->th.th_current_task
, thread->th.th_current_task->td_icvs.max_active_levels
); }
2877 "curtask_maxaclevel=%d\n",if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_get_max_active_levels: thread %d, curtask=%p, "
"curtask_maxaclevel=%d\n", gtid, thread->th.th_current_task
, thread->th.th_current_task->td_icvs.max_active_levels
); }
2878 gtid, thread->th.th_current_task,if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_get_max_active_levels: thread %d, curtask=%p, "
"curtask_maxaclevel=%d\n", gtid, thread->th.th_current_task
, thread->th.th_current_task->td_icvs.max_active_levels
); }
2879 thread->th.th_current_task->td_icvs.max_active_levels))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_get_max_active_levels: thread %d, curtask=%p, "
"curtask_maxaclevel=%d\n", gtid, thread->th.th_current_task
, thread->th.th_current_task->td_icvs.max_active_levels
); }
;
2880 return thread->th.th_current_task->td_icvs.max_active_levels;
2881}
2882
2883// nteams-var per-device ICV
2884void __kmp_set_num_teams(int num_teams) {
2885 if (num_teams > 0)
2886 __kmp_nteams = num_teams;
2887}
2888int __kmp_get_max_teams(void) { return __kmp_nteams; }
2889// teams-thread-limit-var per-device ICV
2890void __kmp_set_teams_thread_limit(int limit) {
2891 if (limit > 0)
2892 __kmp_teams_thread_limit = limit;
2893}
2894int __kmp_get_teams_thread_limit(void) { return __kmp_teams_thread_limit; }
2895
2896KMP_BUILD_ASSERT(sizeof(kmp_sched_t) == sizeof(int))static_assert(sizeof(kmp_sched_t) == sizeof(int), "Build condition error"
)
;
2897KMP_BUILD_ASSERT(sizeof(enum sched_type) == sizeof(int))static_assert(sizeof(enum sched_type) == sizeof(int), "Build condition error"
)
;
2898
2899/* Changes def_sched_var ICV values (run-time schedule kind and chunk) */
2900void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) {
2901 kmp_info_t *thread;
2902 kmp_sched_t orig_kind;
2903 // kmp_team_t *team;
2904
2905 KF_TRACE(10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n",if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n"
, gtid, (int)kind, chunk); }
2906 gtid, (int)kind, chunk))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n"
, gtid, (int)kind, chunk); }
;
2907 KMP_DEBUG_ASSERT(__kmp_init_serial)if (!(__kmp_init_serial)) { __kmp_debug_assert("__kmp_init_serial"
, "openmp/runtime/src/kmp_runtime.cpp", 2907); }
;
2908
2909 // Check if the kind parameter is valid, correct if needed.
2910 // Valid parameters should fit in one of two intervals - standard or extended:
2911 // <lower>, <valid>, <upper_std>, <lower_ext>, <valid>, <upper>
2912 // 2008-01-25: 0, 1 - 4, 5, 100, 101 - 102, 103
2913 orig_kind = kind;
2914 kind = __kmp_sched_without_mods(kind);
2915
2916 if (kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
2917 (kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std)) {
2918 // TODO: Hint needs attention in case we change the default schedule.
2919 __kmp_msg(kmp_ms_warning, KMP_MSG(ScheduleKindOutOfRange, kind)__kmp_msg_format(kmp_i18n_msg_ScheduleKindOutOfRange, kind),
2920 KMP_HNT(DefaultScheduleKindUsed, "static, no chunk")__kmp_msg_format(kmp_i18n_hnt_DefaultScheduleKindUsed, "static, no chunk"
)
,
2921 __kmp_msg_null);
2922 kind = kmp_sched_default;
2923 chunk = 0; // ignore chunk value in case of bad kind
2924 }
2925
2926 thread = __kmp_threads[gtid];
2927
2928 __kmp_save_internal_controls(thread);
2929
2930 if (kind < kmp_sched_upper_std) {
2931 if (kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK1) {
2932 // differ static chunked vs. unchunked: chunk should be invalid to
2933 // indicate unchunked schedule (which is the default)
2934 thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
2935 } else {
2936 thread->th.th_current_task->td_icvs.sched.r_sched_type =
2937 __kmp_sch_map[kind - kmp_sched_lower - 1];
2938 }
2939 } else {
2940 // __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std -
2941 // kmp_sched_lower - 2 ];
2942 thread->th.th_current_task->td_icvs.sched.r_sched_type =
2943 __kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std -
2944 kmp_sched_lower - 2];
2945 }
2946 __kmp_sched_apply_mods_intkind(
2947 orig_kind, &(thread->th.th_current_task->td_icvs.sched.r_sched_type));
2948 if (kind == kmp_sched_auto || chunk < 1) {
2949 // ignore parameter chunk for schedule auto
2950 thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK1;
2951 } else {
2952 thread->th.th_current_task->td_icvs.sched.chunk = chunk;
2953 }
2954}
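Similarly, __kmp_set_schedule above is the internal routine that the omp_set_schedule() entry point is expected to reach (an assumption based on naming; the entry points themselves are outside this excerpt). A small usage sketch showing the kind/chunk mapping implemented here, where a chunk below 1 selects the unchunked default:

    #include <omp.h>

    int main() {
      omp_set_schedule(omp_sched_dynamic, 4); // run-time schedule: dynamic, chunk 4
      omp_set_schedule(omp_sched_static, 0);  // chunk < 1: plain (unchunked) static
      omp_sched_t kind; int chunk;
      omp_get_schedule(&kind, &chunk);        // read back the def-sched ICV
      return 0;
    }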
2955
2956/* Gets def_sched_var ICV values */
2957void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk) {
2958 kmp_info_t *thread;
2959 enum sched_type th_type;
2960
2961 KF_TRACE(10, ("__kmp_get_schedule: thread %d\n", gtid))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_get_schedule: thread %d\n"
, gtid); }
;
2962 KMP_DEBUG_ASSERT(__kmp_init_serial)if (!(__kmp_init_serial)) { __kmp_debug_assert("__kmp_init_serial"
, "openmp/runtime/src/kmp_runtime.cpp", 2962); }
;
2963
2964 thread = __kmp_threads[gtid];
2965
2966 th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
2967 switch (SCHEDULE_WITHOUT_MODIFIERS(th_type)(enum sched_type)( (th_type) & ~(kmp_sch_modifier_nonmonotonic
| kmp_sch_modifier_monotonic))
) {
2968 case kmp_sch_static:
2969 case kmp_sch_static_greedy:
2970 case kmp_sch_static_balanced:
2971 *kind = kmp_sched_static;
2972 __kmp_sched_apply_mods_stdkind(kind, th_type);
2973 *chunk = 0; // chunk was not set, try to show this fact via zero value
2974 return;
2975 case kmp_sch_static_chunked:
2976 *kind = kmp_sched_static;
2977 break;
2978 case kmp_sch_dynamic_chunked:
2979 *kind = kmp_sched_dynamic;
2980 break;
2981 case kmp_sch_guided_chunked:
2982 case kmp_sch_guided_iterative_chunked:
2983 case kmp_sch_guided_analytical_chunked:
2984 *kind = kmp_sched_guided;
2985 break;
2986 case kmp_sch_auto:
2987 *kind = kmp_sched_auto;
2988 break;
2989 case kmp_sch_trapezoidal:
2990 *kind = kmp_sched_trapezoidal;
2991 break;
2992#if KMP_STATIC_STEAL_ENABLED1
2993 case kmp_sch_static_steal:
2994 *kind = kmp_sched_static_steal;
2995 break;
2996#endif
2997 default:
2998 KMP_FATAL(UnknownSchedulingType, th_type)__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_UnknownSchedulingType
, th_type), __kmp_msg_null)
;
2999 }
3000
3001 __kmp_sched_apply_mods_stdkind(kind, th_type);
3002 *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
3003}
3004
3005int __kmp_get_ancestor_thread_num(int gtid, int level) {
3006
3007 int ii, dd;
3008 kmp_team_t *team;
3009 kmp_info_t *thr;
3010
3011 KF_TRACE(10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_get_ancestor_thread_num: thread %d %d\n"
, gtid, level); }
;
3012 KMP_DEBUG_ASSERT(__kmp_init_serial)if (!(__kmp_init_serial)) { __kmp_debug_assert("__kmp_init_serial"
, "openmp/runtime/src/kmp_runtime.cpp", 3012); }
;
3013
3014 // validate level
3015 if (level == 0)
3016 return 0;
3017 if (level < 0)
3018 return -1;
3019 thr = __kmp_threads[gtid];
3020 team = thr->th.th_team;
3021 ii = team->t.t_level;
3022 if (level > ii)
3023 return -1;
3024
3025 if (thr->th.th_teams_microtask) {
3026 // AC: we are in teams region where multiple nested teams have same level
3027 int tlevel = thr->th.th_teams_level; // the level of the teams construct
3028 if (level <=
3029 tlevel) { // otherwise usual algorithm works (will not touch the teams)
3030 KMP_DEBUG_ASSERT(ii >= tlevel)if (!(ii >= tlevel)) { __kmp_debug_assert("ii >= tlevel"
, "openmp/runtime/src/kmp_runtime.cpp", 3030); }
;
3031 // AC: As we need to pass by the teams league, we need to artificially
3032 // increase ii
3033 if (ii == tlevel) {
3034 ii += 2; // three teams have same level
3035 } else {
3036 ii++; // two teams have same level
3037 }
3038 }
3039 }
3040
3041 if (ii == level)
3042 return __kmp_tid_from_gtid(gtid);
3043
3044 dd = team->t.t_serialized;
3045 level++;
3046 while (ii > level) {
3047 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
3048 }
3049 if ((team->t.t_serialized) && (!dd)) {
3050 team = team->t.t_parent;
3051 continue;
3052 }
3053 if (ii > level) {
3054 team = team->t.t_parent;
3055 dd = team->t.t_serialized;
3056 ii--;
3057 }
3058 }
3059
3060 return (dd > 1) ? (0) : (team->t.t_master_tid);
3061}
3062
3063int __kmp_get_team_size(int gtid, int level) {
3064
3065 int ii, dd;
3066 kmp_team_t *team;
3067 kmp_info_t *thr;
3068
3069 KF_TRACE(10, ("__kmp_get_team_size: thread %d %d\n", gtid, level))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_get_team_size: thread %d %d\n"
, gtid, level); }
;
3070 KMP_DEBUG_ASSERT(__kmp_init_serial)if (!(__kmp_init_serial)) { __kmp_debug_assert("__kmp_init_serial"
, "openmp/runtime/src/kmp_runtime.cpp", 3070); }
;
3071
3072 // validate level
3073 if (level == 0)
3074 return 1;
3075 if (level < 0)
3076 return -1;
3077 thr = __kmp_threads[gtid];
3078 team = thr->th.th_team;
3079 ii = team->t.t_level;
3080 if (level > ii)
3081 return -1;
3082
3083 if (thr->th.th_teams_microtask) {
3084 // AC: we are in teams region where multiple nested teams have same level
3085 int tlevel = thr->th.th_teams_level; // the level of the teams construct
3086 if (level <=
3087 tlevel) { // otherwise usual algorithm works (will not touch the teams)
3088 KMP_DEBUG_ASSERT(ii >= tlevel)if (!(ii >= tlevel)) { __kmp_debug_assert("ii >= tlevel"
, "openmp/runtime/src/kmp_runtime.cpp", 3088); }
;
3089 // AC: As we need to pass by the teams league, we need to artificially
3090 // increase ii
3091 if (ii == tlevel) {
3092 ii += 2; // three teams have same level
3093 } else {
3094 ii++; // two teams have same level
3095 }
3096 }
3097 }
3098
3099 while (ii > level) {
3100 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
3101 }
3102 if (team->t.t_serialized && (!dd)) {
3103 team = team->t.t_parent;
3104 continue;
3105 }
3106 if (ii > level) {
3107 team = team->t.t_parent;
3108 ii--;
3109 }
3110 }
3111
3112 return team->t.t_nproc;
3113}
3114
3115kmp_r_sched_t __kmp_get_schedule_global() {
3116 // This routine created because pairs (__kmp_sched, __kmp_chunk) and
3117 // (__kmp_static, __kmp_guided) may be changed by kmp_set_defaults
3118 // independently. So one can get the updated schedule here.
3119
3120 kmp_r_sched_t r_sched;
3121
3122 // create schedule from 4 globals: __kmp_sched, __kmp_chunk, __kmp_static,
3123 // __kmp_guided. __kmp_sched should keep original value, so that user can set
3124 // KMP_SCHEDULE multiple times, and thus have different run-time schedules in
3125 // different roots (even in OMP 2.5)
3126 enum sched_type s = SCHEDULE_WITHOUT_MODIFIERS(__kmp_sched);
3127 enum sched_type sched_modifiers = SCHEDULE_GET_MODIFIERS(__kmp_sched);
3128 if (s == kmp_sch_static) {
3129 // replace STATIC with more detailed schedule (balanced or greedy)
3130 r_sched.r_sched_type = __kmp_static;
3131 } else if (s == kmp_sch_guided_chunked) {
3132 // replace GUIDED with more detailed schedule (iterative or analytical)
3133 r_sched.r_sched_type = __kmp_guided;
3134 } else { // (STATIC_CHUNKED), or (DYNAMIC_CHUNKED), or other
3135 r_sched.r_sched_type = __kmp_sched;
3136 }
3137 SCHEDULE_SET_MODIFIERS(r_sched.r_sched_type, sched_modifiers);
3138
3139 if (__kmp_chunk < KMP_DEFAULT_CHUNK) {
3140 // __kmp_chunk may be wrong here (if it was not ever set)
3141 r_sched.chunk = KMP_DEFAULT_CHUNK;
3142 } else {
3143 r_sched.chunk = __kmp_chunk;
3144 }
3145
3146 return r_sched;
3147}
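The routine above only splits __kmp_sched into a base kind plus monotonic/nonmonotonic modifier bits, substitutes the more detailed static/guided kinds, and ORs the modifiers back in. A minimal standalone sketch of that mask-and-recombine step follows; the enumerator values are invented for the example and are not the ones defined in kmp.h.

#include <cstdio>

// Illustrative values only; the real enumerators live in kmp.h.
enum : unsigned {
  kind_static = 1,
  kind_guided = 3,
  kind_guided_analytical = 4, // stands in for the "more detailed" __kmp_guided
  mod_monotonic = 1u << 29,
  mod_nonmonotonic = 1u << 30,
};

int main() {
  unsigned sched = kind_guided | mod_nonmonotonic; // e.g. parsed from KMP_SCHEDULE
  unsigned mods = sched & (mod_monotonic | mod_nonmonotonic);  // SCHEDULE_GET_MODIFIERS
  unsigned base = sched & ~(mod_monotonic | mod_nonmonotonic); // SCHEDULE_WITHOUT_MODIFIERS
  unsigned detailed = (base == kind_guided) ? kind_guided_analytical : base;
  unsigned result = detailed | mods;                           // SCHEDULE_SET_MODIFIERS
  std::printf("base=%u mods=0x%x result=0x%x\n", base, mods, result);
  return 0;
}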
3148
3149/* Allocate (realloc == FALSE) or reallocate (realloc == TRUE)
3150 at least argc *t_argv entries for the requested team. */
3151static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc) {
3152
3153 KMP_DEBUG_ASSERT(team);
3154 if (!realloc || argc > team->t.t_max_argc) {
3155
3156 KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: needed entries=%d, "
3157 "current entries=%d\n",
3158 team->t.t_id, argc, (realloc) ? team->t.t_max_argc : 0));
3159 /* if previously allocated heap space for args, free them */
3160 if (realloc && team->t.t_argv != &team->t.t_inline_argv[0])
3161 __kmp_free((void *)team->t.t_argv);
3162
3163 if (argc <= KMP_INLINE_ARGV_ENTRIES) {
3164 /* use unused space in the cache line for arguments */
3165 team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
3166 KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: inline allocate %d "
3167 "argv entries\n",
3168 team->t.t_id, team->t.t_max_argc));
3169 team->t.t_argv = &team->t.t_inline_argv[0];
3170 if (__kmp_storage_map) {
3171 __kmp_print_storage_map_gtid(
3172 -1, &team->t.t_inline_argv[0],
3173 &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
3174 (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES), "team_%d.t_inline_argv",
3175 team->t.t_id);
3176 }
3177 } else {
3178 /* allocate space for arguments in the heap */
3179 team->t.t_max_argc = (argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1))
3180 ? KMP_MIN_MALLOC_ARGV_ENTRIES
3181 : 2 * argc;
3182 KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: dynamic allocate %d "
3183 "argv entries\n",
3184 team->t.t_id, team->t.t_max_argc));
3185 team->t.t_argv =
3186 (void **)__kmp_page_allocate(sizeof(void *) * team->t.t_max_argc);
3187 if (__kmp_storage_map) {
3188 __kmp_print_storage_map_gtid(-1, &team->t.t_argv[0],
3189 &team->t.t_argv[team->t.t_max_argc],
3190 sizeof(void *) * team->t.t_max_argc,
3191 "team_%d.t_argv", team->t.t_id);
3192 }
3193 }
3194 }
3195}
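The capacity policy above is a small-buffer optimization: small argument counts reuse the spare cache-line space inside the team structure, while larger ones get a heap block of at least KMP_MIN_MALLOC_ARGV_ENTRIES entries that otherwise doubles the request. A minimal sketch of just that policy, assuming an inline capacity of 24 entries (the real KMP_INLINE_ARGV_ENTRIES depends on cache-line size and struct layout):

#include <cassert>

const int INLINE_ENTRIES = 24;      // assumed value for illustration
const int MIN_MALLOC_ENTRIES = 100; // matches the value shown in the expansion above

// Returns how many t_argv slots would be reserved for a request of `argc`.
int argv_capacity(int argc) {
  if (argc <= INLINE_ENTRIES)
    return INLINE_ENTRIES; // reuse unused space in the team struct's cache line
  return (argc <= (MIN_MALLOC_ENTRIES >> 1)) ? MIN_MALLOC_ENTRIES
                                             : 2 * argc; // grow by doubling
}

int main() {
  assert(argv_capacity(4) == INLINE_ENTRIES);
  assert(argv_capacity(40) == MIN_MALLOC_ENTRIES);
  assert(argv_capacity(80) == 160);
  return 0;
}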
3196
3197static void __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) {
3198 int i;
3199 int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
3200 team->t.t_threads =
3201 (kmp_info_t **)__kmp_allocate(sizeof(kmp_info_t *) * max_nth);
3202 team->t.t_disp_buffer = (dispatch_shared_info_t *)__kmp_allocate(
3203 sizeof(dispatch_shared_info_t) * num_disp_buff);
3204 team->t.t_dispatch =
3205 (kmp_disp_t *)__kmp_allocate(sizeof(kmp_disp_t) * max_nth);
3206 team->t.t_implicit_task_taskdata =
3207 (kmp_taskdata_t *)__kmp_allocate(sizeof(kmp_taskdata_t) * max_nth);
3208 team->t.t_max_nproc = max_nth;
3209
3210 /* setup dispatch buffers */
3211 for (i = 0; i < num_disp_buff; ++i) {
3212 team->t.t_disp_buffer[i].buffer_index = i;
3213 team->t.t_disp_buffer[i].doacross_buf_idx = i;
3214 }
3215}
3216
3217static void __kmp_free_team_arrays(kmp_team_t *team) {
3218 /* Note: this does not free the threads in t_threads (__kmp_free_threads) */
3219 int i;
3220 for (i = 0; i < team->t.t_max_nproc; ++i) {
3221 if (team->t.t_dispatch[i].th_disp_buffer != NULL) {
3222 __kmp_free(team->t.t_dispatch[i].th_disp_buffer);
3223 team->t.t_dispatch[i].th_disp_buffer = NULL;
3224 }
3225 }
3226#if KMP_USE_HIER_SCHED
3227 __kmp_dispatch_free_hierarchies(team);
3228#endif
3229 __kmp_free(team->t.t_threads);
3230 __kmp_free(team->t.t_disp_buffer);
3231 __kmp_free(team->t.t_dispatch);
3232 __kmp_free(team->t.t_implicit_task_taskdata);
3233 team->t.t_threads = NULL;
3234 team->t.t_disp_buffer = NULL;
3235 team->t.t_dispatch = NULL;
3236 team->t.t_implicit_task_taskdata = 0;
3237}
3238
3239static void __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
3240 kmp_info_t **oldThreads = team->t.t_threads;
3241
3242 __kmp_free(team->t.t_disp_buffer);
3243 __kmp_free(team->t.t_dispatch);
3244 __kmp_free(team->t.t_implicit_task_taskdata);
3245 __kmp_allocate_team_arrays(team, max_nth);
3246
3247 KMP_MEMCPY(team->t.t_threads, oldThreads,
3248 team->t.t_nproc * sizeof(kmp_info_t *));
3249
3250 __kmp_free(oldThreads);
3251}
3252
3253static kmp_internal_control_t __kmp_get_global_icvs(void) {
3254
3255 kmp_r_sched_t r_sched =
3256 __kmp_get_schedule_global(); // get current state of scheduling globals
3257
3258 KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.used > 0);
3259
3260 kmp_internal_control_t g_icvs = {
3261 0, // int serial_nesting_level; //corresponds to value of th_team_serialized
3262 (kmp_int8)__kmp_global.g.g_dynamic, // internal control for dynamic
3263 // adjustment of threads (per thread)
3264 (kmp_int8)__kmp_env_blocktime, // int bt_set; //internal control for
3265 // whether blocktime is explicitly set
3266 __kmp_dflt_blocktime, // int blocktime; //internal control for blocktime
3267#if KMP_USE_MONITOR
3268 __kmp_bt_intervals, // int bt_intervals; //internal control for blocktime
3269// intervals
3270#endif
3271 __kmp_dflt_team_nth, // int nproc; //internal control for # of threads for
3272 // next parallel region (per thread)
3273 // (use a max ub on value if __kmp_parallel_initialize not called yet)
3274 __kmp_cg_max_nth, // int thread_limit;
3275 __kmp_dflt_max_active_levels, // int max_active_levels; //internal control
3276 // for max_active_levels
3277 r_sched, // kmp_r_sched_t sched; //internal control for runtime schedule
3278 // {sched,chunk} pair
3279 __kmp_nested_proc_bind.bind_types[0],
3280 __kmp_default_device,
3281 NULL // struct kmp_internal_control *next;
3282 };
3283
3284 return g_icvs;
3285}
3286
3287static kmp_internal_control_t __kmp_get_x_global_icvs(const kmp_team_t *team) {
3288
3289 kmp_internal_control_t gx_icvs;
3290 gx_icvs.serial_nesting_level =
3291 0; // probably =team->t.t_serial like in save_inter_controls
3292 copy_icvs(&gx_icvs, &team->t.t_threads[0]->th.th_current_task->td_icvs);
3293 gx_icvs.next = NULL;
3294
3295 return gx_icvs;
3296}
3297
3298static void __kmp_initialize_root(kmp_root_t *root) {
3299 int f;
3300 kmp_team_t *root_team;
3301 kmp_team_t *hot_team;
3302 int hot_team_max_nth;
3303 kmp_r_sched_t r_sched =
3304 __kmp_get_schedule_global(); // get current state of scheduling globals
3305 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3306 KMP_DEBUG_ASSERT(root);
3307 KMP_ASSERT(!root->r.r_begin);
3308
3309 /* setup the root state structure */
3310 __kmp_init_lock(&root->r.r_begin_lock);
3311 root->r.r_begin = FALSE;
3312 root->r.r_active = FALSE;
3313 root->r.r_in_parallel = 0;
3314 root->r.r_blocktime = __kmp_dflt_blocktime;
3315#if KMP_AFFINITY_SUPPORTED
3316 root->r.r_affinity_assigned = FALSE;
3317#endif
3318
3319 /* setup the root team for this task */
3320 /* allocate the root team structure */
3321 KF_TRACE(10, ("__kmp_initialize_root: before root_team\n"));
3322
3323 root_team =
3324 __kmp_allocate_team(root,
3325 1, // new_nproc
3326 1, // max_nproc
3327#if OMPT_SUPPORT
3328 ompt_data_none, // root parallel id
3329#endif
3330 __kmp_nested_proc_bind.bind_types[0], &r_icvs,
3331 0 // argc
3332 USE_NESTED_HOT_ARG(NULL) // primary thread is unknown
3333 );
3334#if USE_DEBUGGER
3335 // Non-NULL value should be assigned to make the debugger display the root
3336 // team.
3337 TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)(~0));
3338#endif
3339
3340 KF_TRACE(10, ("__kmp_initialize_root: after root_team = %p\n", root_team));
3341
3342 root->r.r_root_team = root_team;
3343 root_team->t.t_control_stack_top = NULL;
3344
3345 /* initialize root team */
3346 root_team->t.t_threads[0] = NULL;
3347 root_team->t.t_nproc = 1;
3348 root_team->t.t_serialized = 1;
3349 // TODO???: root_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
3350 root_team->t.t_sched.sched = r_sched.sched;
3351 KA_TRACE(
3352 20,
3353 ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
3354 root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
3355
3356 /* setup the hot team for this task */
3357 /* allocate the hot team structure */
3358 KF_TRACE(10, ("__kmp_initialize_root: before hot_team\n"));
3359
3360 hot_team =
3361 __kmp_allocate_team(root,
3362 1, // new_nproc
3363 __kmp_dflt_team_nth_ub * 2, // max_nproc
3364#if OMPT_SUPPORT
3365 ompt_data_none, // root parallel id
3366#endif
3367 __kmp_nested_proc_bind.bind_types[0], &r_icvs,
3368 0 // argc
3369 USE_NESTED_HOT_ARG(NULL) // primary thread is unknown
3370 );
3371 KF_TRACE(10, ("__kmp_initialize_root: after hot_team = %p\n", hot_team));
3372
3373 root->r.r_hot_team = hot_team;
3374 root_team->t.t_control_stack_top = NULL;
3375
3376 /* first-time initialization */
3377 hot_team->t.t_parent = root_team;
3378
3379 /* initialize hot team */
3380 hot_team_max_nth = hot_team->t.t_max_nproc;
3381 for (f = 0; f < hot_team_max_nth; ++f) {
3382 hot_team->t.t_threads[f] = NULL;
3383 }
3384 hot_team->t.t_nproc = 1;
3385 // TODO???: hot_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
3386 hot_team->t.t_sched.sched = r_sched.sched;
3387 hot_team->t.t_size_changed = 0;
3388}
3389
3390#ifdef KMP_DEBUG
3391
3392typedef struct kmp_team_list_item {
3393 kmp_team_p const *entry;
3394 struct kmp_team_list_item *next;
3395} kmp_team_list_item_t;
3396typedef kmp_team_list_item_t *kmp_team_list_t;
3397
3398static void __kmp_print_structure_team_accum( // Add team to list of teams.
3399 kmp_team_list_t list, // List of teams.
3400 kmp_team_p const *team // Team to add.
3401) {
3402
3403 // List must terminate with item where both entry and next are NULL.
3404 // Team is added to the list only once.
3405 // List is sorted in ascending order by team id.
3406 // Team id is *not* a key.
3407
3408 kmp_team_list_t l;
3409
3410 KMP_DEBUG_ASSERT(list != NULL);
3411 if (team == NULL) {
3412 return;
3413 }
3414
3415 __kmp_print_structure_team_accum(list, team->t.t_parent);
3416 __kmp_print_structure_team_accum(list, team->t.t_next_pool);
3417
3418 // Search list for the team.
3419 l = list;
3420 while (l->next != NULL && l->entry != team) {
3421 l = l->next;
3422 }
3423 if (l->next != NULL) {
3424 return; // Team has been added before, exit.
3425 }
3426
3427 // Team is not found. Search list again for insertion point.
3428 l = list;
3429 while (l->next != NULL && l->entry->t.t_id <= team->t.t_id) {
3430 l = l->next;
3431 }
3432
3433 // Insert team.
3434 {
3435 kmp_team_list_item_t *item = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
3436 sizeof(kmp_team_list_item_t));
3437 *item = *l;
3438 l->entry = team;
3439 l->next = item;
3440 }
3441}
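__kmp_print_structure_team_accum keeps its team list sorted and terminated by a sentinel item whose entry and next are both NULL; insertion works by copying the current node into a freshly allocated tail and overwriting the current node in place. A minimal sketch of the same discipline, using plain ints instead of team pointers (names and values here are illustrative, not runtime APIs):

#include <cstdio>
#include <cstdlib>

struct item {
  int entry;   // 0 plays the role of the NULL team in the sentinel
  item *next;  // NULL only in the sentinel
};

static void accum(item *list, int value) {
  item *l = list;
  while (l->next != NULL && l->entry != value)
    l = l->next;
  if (l->next != NULL)
    return; // already present
  l = list;
  while (l->next != NULL && l->entry <= value)
    l = l->next;
  item *tail = (item *)malloc(sizeof(item));
  *tail = *l;        // the old node's contents (possibly the sentinel) move down
  l->entry = value;  // the current node becomes the inserted element
  l->next = tail;
}

int main() {
  item *list = (item *)malloc(sizeof(item));
  list->entry = 0;
  list->next = NULL;
  int ids[] = {5, 2, 7, 2};
  for (int id : ids)
    accum(list, id);
  for (item *l = list; l->next != NULL; l = l->next)
    std::printf("%d ", l->entry); // prints "2 5 7": sorted, duplicates dropped
  std::printf("\n");
  while (list != NULL) { // free the list, sentinel included
    item *it = list;
    list = list->next;
    free(it);
  }
  return 0;
}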
3442
3443static void __kmp_print_structure_team(char const *title, kmp_team_p const *team
3444
3445) {
3446 __kmp_printf("%s", title);
3447 if (team != NULL) {
3448 __kmp_printf("%2x %p\n", team->t.t_id, team);
3449 } else {
3450 __kmp_printf(" - (nil)\n");
3451 }
3452}
3453
3454static void __kmp_print_structure_thread(char const *title,
3455 kmp_info_p const *thread) {
3456 __kmp_printf("%s", title);
3457 if (thread != NULL) {
3458 __kmp_printf("%2d %p\n", thread->th.th_info.ds.ds_gtid, thread);
3459 } else {
3460 __kmp_printf(" - (nil)\n");
3461 }
3462}
3463
3464void __kmp_print_structure(void) {
3465
3466 kmp_team_list_t list;
3467
3468 // Initialize list of teams.
3469 list =
3470 (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(sizeof(kmp_team_list_item_t));
3471 list->entry = NULL;
3472 list->next = NULL;
3473
3474 __kmp_printf("\n------------------------------\nGlobal Thread "
3475 "Table\n------------------------------\n");
3476 {
3477 int gtid;
3478 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3479 __kmp_printf("%2d", gtid);
3480 if (__kmp_threads != NULL) {
3481 __kmp_printf(" %p", __kmp_threads[gtid]);
3482 }
3483 if (__kmp_root != NULL) {
3484 __kmp_printf(" %p", __kmp_root[gtid]);
3485 }
3486 __kmp_printf("\n");
3487 }
3488 }
3489
3490 // Print out __kmp_threads array.
3491 __kmp_printf("\n------------------------------\nThreads\n--------------------"
3492 "----------\n");
3493 if (__kmp_threads != NULL) {
3494 int gtid;
3495 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3496 kmp_info_t const *thread = __kmp_threads[gtid];
3497 if (thread != NULL) {
3498 __kmp_printf("GTID %2d %p:\n", gtid, thread);
3499 __kmp_printf(" Our Root: %p\n", thread->th.th_root);
3500 __kmp_print_structure_team(" Our Team: ", thread->th.th_team);
3501 __kmp_print_structure_team(" Serial Team: ",
3502 thread->th.th_serial_team);
3503 __kmp_printf(" Threads: %2d\n", thread->th.th_team_nproc);
3504 __kmp_print_structure_thread(" Primary: ",
3505 thread->th.th_team_master);
3506 __kmp_printf(" Serialized?: %2d\n", thread->th.th_team_serialized);
3507 __kmp_printf(" Set NProc: %2d\n", thread->th.th_set_nproc);
3508 __kmp_printf(" Set Proc Bind: %2d\n", thread->th.th_set_proc_bind);
3509 __kmp_print_structure_thread(" Next in pool: ",
3510 thread->th.th_next_pool);
3511 __kmp_printf("\n");
3512 __kmp_print_structure_team_accum(list, thread->th.th_team);
3513 __kmp_print_structure_team_accum(list, thread->th.th_serial_team);
3514 }
3515 }
3516 } else {
3517 __kmp_printf("Threads array is not allocated.\n");
3518 }
3519
3520 // Print out __kmp_root array.
3521 __kmp_printf("\n------------------------------\nUbers\n----------------------"
3522 "--------\n");
3523 if (__kmp_root != NULL) {
3524 int gtid;
3525 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3526 kmp_root_t const *root = __kmp_root[gtid];
3527 if (root != NULL) {
3528 __kmp_printf("GTID %2d %p:\n", gtid, root);
3529 __kmp_print_structure_team(" Root Team: ", root->r.r_root_team);
3530 __kmp_print_structure_team(" Hot Team: ", root->r.r_hot_team);
3531 __kmp_print_structure_thread(" Uber Thread: ",
3532 root->r.r_uber_thread);
3533 __kmp_printf(" Active?: %2d\n", root->r.r_active);
3534 __kmp_printf(" In Parallel: %2d\n",
3535 KMP_ATOMIC_LD_RLX(&root->r.r_in_parallel));
3536 __kmp_printf("\n");
3537 __kmp_print_structure_team_accum(list, root->r.r_root_team);
3538 __kmp_print_structure_team_accum(list, root->r.r_hot_team);
3539 }
3540 }
3541 } else {
3542 __kmp_printf("Ubers array is not allocated.\n");
3543 }
3544
3545 __kmp_printf("\n------------------------------\nTeams\n----------------------"
3546 "--------\n");
3547 while (list->next != NULL) {
3548 kmp_team_p const *team = list->entry;
3549 int i;
3550 __kmp_printf("Team %2x %p:\n", team->t.t_id, team);
3551 __kmp_print_structure_team(" Parent Team: ", team->t.t_parent);
3552 __kmp_printf(" Primary TID: %2d\n", team->t.t_master_tid);
3553 __kmp_printf(" Max threads: %2d\n", team->t.t_max_nproc);
3554 __kmp_printf(" Levels of serial: %2d\n", team->t.t_serialized);
3555 __kmp_printf(" Number threads: %2d\n", team->t.t_nproc);
3556 for (i = 0; i < team->t.t_nproc; ++i) {
3557 __kmp_printf(" Thread %2d: ", i);
3558 __kmp_print_structure_thread("", team->t.t_threads[i]);
3559 }
3560 __kmp_print_structure_team(" Next in pool: ", team->t.t_next_pool);
3561 __kmp_printf("\n");
3562 list = list->next;
3563 }
3564
3565 // Print out __kmp_thread_pool and __kmp_team_pool.
3566 __kmp_printf("\n------------------------------\nPools\n----------------------"
3567 "--------\n");
3568 __kmp_print_structure_thread("Thread pool: ",
3569 CCAST(kmp_info_t *, __kmp_thread_pool));
3570 __kmp_print_structure_team("Team pool: ",
3571 CCAST(kmp_team_t *, __kmp_team_pool));
3572 __kmp_printf("\n");
3573
3574 // Free team list.
3575 while (list != NULL) {
3576 kmp_team_list_item_t *item = list;
3577 list = list->next;
3578 KMP_INTERNAL_FREE(item);
3579 }
3580}
3581
3582#endif
3583
3584//---------------------------------------------------------------------------
3585// Stuff for per-thread fast random number generator
3586// Table of primes
3587static const unsigned __kmp_primes[] = {
3588 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, 0xba5703f5, 0xb495a877,
3589 0xe1626741, 0x79695e6b, 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
3590 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, 0xbe4d6fe9, 0x5f15e201,
3591 0x99afc3fd, 0xf3f16801, 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
3592 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, 0x085a3d61, 0x46eb5ea7,
3593 0x3d9910ed, 0x2e687b5b, 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
3594 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, 0x54581edb, 0xf2480f45,
3595 0x0bb9288f, 0xef1affc7, 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
3596 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, 0xfc411073, 0xc3749363,
3597 0xb892d829, 0x3549366b, 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
3598 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f};
3599
3600//---------------------------------------------------------------------------
3601// __kmp_get_random: Get a random number using a linear congruential method.
3602unsigned short __kmp_get_random(kmp_info_t *thread) {
3603 unsigned x = thread->th.th_x;
3604 unsigned short r = (unsigned short)(x >> 16);
3605
3606 thread->th.th_x = x * thread->th.th_a + 1;
3607
3608 KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
3609 thread->th.th_info.ds.ds_tid, r));
3610
3611 return r;
3612}
3613//--------------------------------------------------------
3614// __kmp_init_random: Initialize a random number generator
3615void __kmp_init_random(kmp_info_t *thread) {
3616 unsigned seed = thread->th.th_info.ds.ds_tid;
3617
3618 thread->th.th_a =
3619 __kmp_primes[seed % (sizeof(__kmp_primes) / sizeof(__kmp_primes[0]))];
3620 thread->th.th_x = (seed + 1) * thread->th.th_a + 1;
3621 KA_TRACE(30,
3622 ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a));
3623}
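The generator above is a plain 32-bit linear congruential recurrence x = a * x + 1 (mod 2^32) whose per-thread multiplier comes from the primes table, and only the high 16 bits of the state are handed out. A self-contained sketch of the same recurrence; the two multipliers are the first two table entries, and a real thread would index the table by its tid modulo the table size:

#include <cstdint>
#include <cstdio>

struct fast_rng {
  std::uint32_t x; // current state (plays the role of th_x)
  std::uint32_t a; // per-thread multiplier (plays the role of th_a)
};

static void rng_init(fast_rng *r, unsigned tid, std::uint32_t prime) {
  r->a = prime;
  r->x = (tid + 1) * r->a + 1; // same seeding rule as __kmp_init_random
}

static unsigned short rng_next(fast_rng *r) {
  unsigned short out = (unsigned short)(r->x >> 16); // hand out the high bits
  r->x = r->x * r->a + 1;                            // wraps mod 2^32
  return out;
}

int main() {
  fast_rng t0, t1;
  rng_init(&t0, 0, 0x9e3779b1u); // first two entries of the primes table above
  rng_init(&t1, 1, 0xffe6cc59u);
  for (int i = 0; i < 4; ++i)
    std::printf("t0=%u t1=%u\n", rng_next(&t0), rng_next(&t1));
  return 0;
}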
3624
3625#if KMP_OS_WINDOWS
3626/* reclaim array entries for root threads that are already dead, returns number
3627 * reclaimed */
3628static int __kmp_reclaim_dead_roots(void) {
3629 int i, r = 0;
3630
3631 for (i = 0; i < __kmp_threads_capacity; ++i) {
3632 if (KMP_UBER_GTID(i) &&
3633 !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
3634 !__kmp_root[i]
3635 ->r.r_active) { // AC: reclaim only roots died in non-active state
3636 r += __kmp_unregister_root_other_thread(i);
3637 }
3638 }
3639 return r;
3640}
3641#endif
3642
3643/* This function attempts to create free entries in __kmp_threads and
3644 __kmp_root, and returns the number of free entries generated.
3645
3646 For Windows* OS static library, the first mechanism used is to reclaim array
3647 entries for root threads that are already dead.
3648
3649 On all platforms, expansion is attempted on the arrays __kmp_threads_ and
3650 __kmp_root, with appropriate update to __kmp_threads_capacity. Array
3651 capacity is increased by doubling with clipping to __kmp_tp_capacity, if
3652 threadprivate cache array has been created. Synchronization with
3653 __kmpc_threadprivate_cached is done using __kmp_tp_cached_lock.
3654
3655 After any dead root reclamation, if the clipping value allows array expansion
3656 to result in the generation of a total of nNeed free slots, the function does
3657 that expansion. If not, nothing is done beyond the possible initial root
3658 thread reclamation.
3659
3660 If any argument is negative, the behavior is undefined. */
3661static int __kmp_expand_threads(int nNeed) {
3662 int added = 0;
3663 int minimumRequiredCapacity;
3664 int newCapacity;
3665 kmp_info_t **newThreads;
3666 kmp_root_t **newRoot;
3667
3668 // All calls to __kmp_expand_threads should be under __kmp_forkjoin_lock, so
3669 // resizing __kmp_threads does not need additional protection if foreign
3670 // threads are present
3671
3672#if KMP_OS_WINDOWS && !KMP_DYNAMIC_LIB
3673 /* only for Windows static library */
3674 /* reclaim array entries for root threads that are already dead */
3675 added = __kmp_reclaim_dead_roots();
3676
3677 if (nNeed) {
3678 nNeed -= added;
3679 if (nNeed < 0)
3680 nNeed = 0;
3681 }
3682#endif
3683 if (nNeed <= 0)
3684 return added;
3685
3686 // Note that __kmp_threads_capacity is not bounded by __kmp_max_nth. If
3687 // __kmp_max_nth is set to some value less than __kmp_sys_max_nth by the
3688 // user via KMP_DEVICE_THREAD_LIMIT, then __kmp_threads_capacity may become
3689 // > __kmp_max_nth in one of two ways:
3690 //
3691 // 1) The initialization thread (gtid = 0) exits. __kmp_threads[0]
3692 // may not be reused by another thread, so we may need to increase
3693 // __kmp_threads_capacity to __kmp_max_nth + 1.
3694 //
3695 // 2) New foreign root(s) are encountered. We always register new foreign
3696 // roots. This may cause a smaller # of threads to be allocated at
3697 // subsequent parallel regions, but the worker threads hang around (and
3698 // eventually go to sleep) and need slots in the __kmp_threads[] array.
3699 //
3700 // Anyway, that is the reason for moving the check to see if
3701 // __kmp_max_nth was exceeded into __kmp_reserve_threads()
3702 // instead of having it performed here. -BB
3703
3704 KMP_DEBUG_ASSERT(__kmp_sys_max_nth >= __kmp_threads_capacity);
3705
3706 /* compute expansion headroom to check if we can expand */
3707 if (__kmp_sys_max_nth - __kmp_threads_capacity < nNeed) {
3708 /* possible expansion too small -- give up */
3709 return added;
3710 }
3711 minimumRequiredCapacity = __kmp_threads_capacity + nNeed;
3712
3713 newCapacity = __kmp_threads_capacity;
3714 do {
3715 newCapacity = newCapacity <= (__kmp_sys_max_nth >> 1) ? (newCapacity << 1)
3716 : __kmp_sys_max_nth;
3717 } while (newCapacity < minimumRequiredCapacity);
3718 newThreads = (kmp_info_t **)__kmp_allocate(
3719 (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * newCapacity + CACHE_LINE);
3720 newRoot =
3721 (kmp_root_t **)((char *)newThreads + sizeof(kmp_info_t *) * newCapacity);
3722 KMP_MEMCPY(newThreads, __kmp_threads,
3723 __kmp_threads_capacity * sizeof(kmp_info_t *));
3724 KMP_MEMCPY(newRoot, __kmp_root,
3725 __kmp_threads_capacity * sizeof(kmp_root_t *));
3726 // Put old __kmp_threads array on a list. Any ongoing references to the old
3727 // list will be valid. This list is cleaned up at library shutdown.
3728 kmp_old_threads_list_t *node =
3729 (kmp_old_threads_list_t *)__kmp_allocate(sizeof(kmp_old_threads_list_t));
3730 node->threads = __kmp_threads;
3731 node->next = __kmp_old_threads_list;
3732 __kmp_old_threads_list = node;
3733
3734 *(kmp_info_t * *volatile *)&__kmp_threads = newThreads;
3735 *(kmp_root_t * *volatile *)&__kmp_root = newRoot;
3736 added += newCapacity - __kmp_threads_capacity;
3737 *(volatile int *)&__kmp_threads_capacity = newCapacity;
3738
3739 if (newCapacity > __kmp_tp_capacity) {
3740 __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
3741 if (__kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3742 __kmp_threadprivate_resize_cache(newCapacity);
3743 } else { // increase __kmp_tp_capacity to correspond with kmp_threads size
3744 *(volatile int *)&__kmp_tp_capacity = newCapacity;
3745 }
3746 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
3747 }
3748
3749 return added;
3750}
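The expansion loop above grows the capacity by repeated doubling, clipping at __kmp_sys_max_nth, and gives up early when even the clipped maximum cannot supply the requested headroom. A standalone sketch of that growth rule with an assumed system maximum:

#include <cassert>

const int SYS_MAX = 1 << 15; // assumed cap, standing in for __kmp_sys_max_nth

// Returns the new capacity able to hold `capacity + need` slots, or -1 if the
// clipped maximum cannot provide the requested headroom (the runtime returns
// early in that case).
int grow_capacity(int capacity, int need) {
  if (SYS_MAX - capacity < need)
    return -1; // possible expansion too small -- give up
  int required = capacity + need;
  int newCapacity = capacity;
  do {
    newCapacity = newCapacity <= (SYS_MAX >> 1) ? (newCapacity << 1) : SYS_MAX;
  } while (newCapacity < required);
  return newCapacity;
}

int main() {
  assert(grow_capacity(64, 1) == 128);         // plain doubling
  assert(grow_capacity(64, 300) == 512);       // doubles until the need fits
  assert(grow_capacity(SYS_MAX - 2, 8) == -1); // no headroom left
  return 0;
}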
3751
3752/* Register the current thread as a root thread and obtain our gtid. We must
3753 have the __kmp_initz_lock held at this point. Argument TRUE only if are the
3754 thread that calls from __kmp_do_serial_initialize() */
3755int __kmp_register_root(int initial_thread) {
3756 kmp_info_t *root_thread;
3757 kmp_root_t *root;
3758 int gtid;
3759 int capacity;
3760 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
3761 KA_TRACE(20, ("__kmp_register_root: entered\n"));
3762 KMP_MB();
3763
3764 /* 2007-03-02:
3765 If initial thread did not invoke OpenMP RTL yet, and this thread is not an
3766 initial one, "__kmp_all_nth >= __kmp_threads_capacity" condition does not
3767 work as expected -- it may return false (that means there is at least one
3768 empty slot in __kmp_threads array), but it is possible the only free slot
3769 is #0, which is reserved for initial thread and so cannot be used for this
3770 one. The following code works around this bug.
3771
3772 However, the right solution seems to be not reserving slot #0 for the initial
3773 thread because:
3774 (1) there is no magic in slot #0,
3775 (2) we cannot detect the initial thread reliably (the first thread which does
3776 serial initialization may not be a real initial thread).
3777 */
3778 capacity = __kmp_threads_capacity;
3779 if (!initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3780 --capacity;
3781 }
3782
3783 // If it is not for initializing the hidden helper team, we need to take
3784 // __kmp_hidden_helper_threads_num out of the capacity because it is included
3785 // in __kmp_threads_capacity.
3786 if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
3787 capacity -= __kmp_hidden_helper_threads_num;
3788 }
3789
3790 /* see if there are too many threads */
3791 if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1)) {
3792 if (__kmp_tp_cached) {
3793 __kmp_fatal(KMP_MSG(CantRegisterNewThread),
3794 KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
3795 KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
3796 } else {
3797 __kmp_fatal(KMP_MSG(CantRegisterNewThread), KMP_HNT(SystemLimitOnThreads),
3798 __kmp_msg_null);
3799 }
3800 }
3801
3802 // When hidden helper task is enabled, __kmp_threads is organized as follows:
3803 // 0: initial thread, also a regular OpenMP thread.
3804 // [1, __kmp_hidden_helper_threads_num]: slots for hidden helper threads.
3805 // [__kmp_hidden_helper_threads_num + 1, __kmp_threads_capacity): slots for
3806 // regular OpenMP threads.
3807 if (TCR_4(__kmp_init_hidden_helper_threads)) {
3808 // Find an available thread slot for hidden helper thread. Slots for hidden
3809 // helper threads start from 1 to __kmp_hidden_helper_threads_num.
3810 for (gtid = 1; TCR_PTR(__kmp_threads[gtid]) != NULL &&
3811 gtid <= __kmp_hidden_helper_threads_num;
3812 gtid++)
3813 ;
3814 KMP_ASSERT(gtid <= __kmp_hidden_helper_threads_num);
3815 KA_TRACE(1, ("__kmp_register_root: found slot in threads array for "
3816 "hidden helper thread: T#%d\n",
3817 gtid));
3818 } else {
3819 /* find an available thread slot */
3820 // Don't reassign the zero slot since we need that to only be used by
3821 // initial thread. Slots for hidden helper threads should also be skipped.
3822 if (initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3823 gtid = 0;
3824 } else {
3825 for (gtid = __kmp_hidden_helper_threads_num + 1;
3826 TCR_PTR(__kmp_threads[gtid]) != NULL; gtid++)
3827 ;
3828 }
3829 KA_TRACE(
3830 1, ("__kmp_register_root: found slot in threads array: T#%d\n", gtid));
3831 KMP_ASSERT(gtid < __kmp_threads_capacity);
3832 }
3833
3834 /* update global accounting */
3835 __kmp_all_nth++;
3836 TCW_4(__kmp_nth, __kmp_nth + 1);
3837
3838 // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) for low
3839 // numbers of procs, and method #2 (keyed API call) for higher numbers.
3840 if (__kmp_adjust_gtid_mode) {
3841 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
3842 if (TCR_4(__kmp_gtid_mode) != 2) {
3843 TCW_4(__kmp_gtid_mode, 2);
3844 }
3845 } else {
3846 if (TCR_4(__kmp_gtid_mode) != 1) {
3847 TCW_4(__kmp_gtid_mode, 1);
3848 }
3849 }
3850 }
3851
3852#ifdef KMP_ADJUST_BLOCKTIME
3853 /* Adjust blocktime to zero if necessary */
3854 /* Middle initialization might not have occurred yet */
3855 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
3856 if (__kmp_nth > __kmp_avail_proc) {
3857 __kmp_zero_bt = TRUE;
3858 }
3859 }
3860#endif /* KMP_ADJUST_BLOCKTIME */
3861
3862 /* setup this new hierarchy */
3863 if (!(root = __kmp_root[gtid])) {
3864 root = __kmp_root[gtid] = (kmp_root_t *)__kmp_allocate(sizeof(kmp_root_t));
3865 KMP_DEBUG_ASSERT(!root->r.r_root_team);
3866 }
3867
3868#if KMP_STATS_ENABLED
3869 // Initialize stats as soon as possible (right after gtid assignment).
3870 __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
3871 __kmp_stats_thread_ptr->startLife();
3872 KMP_SET_THREAD_STATE(SERIAL_REGION);
3873 KMP_INIT_PARTITIONED_TIMERS(OMP_serial);
3874#endif
3875 __kmp_initialize_root(root);
3876
3877 /* setup new root thread structure */
3878 if (root->r.r_uber_thread) {
3879 root_thread = root->r.r_uber_thread;
3880 } else {
3881 root_thread = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));
3882 if (__kmp_storage_map) {
3883 __kmp_print_thread_storage_map(root_thread, gtid);
3884 }
3885 root_thread->th.th_info.ds.ds_gtid = gtid;
3886#if OMPT_SUPPORT
3887 root_thread->th.ompt_thread_info.thread_data = ompt_data_none;
3888#endif
3889 root_thread->th.th_root = root;
3890 if (__kmp_env_consistency_check) {
3891 root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid);
3892 }
3893#if USE_FAST_MEMORY
3894 __kmp_initialize_fast_memory(root_thread);
3895#endif /* USE_FAST_MEMORY */
3896
3897#if KMP_USE_BGET
3898 KMP_DEBUG_ASSERT(root_thread->th.th_local.bget_data == NULL);
3899 __kmp_initialize_bget(root_thread);
3900#endif
3901 __kmp_init_random(root_thread); // Initialize random number generator
3902 }
3903
3904 /* setup the serial team held in reserve by the root thread */
3905 if (!root_thread->th.th_serial_team) {
3906 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3907 KF_TRACE(10, ("__kmp_register_root: before serial_team\n"));
3908 root_thread->th.th_serial_team = __kmp_allocate_team(
3909 root, 1, 1,
3910#if OMPT_SUPPORT
3911 ompt_data_none, // root parallel id
3912#endif
3913 proc_bind_default, &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
3914 }
3915 KMP_ASSERT(root_thread->th.th_serial_team);
3916 KF_TRACE(10, ("__kmp_register_root: after serial_team = %p\n",
3917 root_thread->th.th_serial_team));
3918
3919 /* drop root_thread into place */
3920 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
3921
3922 root->r.r_root_team->t.t_threads[0] = root_thread;
3923 root->r.r_hot_team->t.t_threads[0] = root_thread;
3924 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
3925 // AC: the team created in reserve, not for execution (it is unused for now).
3926 root_thread->th.th_serial_team->t.t_serialized = 0;
3927 root->r.r_uber_thread = root_thread;
3928
3929 /* initialize the thread, get it ready to go */
3930 __kmp_initialize_info(root_thread, root->r.r_root_team, 0, gtid);
3931 TCW_4(__kmp_init_gtid, TRUE);
3932
3933 /* prepare the primary thread for get_gtid() */
3934 __kmp_gtid_set_specific(gtid);
3935
3936#if USE_ITT_BUILD
3937 __kmp_itt_thread_name(gtid);
3938#endif /* USE_ITT_BUILD */
3939
3940#ifdef KMP_TDATA_GTID
3941 __kmp_gtid = gtid;
3942#endif
3943 __kmp_create_worker(gtid, root_thread, __kmp_stksize);
3944 KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == gtid);
3945
3946 KA_TRACE(20, ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, "
3947 "plain=%u\n",
3948 gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team),
3949 root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
3950 KMP_INIT_BARRIER_STATE));
3951 { // Initialize barrier data.
3952 int b;
3953 for (b = 0; b < bs_last_barrier; ++b) {
3954 root_thread->th.th_bar[b].bb.b_arrived = KMP_INIT_BARRIER_STATE;
3955#if USE_DEBUGGER
3956 root_thread->th.th_bar[b].bb.b_worker_arrived = 0;
3957#endif
3958 }
3959 }
3960 KMP_DEBUG_ASSERT(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived ==
3961 KMP_INIT_BARRIER_STATE);
3962
3963#if KMP_AFFINITY_SUPPORTED
3964 root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
3965 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
3966 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
3967 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
3968#endif /* KMP_AFFINITY_SUPPORTED */
3969 root_thread->th.th_def_allocator = __kmp_def_allocator;
3970 root_thread->th.th_prev_level = 0;
3971 root_thread->th.th_prev_num_threads = 1;
3972
3973 kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(sizeof(kmp_cg_root_t));
3974 tmp->cg_root = root_thread;
3975 tmp->cg_thread_limit = __kmp_cg_max_nth;
3976 tmp->cg_nthreads = 1;
3977 KA_TRACE(100, ("__kmp_register_root: Thread %p created node %p with"
3978 " cg_nthreads init to 1\n",
3979 root_thread, tmp));
3980 tmp->up = NULL;
3981 root_thread->th.th_cg_roots = tmp;
3982
3983 __kmp_root_counter++;
3984
3985#if OMPT_SUPPORT
3986 if (!initial_thread && ompt_enabled.enabled) {
3987
3988 kmp_info_t *root_thread = ompt_get_thread();
3989
3990 ompt_set_thread_state(root_thread, ompt_state_overhead);
3991
3992 if (ompt_enabled.ompt_callback_thread_begin) {
3993 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
3994 ompt_thread_initial, __ompt_get_thread_data_internal());
3995 }
3996 ompt_data_t *task_data;
3997 ompt_data_t *parallel_data;
3998 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, &parallel_data,
3999 NULL);
4000 if (ompt_enabled.ompt_callback_implicit_task) {
4001 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
4002 ompt_scope_begin, parallel_data, task_data, 1, 1, ompt_task_initial);
4003 }
4004
4005 ompt_set_thread_state(root_thread, ompt_state_work_serial);
4006 }
4007#endif
4008#if OMPD_SUPPORT
4009 if (ompd_state & OMPD_ENABLE_BP)
4010 ompd_bp_thread_begin();
4011#endif
4012
4013 KMP_MB();
4014 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4015
4016 return gtid;
4017}
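Per the comments above, __kmp_threads is laid out with the initial thread in slot 0, hidden helper threads in slots [1, __kmp_hidden_helper_threads_num], and other roots probing upward from the first slot after the helpers. A small standalone sketch of the two slot searches, with assumed capacity and helper count:

#include <cstdio>

const int CAPACITY = 16;
const int H = 4; // stands in for __kmp_hidden_helper_threads_num

static void *slots[CAPACITY]; // stands in for __kmp_threads

// Mirrors the two search loops in __kmp_register_root.
int find_slot(bool registering_hidden_helper, bool initial_thread) {
  int gtid;
  if (registering_hidden_helper) {
    for (gtid = 1; slots[gtid] != NULL && gtid <= H; gtid++)
      ;
    return gtid; // the caller asserts gtid <= H
  }
  if (initial_thread && slots[0] == NULL)
    return 0; // slot 0 is reserved for the initial thread
  for (gtid = H + 1; slots[gtid] != NULL; gtid++)
    ;
  return gtid; // the caller asserts gtid < CAPACITY
}

int main() {
  int marker = 1;
  slots[0] = &marker; // pretend the initial thread registered first
  std::printf("first hidden helper slot: %d\n",
              find_slot(/*hidden=*/true, /*initial=*/false));  // prints 1
  std::printf("first regular root slot:  %d\n",
              find_slot(/*hidden=*/false, /*initial=*/false)); // prints H + 1
  return 0;
}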
4018
4019#if KMP_NESTED_HOT_TEAMS
4020static int __kmp_free_hot_teams(kmp_root_t *root, kmp_info_t *thr, int level,
4021 const int max_level) {
4022 int i, n, nth;
4023 kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
4024 if (!hot_teams || !hot_teams[level].hot_team) {
4025 return 0;
4026 }
4027 KMP_DEBUG_ASSERT(level < max_level);
4028 kmp_team_t *team = hot_teams[level].hot_team;
4029 nth = hot_teams[level].hot_team_nth;
4030 n = nth - 1; // primary thread is not freed
4031 if (level < max_level - 1) {
4032 for (i = 0; i < nth; ++i) {
4033 kmp_info_t *th = team->t.t_threads[i];
4034 n += __kmp_free_hot_teams(root, th, level + 1, max_level);
4035 if (i > 0 && th->th.th_hot_teams) {
4036 __kmp_free(th->th.th_hot_teams);
4037 th->th.th_hot_teams = NULL;
4038 }
4039 }
4040 }
4041 __kmp_free_team(root, team, NULL);
4042 return n;
4043}
4044#endif
4045
4046 // Resets a root thread and clears its root and hot teams.
4047// Returns the number of __kmp_threads entries directly and indirectly freed.
4048static int __kmp_reset_root(int gtid, kmp_root_t *root) {
4049 kmp_team_t *root_team = root->r.r_root_team;
4050 kmp_team_t *hot_team = root->r.r_hot_team;
4051 int n = hot_team->t.t_nproc;
4052 int i;
4053
4054 KMP_DEBUG_ASSERT(!root->r.r_active);
4055
4056 root->r.r_root_team = NULL;
4057 root->r.r_hot_team = NULL;
4058 // __kmp_free_team() does not free hot teams, so we have to clear r_hot_team
4059 // before call to __kmp_free_team().
4060 __kmp_free_team(root, root_team USE_NESTED_HOT_ARG(NULL));
4061#if KMP_NESTED_HOT_TEAMS
4062 if (__kmp_hot_teams_max_level >
4063 0) { // need to free nested hot teams and their threads if any
4064 for (i = 0; i < hot_team->t.t_nproc; ++i) {
4065 kmp_info_t *th = hot_team->t.t_threads[i];
4066 if (__kmp_hot_teams_max_level > 1) {
4067 n += __kmp_free_hot_teams(root, th, 1, __kmp_hot_teams_max_level);
4068 }
4069 if (th->th.th_hot_teams) {
4070 __kmp_free(th->th.th_hot_teams);
4071 th->th.th_hot_teams = NULL;
4072 }
4073 }
4074 }
4075#endif
4076 __kmp_free_team(root, hot_team USE_NESTED_HOT_ARG(NULL));
4077
4078 // Before we can reap the thread, we need to make certain that all other
4079 // threads in the teams that had this root as ancestor have stopped trying to
4080 // steal tasks.
4081 if (__kmp_tasking_mode != tskm_immediate_exec) {
4082 __kmp_wait_to_unref_task_teams();
4083 }
4084
4085#if KMP_OS_WINDOWS
4086 /* Close Handle of root duplicated in __kmp_create_worker (tr #62919) */
4087 KA_TRACE(
4088 10, ("__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC
4089 "\n",
4090 (LPVOID) & (root->r.r_uber_thread->th),
4091 root->r.r_uber_thread->th.th_info.ds.ds_thread));
4092 __kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread);
4093#endif /* KMP_OS_WINDOWS */
4094
4095#if OMPD_SUPPORT
4096 if (ompd_state & OMPD_ENABLE_BP)
4097 ompd_bp_thread_end();
4098#endif
4099
4100#if OMPT_SUPPORT
4101 ompt_data_t *task_data;
4102 ompt_data_t *parallel_data;
4103 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, &parallel_data,
4104 NULL);
4105 if (ompt_enabled.ompt_callback_implicit_task) {
4106 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
4107 ompt_scope_end, parallel_data, task_data, 0, 1, ompt_task_initial);
4108 }
4109 if (ompt_enabled.ompt_callback_thread_end) {
4110 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(
4111 &(root->r.r_uber_thread->th.ompt_thread_info.thread_data));
4112 }
4113#endif
4114
4115 TCW_4(__kmp_nth,
4116 __kmp_nth - 1); // __kmp_reap_thread will decrement __kmp_all_nth.
4117 i = root->r.r_uber_thread->th.th_cg_roots->cg_nthreads--;
4118 KA_TRACE(100, ("__kmp_reset_root: Thread %p decrement cg_nthreads on node %p"
4119 " to %d\n",
4120 root->r.r_uber_thread, root->r.r_uber_thread->th.th_cg_roots,
4121 root->r.r_uber_thread->th.th_cg_roots->cg_nthreads));
4122 if (i == 1) {
4123 // need to free contention group structure
4124 KMP_DEBUG_ASSERT(root->r.r_uber_thread ==
4125 root->r.r_uber_thread->th.th_cg_roots->cg_root);
4126 KMP_DEBUG_ASSERT(root->r.r_uber_thread->th.th_cg_roots->up == NULL);
4127 __kmp_free(root->r.r_uber_thread->th.th_cg_roots);
4128 root->r.r_uber_thread->th.th_cg_roots = NULL;
4129 }
4130 __kmp_reap_thread(root->r.r_uber_thread, 1);
4131
4132 // We cannot put the root thread into __kmp_thread_pool, so we have to reap it
4133 // instead of freeing.
4134 root->r.r_uber_thread = NULL;
4135 /* mark root as no longer in use */
4136 root->r.r_begin = FALSE;
4137
4138 return n;
4139}
4140
4141void __kmp_unregister_root_current_thread(int gtid) {
4142 KA_TRACE(1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid));
4143 /* this lock should be ok, since unregister_root_current_thread is never
4144 called during an abort, only during a normal close. furthermore, if you
4145 have the forkjoin lock, you should never try to get the initz lock */
4146 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
4147 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
4148 KC_TRACE(10, ("__kmp_unregister_root_current_thread: already finished, "
4149 "exiting T#%d\n",
4150 gtid));
4151 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4152 return;
4153 }
4154 kmp_root_t *root = __kmp_root[gtid];
4155
4156 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
4157 KMP_ASSERT(KMP_UBER_GTID(gtid));
4158 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
4159 KMP_ASSERT(root->r.r_active == FALSE);
4160
4161 KMP_MB();
4162
4163 kmp_info_t *thread = __kmp_threads[gtid];
4164 kmp_team_t *team = thread->th.th_team;
4165 kmp_task_team_t *task_team = thread->th.th_task_team;
4166
4167 // we need to wait for the proxy tasks before finishing the thread
4168 if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||
4169 task_team->tt.tt_hidden_helper_task_encountered)) {
4170#if OMPT_SUPPORT
4171 // the runtime is shutting down so we won't report any events
4172 thread->th.ompt_thread_info.state = ompt_state_undefined;
4173#endif
4174 __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
4175 }
4176
4177 __kmp_reset_root(gtid, root);
4178
4179 KMP_MB();
4180 KC_TRACE(10,
4181 ("__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid));
4182
4183 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4184}
4185
4186#if KMP_OS_WINDOWS
4187/* __kmp_forkjoin_lock must be already held
4188 Unregisters a root thread that is not the current thread. Returns the number
4189 of __kmp_threads entries freed as a result. */
4190static int __kmp_unregister_root_other_thread(int gtid) {
4191 kmp_root_t *root = __kmp_root[gtid];
4192 int r;
4193
4194 KA_TRACE(1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid));
4195 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
4196 KMP_ASSERT(KMP_UBER_GTID(gtid));
4197 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
4198 KMP_ASSERT(root->r.r_active == FALSE);
4199
4200 r = __kmp_reset_root(gtid, root);
4201 KC_TRACE(10,
4202 ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid));
4203 return r;
4204}
4205#endif
4206
4207#if KMP_DEBUG
4208void __kmp_task_info() {
4209
4210 kmp_int32 gtid = __kmp_entry_gtid();
4211 kmp_int32 tid = __kmp_tid_from_gtid(gtid);
4212 kmp_info_t *this_thr = __kmp_threads[gtid];
4213 kmp_team_t *steam = this_thr->th.th_serial_team;
4214 kmp_team_t *team = this_thr->th.th_team;
4215
4216 __kmp_printf(
4217 "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p steam=%p curtask=%p "
4218 "ptask=%p\n",
4219 gtid, tid, this_thr, team, steam, this_thr->th.th_current_task,
4220 team->t.t_implicit_task_taskdata[tid].td_parent);
4221}
4222#endif // KMP_DEBUG
4223
4224/* TODO optimize with one big memclr, take out what isn't needed, split
4225 responsibility to workers as much as possible, and delay initialization of
4226 features as much as possible */
4227static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
4228 int tid, int gtid) {
4229 /* this_thr->th.th_info.ds.ds_gtid is setup in
4230 kmp_allocate_thread/create_worker.
4231 this_thr->th.th_serial_team is setup in __kmp_allocate_thread */
4232 KMP_DEBUG_ASSERT(this_thr != NULL);
4233 KMP_DEBUG_ASSERT(this_thr->th.th_serial_team);
4234 KMP_DEBUG_ASSERT(team);
4235 KMP_DEBUG_ASSERT(team->t.t_threads);
4236 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4237 kmp_info_t *master = team->t.t_threads[0];
4238 KMP_DEBUG_ASSERT(master);
4239 KMP_DEBUG_ASSERT(master->th.th_root);
4240
4241 KMP_MB();
4242
4243 TCW_SYNC_PTR(this_thr->th.th_team, team);
4244
4245 this_thr->th.th_info.ds.ds_tid = tid;
4246 this_thr->th.th_set_nproc = 0;
4247 if (__kmp_tasking_mode != tskm_immediate_exec)
4248 // When tasking is possible, threads are not safe to reap until they are
4249 // done tasking; this will be set when tasking code is exited in wait
4250 this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
4251 else // no tasking --> always safe to reap
4252 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
4253 this_thr->th.th_set_proc_bind = proc_bind_default;
4254#if KMP_AFFINITY_SUPPORTED
4255 this_thr->th.th_new_place = this_thr->th.th_current_place;
4256#endif
4257 this_thr->th.th_root = master->th.th_root;
4258
4259 /* setup the thread's cache of the team structure */
4260 this_thr->th.th_team_nproc = team->t.t_nproc;
4261 this_thr->th.th_team_master = master;
4262 this_thr->th.th_team_serialized = team->t.t_serialized;
4263
4264 KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata);
4265
4266 KF_TRACE(10, ("__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
4267 tid, gtid, this_thr, this_thr->th.th_current_task));
4268
4269 __kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr,
4270 team, tid, TRUE);
4271
4272 KF_TRACE(10, ("__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
4273 tid, gtid, this_thr, this_thr->th.th_current_task));
4274 // TODO: Initialize ICVs from parent; GEH - isn't that already done in
4275 // __kmp_initialize_team()?
4276
4277 /* TODO no worksharing in speculative threads */
4278 this_thr->th.th_dispatch = &team->t.t_dispatch[tid];
4279
4280 this_thr->th.th_local.this_construct = 0;
4281
4282 if (!this_thr->th.th_pri_common) {
4283 this_thr->th.th_pri_common =
4284 (struct common_table *)__kmp_allocate(sizeof(struct common_table));
4285 if (__kmp_storage_map) {
4286 __kmp_print_storage_map_gtid(
4287 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
4288 sizeof(struct common_table), "th_%d.th_pri_common\n", gtid);
4289 }
4290 this_thr->th.th_pri_head = NULL;
4291 }
4292
4293 if (this_thr != master && // Primary thread's CG root is initialized elsewhere
4294 this_thr->th.th_cg_roots != master->th.th_cg_roots) { // CG root not set
4295 // Make new thread's CG root same as primary thread's
4296 KMP_DEBUG_ASSERT(master->th.th_cg_roots);
4297 kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
4298 if (tmp) {
4299 // worker changes CG, need to check if old CG should be freed
4300 int i = tmp->cg_nthreads--;
4301 KA_TRACE(100, ("__kmp_initialize_info: Thread %p decrement cg_nthreads"
4302 " on node %p of thread %p to %d\n",
4303 this_thr, tmp, tmp->cg_root, tmp->cg_nthreads));
4304 if (i == 1) {
4305 __kmp_free(tmp); // last thread left CG --> free it
4306 }
4307 }
4308 this_thr->th.th_cg_roots = master->th.th_cg_roots;
4309 // Increment new thread's CG root's counter to add the new thread
4310 this_thr->th.th_cg_roots->cg_nthreads++;
4311 KA_TRACE(100, ("__kmp_initialize_info: Thread %p increment cg_nthreads on"
4312 " node %p of thread %p to %d\n",
4313 this_thr, this_thr->th.th_cg_roots,
4314 this_thr->th.th_cg_roots->cg_root,
4315 this_thr->th.th_cg_roots->cg_nthreads));
4316 this_thr->th.th_current_task->td_icvs.thread_limit =
4317 this_thr->th.th_cg_roots->cg_thread_limit;
4318 }
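// Editorial sketch (not part of the runtime; helper name is illustrative):
// the hand-off above is a plain reference count on kmp_cg_root_t — a worker
// leaving one contention group for another decrements the old node, freeing
// it if it was the last user, then adopts the new node and its thread_limit.
static void __sketch_switch_cg_root(kmp_info_t *thr, kmp_cg_root_t *new_root) {
  kmp_cg_root_t *old_root = thr->th.th_cg_roots;
  if (old_root && old_root != new_root) {
    if (old_root->cg_nthreads-- == 1) // was this thread the last user?
      __kmp_free(old_root);
  }
  thr->th.th_cg_roots = new_root;
  new_root->cg_nthreads++; // the thread now counts against the new group
  thr->th.th_current_task->td_icvs.thread_limit = new_root->cg_thread_limit;
}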
4319
4320 /* Initialize dynamic dispatch */
4321 {
4322 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
4323 // Use team max_nproc since this will never change for the team.
4324 size_t disp_size =
4325 sizeof(dispatch_private_info_t) *
4326 (team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers);
4327 KD_TRACE(10, ("__kmp_initialize_info: T#%d max_nproc: %d\n", gtid,
4328 team->t.t_max_nproc));
4329 KMP_ASSERT(dispatch);
4330 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4331 KMP_DEBUG_ASSERT(dispatch == &team->t.t_dispatch[tid]);
4332
4333 dispatch->th_disp_index = 0;
4334 dispatch->th_doacross_buf_idx = 0;
4335 if (!dispatch->th_disp_buffer) {
4336 dispatch->th_disp_buffer =
4337 (dispatch_private_info_t *)__kmp_allocate(disp_size);
4338
4339 if (__kmp_storage_map) {
4340 __kmp_print_storage_map_gtid(
4341 gtid, &dispatch->th_disp_buffer[0],
4342 &dispatch->th_disp_buffer[team->t.t_max_nproc == 1
4343 ? 1
4344 : __kmp_dispatch_num_buffers],
4345 disp_size,
4346 "th_%d.th_dispatch.th_disp_buffer "
4347 "(team_%d.t_dispatch[%d].th_disp_buffer)",
4348 gtid, team->t.t_id, gtid);
4349 }
4350 } else {
4351 memset(&dispatch->th_disp_buffer[0], '\0', disp_size);
4352 }
4353
4354 dispatch->th_dispatch_pr_current = 0;
4355 dispatch->th_dispatch_sh_current = 0;
4356
4357 dispatch->th_deo_fcn = 0; /* ORDERED */
4358 dispatch->th_dxo_fcn = 0; /* END ORDERED */
4359 }
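// The buffer count chosen above follows a simple rule: a team that can never
// have more than one thread needs a single dispatch_private_info_t, while a
// real team keeps __kmp_dispatch_num_buffers of them so that consecutive
// dynamically scheduled loops can be in flight at once. Roughly:
//   size_t nbuf = (team->t.t_max_nproc == 1) ? 1 : __kmp_dispatch_num_buffers;
//   size_t bytes = nbuf * sizeof(dispatch_private_info_t);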
4360
4361 this_thr->th.th_next_pool = NULL;
4362
4363 if (!this_thr->th.th_task_state_memo_stack) {
4364 size_t i;
4365 this_thr->th.th_task_state_memo_stack =
4366 (kmp_uint8 *)__kmp_allocate(4 * sizeof(kmp_uint8));
4367 this_thr->th.th_task_state_top = 0;
4368 this_thr->th.th_task_state_stack_sz = 4;
4369 for (i = 0; i < this_thr->th.th_task_state_stack_sz;
4370 ++i) // zero init the stack
4371 this_thr->th.th_task_state_memo_stack[i] = 0;
4372 }
4373
4374 KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
4375 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
4376
4377 KMP_MB();
4378}
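// __kmp_initialize_info() is called from __kmp_allocate_thread() on both the
// pool-reuse path and the freshly forked path; on return the thread's team
// cache, implicit task, dispatch buffers and CG root all point at the
// requesting team.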
4379
4380/* allocate a new thread for the requesting team. this is only called from
4381 within a forkjoin critical section. we will first try to get an available
4382 thread from the thread pool. if none is available, we will fork a new one
4383 assuming we are able to create a new one. this should be assured, as the
4384 caller should check on this first. */
4385kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
4386 int new_tid) {
4387 kmp_team_t *serial_team;
4388 kmp_info_t *new_thr;
4389 int new_gtid;
4390
4391 KA_TRACE(20, ("__kmp_allocate_thread: T#%d\n", __kmp_get_gtid()));
4392 KMP_DEBUG_ASSERT(root && team);
4393#if !KMP_NESTED_HOT_TEAMS
4394 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(__kmp_get_gtid()));
4395#endif
4396 KMP_MB();
4397
4398 /* first, try to get one from the thread pool */
4399 if (__kmp_thread_pool) {
4400 new_thr = CCAST(kmp_info_t *, __kmp_thread_pool);
4401 __kmp_thread_pool = (volatile kmp_info_t *)new_thr->th.th_next_pool;
4402 if (new_thr == __kmp_thread_pool_insert_pt) {
4403 __kmp_thread_pool_insert_pt = NULL;
4404 }
4405 TCW_4(new_thr->th.th_in_pool, FALSE);
4406 __kmp_suspend_initialize_thread(new_thr);
4407 __kmp_lock_suspend_mx(new_thr);
4408 if (new_thr->th.th_active_in_pool == TRUE) {
4409 KMP_DEBUG_ASSERT(new_thr->th.th_active == TRUE);
4410 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
4411 new_thr->th.th_active_in_pool = FALSE;
4412 }
4413 __kmp_unlock_suspend_mx(new_thr);
4414
4415 KA_TRACE(20, ("__kmp_allocate_thread: T#%d using thread T#%d\n",
4416 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid));
4417 KMP_ASSERT(!new_thr->th.th_team);
4418 KMP_DEBUG_ASSERT(__kmp_nth < __kmp_threads_capacity);
4419
4420 /* setup the thread structure */
4421 __kmp_initialize_info(new_thr, team, new_tid,
4422 new_thr->th.th_info.ds.ds_gtid);
4423 KMP_DEBUG_ASSERT(new_thr->th.th_serial_team);
4424
4425 TCW_4(__kmp_nth, __kmp_nth + 1);
4426
4427 new_thr->th.th_task_state = 0;
4428 new_thr->th.th_task_state_top = 0;
4429 new_thr->th.th_task_state_stack_sz = 4;
4430
4431 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
4432 // Make sure pool thread has transitioned to waiting on own thread struct
4433 KMP_DEBUG_ASSERT(new_thr->th.th_used_in_team.load() == 0);
4434 // Thread activated in __kmp_allocate_team when increasing team size
4435 }
4436
4437#ifdef KMP_ADJUST_BLOCKTIME
4438 /* Adjust blocktime back to zero if necessary */
4439 /* Middle initialization might not have occurred yet */
4440 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4441 if (__kmp_nth > __kmp_avail_proc) {
4442 __kmp_zero_bt = TRUE;
4443 }
4444 }
4445#endif /* KMP_ADJUST_BLOCKTIME */
4446
4447#if KMP_DEBUG
4448 // If thread entered pool via __kmp_free_thread, wait_flag should !=
4449 // KMP_BARRIER_PARENT_FLAG.
4450 int b;
4451 kmp_balign_t *balign = new_thr->th.th_bar;
4452 for (b = 0; b < bs_last_barrier; ++b)
4453 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
4454#endif
4455
4456 KF_TRACE(10, ("__kmp_allocate_thread: T#%d using thread %p T#%d\n",
4457 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid));
4458
4459 KMP_MB();
4460 return new_thr;
4461 }
4462
4463 /* no, we'll fork a new one */
4464 KMP_ASSERT(__kmp_nth == __kmp_all_nth);
4465 KMP_ASSERT(__kmp_all_nth < __kmp_threads_capacity);
4466
4467#if KMP_USE_MONITOR
4468 // If this is the first worker thread the RTL is creating, then also
4469 // launch the monitor thread. We try to do this as early as possible.
4470 if (!TCR_4(__kmp_init_monitor)) {
4471 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
4472 if (!TCR_4(__kmp_init_monitor)) {
4473 KF_TRACE(10, ("before __kmp_create_monitor\n"));
4474 TCW_4(__kmp_init_monitor, 1);
4475 __kmp_create_monitor(&__kmp_monitor);
4476 KF_TRACE(10, ("after __kmp_create_monitor\n"));
4477#if KMP_OS_WINDOWS
4478 // AC: wait until monitor has started. This is a fix for CQ232808.
4479 // The reason is that if the library is loaded/unloaded in a loop with
4480 // small (parallel) work in between, then there is high probability that
4481 // monitor thread started after the library shutdown. At shutdown it is
4482 // too late to cope with the problem, because when the primary thread is
4483 // in DllMain (process detach) the monitor has no chances to start (it is
4484 // blocked), and primary thread has no means to inform the monitor that
4485 // the library has gone, because all the memory which the monitor can
4486 // access is going to be released/reset.
4487 while (TCR_4(__kmp_init_monitor) < 2) {
4488 KMP_YIELD(TRUE);
4489 }
4490 KF_TRACE(10, ("after monitor thread has started\n"));
4491#endif
4492 }
4493 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
4494 }
4495#endif
4496
4497 KMP_MB();
4498
4499 {
4500 int new_start_gtid = TCR_4(__kmp_init_hidden_helper_threads)
4501 ? 1
4502 : __kmp_hidden_helper_threads_num + 1;
4503
4504 for (new_gtid = new_start_gtid; TCR_PTR(__kmp_threads[new_gtid]) != NULL;
4505 ++new_gtid) {
4506 KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity);
4507 }
4508
4509 if (TCR_4(__kmp_init_hidden_helper_threads)) {
4510 KMP_DEBUG_ASSERT(new_gtid <= __kmp_hidden_helper_threads_num);
4511 }
4512 }
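// Editorial sketch (names illustrative): the block above is a linear probe of
// __kmp_threads for the first free gtid slot, skipping the hidden-helper
// range unless the hidden helpers themselves are still being brought up:
//   int g = hidden_helpers_booting ? 1 : __kmp_hidden_helper_threads_num + 1;
//   while (__kmp_threads[g] != NULL)
//     ++g; // the capacity asserts above guarantee this stays in bounds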
4513
4514 /* allocate space for it. */
4515 new_thr = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));
4516
4517 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
4518
4519#if USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG
4520 // suppress race conditions detection on synchronization flags in debug mode
4521 // this helps to analyze library internals eliminating false positives
4522 __itt_suppress_mark_range(
4523 __itt_suppress_range, __itt_suppress_threading_errors,
4524 &new_thr->th.th_sleep_loc, sizeof(new_thr->th.th_sleep_loc));
4525 __itt_suppress_mark_range(
4526 __itt_suppress_range, __itt_suppress_threading_errors,
4527 &new_thr->th.th_reap_state, sizeof(new_thr->th.th_reap_state));
4528#if KMP_OS_WINDOWS
4529 __itt_suppress_mark_range(
4530 __itt_suppress_range, __itt_suppress_threading_errors,
4531 &new_thr->th.th_suspend_init, sizeof(new_thr->th.th_suspend_init));
4532#else
4533 __itt_suppress_mark_range(__itt_suppress_range,
4534 __itt_suppress_threading_errors,
4535 &new_thr->th.th_suspend_init_count,
4536 sizeof(new_thr->th.th_suspend_init_count));
4537#endif
4538 // TODO: check if we need to also suppress b_arrived flags
4539 __itt_suppress_mark_range(__itt_suppress_range,
4540 __itt_suppress_threading_errors,
4541 CCAST(kmp_uint64 *, &new_thr->th.th_bar[0].bb.b_go),
4542 sizeof(new_thr->th.th_bar[0].bb.b_go));
4543 __itt_suppress_mark_range(__itt_suppress_range,
4544 __itt_suppress_threading_errors,
4545 CCAST(kmp_uint64 *, &new_thr->th.th_bar[1].bb.b_go),
4546 sizeof(new_thr->th.th_bar[1].bb.b_go));
4547 __itt_suppress_mark_range(__itt_suppress_range,
4548 __itt_suppress_threading_errors,
4549 CCAST(kmp_uint64 *, &new_thr->th.th_bar[2].bb.b_go),
4550 sizeof(new_thr->th.th_bar[2].bb.b_go));
4551#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG */
4552 if (__kmp_storage_map) {
4553 __kmp_print_thread_storage_map(new_thr, new_gtid);
4554 }
4555
4556 // add the reserve serialized team, initialized from the team's primary thread
4557 {
4558 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs(team);
4559 KF_TRACE(10, ("__kmp_allocate_thread: before th_serial/serial_team\n"));
4560 new_thr->th.th_serial_team = serial_team =
4561 (kmp_team_t *)__kmp_allocate_team(root, 1, 1,
4562#if OMPT_SUPPORT
4563 ompt_data_none, // root parallel id
4564#endif
4565 proc_bind_default, &r_icvs,
4566 0 USE_NESTED_HOT_ARG(NULL));
4567 }
4568 KMP_ASSERT(serial_team);
4569 serial_team->t.t_serialized = 0; // AC: the team created in reserve, not for
4570 // execution (it is unused for now).
4571 serial_team->t.t_threads[0] = new_thr;
4572 KF_TRACE(10,
4573 ("__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
4574 new_thr));
4575
4576 /* setup the thread structures */
4577 __kmp_initialize_info(new_thr, team, new_tid, new_gtid);
4578
4579#if USE_FAST_MEMORY
4580 __kmp_initialize_fast_memory(new_thr);
4581#endif /* USE_FAST_MEMORY */
4582
4583#if KMP_USE_BGET
4584 KMP_DEBUG_ASSERT(new_thr->th.th_local.bget_data == NULL);
4585 __kmp_initialize_bget(new_thr);
4586#endif
4587
4588 __kmp_init_random(new_thr); // Initialize random number generator
4589
4590 /* Initialize these only once when thread is grabbed for a team allocation */
4591 KA_TRACE(20,
4592 ("__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
4593 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
4594
4595 int b;
4596 kmp_balign_t *balign = new_thr->th.th_bar;
4597 for (b = 0; b < bs_last_barrier; ++b) {
4598 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
4599 balign[b].bb.team = NULL;
4600 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
4601 balign[b].bb.use_oncore_barrier = 0;
4602 }
4603
4604 TCW_PTR(new_thr->th.th_sleep_loc, NULL);
4605 new_thr->th.th_sleep_loc_type = flag_unset;
4606
4607 new_thr->th.th_spin_here = FALSE;
4608 new_thr->th.th_next_waiting = 0;
4609#if KMP_OS_UNIX
4610 new_thr->th.th_blocking = false;
4611#endif
4612
4613#if KMP_AFFINITY_SUPPORTED
4614 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
4615 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
4616 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
4617 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
4618#endif
4619 new_thr->th.th_def_allocator = __kmp_def_allocator;
4620 new_thr->th.th_prev_level = 0;
4621 new_thr->th.th_prev_num_threads = 1;
4622
4623 TCW_4(new_thr->th.th_in_pool, FALSE);
4624 new_thr->th.th_active_in_pool = FALSE;
4625 TCW_4(new_thr->th.th_active, TRUE);
4626
4627 /* adjust the global counters */
4628 __kmp_all_nth++;
4629 __kmp_nth++;
4630
4631 // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) for low
4632 // numbers of procs, and method #2 (keyed API call) for higher numbers.
4633 if (__kmp_adjust_gtid_mode) {
4634 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
4635 if (TCR_4(__kmp_gtid_mode) != 2) {
4636 TCW_4(__kmp_gtid_mode, 2);
4637 }
4638 } else {
4639 if (TCR_4(__kmp_gtid_mode) != 1) {
4640 TCW_4(__kmp_gtid_mode, 1);
4641 }
4642 }
4643 }
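// The switch above picks how a thread later finds its own gtid: while
// __kmp_all_nth stays below __kmp_tls_gtid_min the cheaper stack-search
// method (#1) is kept, and at or above that threshold the runtime moves to
// the keyed TLS API (method #2). Note the comparison uses __kmp_all_nth
// rather than __kmp_nth.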
4644
4645#ifdef KMP_ADJUST_BLOCKTIME
4646 /* Adjust blocktime back to zero if necessary */
4647 /* Middle initialization might not have occurred yet */
4648 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4649 if (__kmp_nth > __kmp_avail_proc) {
4650 __kmp_zero_bt = TRUE;
4651 }
4652 }
4653#endif /* KMP_ADJUST_BLOCKTIME */
4654
4655 /* actually fork it and create the new worker thread */
4656 KF_TRACE(
4657 10, ("__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr));
4658 __kmp_create_worker(new_gtid, new_thr, __kmp_stksize);
4659 KF_TRACE(10,
4660 ("__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr));
4661
4662 KA_TRACE(20, ("__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(),
4663 new_gtid));
4664 KMP_MB();
4665 return new_thr;
4666}
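// Editorial sketch (not library code): a caller on the fork path, already
// holding the forkjoin lock and having checked capacity, fills the worker
// slots of a team roughly like this:
//   for (int tid = 1; tid < team->t.t_nproc; ++tid)
//     team->t.t_threads[tid] = __kmp_allocate_thread(root, team, tid);
// Each call either recycles a pooled kmp_info_t or forks a new worker via
// __kmp_create_worker().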
4667
4668/* Reinitialize team for reuse.
4669 The hot team code calls this case at every fork barrier, so EPCC barrier
4670 tests are extremely sensitive to changes in it, esp. writes to the team
4671 struct, which cause a cache invalidation in all threads.
4672 IF YOU TOUCH THIS ROUTINE, RUN EPCC C SYNCBENCH ON A BIG-IRON MACHINE!!! */
4673static void __kmp_reinitialize_team(kmp_team_t *team,
4674 kmp_internal_control_t *new_icvs,
4675 ident_t *loc) {
4676 KF_TRACE(10, ("__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
4677 team->t.t_threads[0], team));
4678 KMP_DEBUG_ASSERT(team && new_icvs);
4679 KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
4680 KMP_CHECK_UPDATE(team->t.t_ident, loc);
4681
4682 KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
4683 // Copy ICVs to the primary thread's implicit taskdata
4684 __kmp_init_implicit_task(loc, team->t.t_threads[0], team, 0, FALSE);
4685 copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);
4686
4687 KF_TRACE(10, ("__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
4688 team->t.t_threads[0], team));
4689}
4690
4691/* Initialize the team data structure.
4692 This assumes the t_threads and t_max_nproc are already set.
4693 Also, we don't touch the arguments */
4694static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
4695 kmp_internal_control_t *new_icvs,
4696 ident_t *loc) {
4697 KF_TRACE(10, ("__kmp_initialize_team: enter: team=%p\n", team));
4698
4699 /* verify */
4700 KMP_DEBUG_ASSERT(team);
4701 KMP_DEBUG_ASSERT(new_nproc <= team->t.t_max_nproc);
4702 KMP_DEBUG_ASSERT(team->t.t_threads);
4703 KMP_MB();
4704
4705 team->t.t_master_tid = 0; /* not needed */
4706 /* team->t.t_master_bar; not needed */
4707 team->t.t_serialized = new_nproc > 1 ? 0 : 1;
4708 team->t.t_nproc = new_nproc;
4709
4710 /* team->t.t_parent = NULL; TODO not needed & would mess up hot team */
4711 team->t.t_next_pool = NULL;
4712 /* memset( team->t.t_threads, 0, sizeof(kmp_info_t*)*new_nproc ); would mess
4713 * up hot team */
4714
4715 TCW_SYNC_PTR(team->t.t_pkfn, NULL); /* not needed */
4716 team->t.t_invoke = NULL; /* not needed */
4717
4718 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
4719 team->t.t_sched.sched = new_icvs->sched.sched;
4720
4721#if KMP_ARCH_X86 || KMP_ARCH_X86_64
4722 team->t.t_fp_control_saved = FALSE; /* not needed */
4723 team->t.t_x87_fpu_control_word = 0; /* not needed */
4724 team->t.t_mxcsr = 0; /* not needed */
4725#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
4726
4727 team->t.t_construct = 0;
4728
4729 team->t.t_ordered.dt.t_value = 0;
4730 team->t.t_master_active = FALSE;
4731
4732#ifdef KMP_DEBUG
4733 team->t.t_copypriv_data = NULL; /* not necessary, but nice for debugging */
4734#endif
4735#if KMP_OS_WINDOWS
4736 team->t.t_copyin_counter = 0; /* for barrier-free copyin implementation */
4737#endif
4738
4739 team->t.t_control_stack_top = NULL;
4740
4741 __kmp_reinitialize_team(team, new_icvs, loc);
4742
4743 KMP_MB();
4744 KF_TRACE(10, ("__kmp_initialize_team: exit: team=%p\n", team));
4745}
4746
4747#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
4748/* Sets full mask for thread and returns old mask, no changes to structures. */
4749static void
4750__kmp_set_thread_affinity_mask_full_tmp(kmp_affin_mask_t *old_mask) {
4751 if (KMP_AFFINITY_CAPABLE()) {
4752 int status;
4753 if (old_mask != NULL) {
4754 status = __kmp_get_system_affinity(old_mask, TRUE);
4755 int error = errno;
4756 if (status != 0) {
4757 __kmp_fatal(KMP_MSG(ChangeThreadAffMaskError), KMP_ERR(error),
4758 __kmp_msg_null);
4759 }
4760 }
4761 __kmp_set_system_affinity(__kmp_affin_fullMask, TRUE);
4762 }
4763}
4764#endif
4765
4766#if KMP_AFFINITY_SUPPORTED
4767
4768// __kmp_partition_places() is the heart of the OpenMP 4.0 affinity mechanism.
4769// It calculates the worker + primary thread's partition based upon the parent
4770// thread's partition, and binds each worker to a thread in their partition.
4771// The primary thread's partition should already include its current binding.
4772static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
4773 // Do not partition places for the hidden helper team
4774 if (KMP_HIDDEN_HELPER_TEAM(team))
4775 return;
4776 // Copy the primary thread's place partition to the team struct
4777 kmp_info_t *master_th = team->t.t_threads[0];
4778 KMP_DEBUG_ASSERT(master_th != NULL);
4779 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
4780 int first_place = master_th->th.th_first_place;
4781 int last_place = master_th->th.th_last_place;
4782 int masters_place = master_th->th.th_current_place;
4783 int num_masks = __kmp_affinity.num_masks;
4784 team->t.t_first_place = first_place;
4785 team->t.t_last_place = last_place;
4786
4787 KA_TRACE(20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) "
4788 "bound to place %d partition = [%d,%d]\n",
4789 proc_bind, __kmp_gtid_from_thread(team->t.t_threads[0]),
4790 team->t.t_id, masters_place, first_place, last_place));
4791
4792 switch (proc_bind) {
4793
4794 case proc_bind_default:
4795 // Serial teams might have the proc_bind policy set to proc_bind_default.
4796 // Not an issue -- we don't rebind primary thread for any proc_bind policy.
4797 KMP_DEBUG_ASSERT(team->t.t_nproc == 1);
4798 break;
4799
4800 case proc_bind_primary: {
4801 int f;
4802 int n_th = team->t.t_nproc;
4803 for (f = 1; f < n_th; f++) {
4804 kmp_info_t *th = team->t.t_threads[f];
4805 KMP_DEBUG_ASSERT(th != NULL);
4806 th->th.th_first_place = first_place;
4807 th->th.th_last_place = last_place;
4808 th->th.th_new_place = masters_place;
4809 if (__kmp_display_affinity && masters_place != th->th.th_current_place &&
4810 team->t.t_display_affinity != 1) {
4811 team->t.t_display_affinity = 1;
4812 }
4813
4814 KA_TRACE(100, ("__kmp_partition_places: primary: T#%d(%d:%d) place %d "
4815 "partition = [%d,%d]\n",
4816 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
4817 f, masters_place, first_place, last_place));
4818 }
4819 } break;
4820
4821 case proc_bind_close: {
4822 int f;
4823 int n_th = team->t.t_nproc;
4824 int n_places;
4825 if (first_place <= last_place) {
4826 n_places = last_place - first_place + 1;
4827 } else {
4828 n_places = num_masks - first_place + last_place + 1;
4829 }
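// Worked example for the wrap-around count above: with num_masks = 8,
// first_place = 6 and last_place = 2, the partition is {6, 7, 0, 1, 2},
// i.e. n_places = 8 - 6 + 2 + 1 = 5.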
4830 if (n_th <= n_places) {
4831 int place = masters_place;
4832 for (f = 1; f < n_th; f++) {
4833 kmp_info_t *th = team->t.t_threads[f];
4834 KMP_DEBUG_ASSERT(th != NULL);
4835
4836 if (place == last_place) {
4837 place = first_place;
4838 } else if (place == (num_masks - 1)) {
4839 place = 0;
4840 } else {
4841 place++;
4842 }
4843 th->th.th_first_place = first_place;
4844 th->th.th_last_place = last_place;
4845 th->th.th_new_place = place;
4846 if (__kmp_display_affinity && place != th->th.th_current_place &&
4847 team->t.t_display_affinity != 1) {
4848 team->t.t_display_affinity = 1;
4849 }
4850
4851 KA_TRACE(100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
4852 "partition = [%d,%d]\n",
4853 __kmp_gtid_from_thread(team->t.t_threads[f]),
4854 team->t.t_id, f, place, first_place, last_place));
4855 }
4856 } else {
4857 int S, rem, gap, s_count;
4858 S = n_th / n_places;
4859 s_count = 0;
4860 rem = n_th - (S * n_places);
4861 gap = rem > 0 ? n_places / rem : n_places;
4862 int place = masters_place;
4863 int gap_ct = gap;
4864 for (f = 0; f < n_th; f++) {
4865 kmp_info_t *th = team->t.t_threads[f];
4866 KMP_DEBUG_ASSERT(th != NULL);
4867
4868 th->th.th_first_place = first_place;
4869 th->th.th_last_place = last_place;
4870 th->th.th_new_place = place;
4871 if (__kmp_display_affinity && place != th->th.th_current_place &&
4872 team->t.t_display_affinity != 1) {
4873 team->t.t_display_affinity = 1;
4874 }
4875 s_count++;
4876
4877 if ((s_count == S) && rem && (gap_ct == gap)) {
4878 // do nothing, add an extra thread to place on next iteration
4879 } else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
4880 // we added an extra thread to this place; move to next place
4881 if (place == last_place) {
4882 place = first_place;
4883 } else if (place == (num_masks - 1)) {
4884 place = 0;
4885 } else {
4886 place++;
4887 }
4888 s_count = 0;
4889 gap_ct = 1;
4890 rem--;
4891 } else if (s_count == S) { // place full; don't add extra
4892 if (place == last_place) {
4893 place = first_place;
4894 } else if (place == (num_masks - 1)) {
4895 place = 0;
4896 } else {
4897 place++;
4898 }
4899 gap_ct++;
4900 s_count = 0;
4901 }
4902
4903 KA_TRACE(100,
4904 ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
4905 "partition = [%d,%d]\n",
4906 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f,
4907 th->th.th_new_place, first_place, last_place));
4908 }
4909 KMP_DEBUG_ASSERT(place == masters_place);
4910 }
4911 } break;
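// Editorial note on the oversubscribed branch above (n_th > n_places): each
// place receives S = n_th / n_places threads, and the rem leftover threads
// are handed out as one extra thread every `gap` places. For example,
// n_th = 10 over n_places = 4 gives S = 2, rem = 2, gap = 2, so starting at
// the primary thread's place the consecutive places get 3, 2, 3, 2 threads.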
4912
4913 case proc_bind_spread: {
4914 int f;
4915 int n_th = team->t.t_nproc;
4916 int n_places;
4917 int thidx;
4918 if (first_place <= last_place) {
4919 n_places = last_place - first_place + 1;
4920 } else {
4921 n_places = num_masks - first_place + last_place + 1;
4922 }
4923 if (n_th <= n_places) {
4924 int place = -1;
4925
4926 if (n_places != num_masks) {
4927 int S = n_places / n_th;
4928 int s_count, rem, gap, gap_ct;
4929
4930 place = masters_place;
4931 rem = n_places - n_th * S;
4932 gap = rem ? n_th / rem : 1;
4933 gap_ct = gap;
4934 thidx = n_th;
4935 if (update_master_only == 1)
4936 thidx = 1;
4937 for (f = 0; f < thidx; f++) {
4938 kmp_info_t *th = team->t.t_threads[f];
4939 KMP_DEBUG_ASSERT(th != NULL);
4940
4941 th->th.th_first_place = place;
4942 th->th.th_new_place = place;
4943 if (__kmp_display_affinity && place != th->th.th_current_place &&
4944 team->t.t_display_affinity != 1) {
4945 team->t.t_display_affinity = 1;
4946 }
4947 s_count = 1;
4948 while (s_count < S) {
4949 if (place == last_place) {
4950 place = first_place;
4951 } else if (place == (num_masks - 1)) {
4952 place = 0;
4953 } else {
4954 place++;
4955 }
4956 s_count++;
4957 }
4958 if (rem && (gap_ct == gap)) {
4959 if (place == last_place) {
4960 place = first_place;
4961 } else if (place == (num_masks - 1)) {
4962 place = 0;
4963 } else {
4964 place++;
4965 }
4966 rem--;
4967 gap_ct = 0;
4968 }
4969 th->th.th_last_place = place;
4970 gap_ct++;
4971
4972 if (place == last_place) {
4973 place = first_place;
4974 } else if (place == (num_masks - 1)) {
4975 place = 0;
4976 } else {
4977 place++;
4978 }
4979
4980 KA_TRACE(100,
4981 ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
4982 "partition = [%d,%d], num_masks: %u\n",
4983 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
4984 f, th->th.th_new_place, th->th.th_first_place,
4985 th->th.th_last_place, num_masks));
4986 }
4987 } else {
4988 /* Having uniform space of available computation places I can create
4989 T partitions of round(P/T) size and put threads into the first
4990 place of each partition. */
4991 double current = static_cast<double>(masters_place);
4992 double spacing =
4993 (static_cast<double>(n_places + 1) / static_cast<double>(n_th));
4994 int first, last;
4995 kmp_info_t *th;
4996
4997 thidx = n_th + 1;
4998 if (update_master_only == 1)
4999 thidx = 1;
5000 for (f = 0; f < thidx; f++) {
5001 first = static_cast<int>(current);
5002 last = static_cast<int>(current + spacing) - 1;
5003 KMP_DEBUG_ASSERT(last >= first);
5004 if (first >= n_places) {
5005 if (masters_place) {
5006 first -= n_places;
5007 last -= n_places;
5008 if (first == (masters_place + 1)) {
5009 KMP_DEBUG_ASSERT(f == n_th);
5010 first--;
5011 }
5012 if (last == masters_place) {
5013 KMP_DEBUG_ASSERT(f == (n_th - 1));
5014 last--;
5015 }
5016 } else {
5017 KMP_DEBUG_ASSERT(f == n_th);
5018 first = 0;
5019 last = 0;
5020 }
5021 }
5022 if (last >= n_places) {
5023 last = (n_places - 1);
5024 }
5025 place = first;
5026 current += spacing;
5027 if (f < n_th) {
5028 KMP_DEBUG_ASSERT(0 <= first);
5029 KMP_DEBUG_ASSERT(n_places > first);
5030 KMP_DEBUG_ASSERT(0 <= last);
5031 KMP_DEBUG_ASSERT(n_places > last);
5032 KMP_DEBUG_ASSERT(last_place >= first_place);
5033 th = team->t.t_threads[f];
5034 KMP_DEBUG_ASSERT(th);
5035 th->th.th_first_place = first;
5036 th->th.th_new_place = place;
5037 th->th.th_last_place = last;
5038 if (__kmp_display_affinity && place != th->th.th_current_place &&
5039 team->t.t_display_affinity != 1) {
5040 team->t.t_display_affinity = 1;
5041 }
5042 KA_TRACE(100,
5043 ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
5044 "partition = [%d,%d], spacing = %.4f\n",
5045 __kmp_gtid_from_thread(team->t.t_threads[f]),
5046 team->t.t_id, f, th->th.th_new_place,
5047 th->th.th_first_place, th->th.th_last_place, spacing));
5048 }
5049 }
5050 }
5051 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
5052 } else {
5053 int S, rem, gap, s_count;
5054 S = n_th / n_places;
5055 s_count = 0;
5056 rem = n_th - (S * n_places);
5057 gap = rem > 0 ? n_places / rem : n_places;
5058 int place = masters_place;
5059 int gap_ct = gap;
5060 thidx = n_th;
5061 if (update_master_only == 1)
5062 thidx = 1;
5063 for (f = 0; f < thidx; f++) {
5064 kmp_info_t *th = team->t.t_threads[f];
5065 KMP_DEBUG_ASSERT(th != NULL);
5066
5067 th->th.th_first_place = place;
5068 th->th.th_last_place = place;
5069 th->th.th_new_place = place;
5070 if (__kmp_display_affinity && place != th->th.th_current_place &&
5071 team->t.t_display_affinity != 1) {
5072 team->t.t_display_affinity = 1;
5073 }
5074 s_count++;
5075
5076 if ((s_count == S) && rem && (gap_ct == gap)) {
5077 // do nothing, add an extra thread to place on next iteration
5078 } else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
5079 // we added an extra thread to this place; move on to next place
5080 if (place == last_place) {
5081 place = first_place;
5082 } else if (place == (num_masks - 1)) {
5083 place = 0;
5084 } else {
5085 place++;
5086 }
5087 s_count = 0;
5088 gap_ct = 1;
5089 rem--;
5090 } else if (s_count == S) { // place is full; don't add extra thread
5091 if (place == last_place) {
5092 place = first_place;
5093 } else if (place == (num_masks - 1)) {
5094 place = 0;
5095 } else {
5096 place++;
5097 }
5098 gap_ct++;
5099 s_count = 0;
5100 }
5101
5102 KA_TRACE(100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
5103 "partition = [%d,%d]\n",
5104 __kmp_gtid_from_thread(team->t.t_threads[f]),
5105 team->t.t_id, f, th->th.th_new_place,
5106 th->th.th_first_place, th->th.th_last_place));
5107 }
5108 KMP_DEBUG_ASSERT(update_master_only || place == masters_place)if (!(update_master_only || place == masters_place)) { __kmp_debug_assert
("update_master_only || place == masters_place", "openmp/runtime/src/kmp_runtime.cpp"
, 5108); }
;
5109 }
5110 } break;
5111
5112 default:
5113 break;
5114 }
5115
5116 KA_TRACE(20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_partition_places: exit T#%d\n"
, team->t.t_id); }
;
5117}
5118
5119#endif // KMP_AFFINITY_SUPPORTED
5120
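// Illustrative sketch (not part of kmp_runtime.cpp): the integer-arithmetic
// spread branch of __kmp_partition_places above (the case with more threads
// than places) hands out n_th threads over n_places places: each place gets
// S = n_th / n_places threads and the rem leftover threads are given out one
// extra per "gap" places. The helper below mirrors only that arithmetic and
// assumes n_th >= n_places; names (spread_counts, main) are hypothetical, and
// the real code additionally handles masters_place, first/last place
// wrap-around and update_master_only.
#include <cstdio>
#include <vector>

static std::vector<int> spread_counts(int n_th, int n_places) {
  std::vector<int> counts(n_places, 0);
  int S = n_th / n_places;
  int rem = n_th - S * n_places;
  int gap = rem > 0 ? n_places / rem : n_places;
  int place = 0, s_count = 0, gap_ct = gap;
  for (int f = 0; f < n_th; ++f) {
    ++counts[place];
    ++s_count;
    if (s_count == S && rem && gap_ct == gap) {
      // do nothing: this place receives one extra thread on the next iteration
    } else if (s_count == S + 1 && rem && gap_ct == gap) {
      place = (place + 1) % n_places; // extra thread placed; move on
      s_count = 0;
      gap_ct = 1;
      --rem;
    } else if (s_count == S) {
      place = (place + 1) % n_places; // place is full; move on
      ++gap_ct;
      s_count = 0;
    }
  }
  return counts;
}

// Example: 10 threads over 4 places yields per-place counts {3, 2, 3, 2}.
int main() {
  for (int c : spread_counts(10, 4))
    std::printf("%d ", c);
  std::printf("\n");
  return 0;
}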
5121/* allocate a new team data structure to use. take one off of the free pool if
5122 available */
5123kmp_team_t *
5124__kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
5125#if OMPT_SUPPORT1
5126 ompt_data_t ompt_parallel_data,
5127#endif
5128 kmp_proc_bind_t new_proc_bind,
5129 kmp_internal_control_t *new_icvs,
5130                     int argc USE_NESTED_HOT_ARG(kmp_info_t *master)) {
5131 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team)((void)0);
5132 int f;
5133 kmp_team_t *team;
5134 int use_hot_team = !root->r.r_active;
5135 int level = 0;
5136 int do_place_partition = 1;
5137
5138 KA_TRACE(20, ("__kmp_allocate_team: called\n"))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: called\n"
); }
;
5139 KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0)if (!(new_nproc >= 1 && argc >= 0)) { __kmp_debug_assert
("new_nproc >= 1 && argc >= 0", "openmp/runtime/src/kmp_runtime.cpp"
, 5139); }
;
5140 KMP_DEBUG_ASSERT(max_nproc >= new_nproc)if (!(max_nproc >= new_nproc)) { __kmp_debug_assert("max_nproc >= new_nproc"
, "openmp/runtime/src/kmp_runtime.cpp", 5140); }
;
5141 KMP_MB();
5142
5143#if KMP_NESTED_HOT_TEAMS1
5144 kmp_hot_team_ptr_t *hot_teams;
5145 if (master) {
5146 team = master->th.th_team;
5147 level = team->t.t_active_level;
5148 if (master->th.th_teams_microtask) { // in teams construct?
5149 if (master->th.th_teams_size.nteams > 1 &&
5150 ( // #teams > 1
5151 team->t.t_pkfn ==
5152 (microtask_t)__kmp_teams_master || // inner fork of the teams
5153 master->th.th_teams_level <
5154 team->t.t_level)) { // or nested parallel inside the teams
5155          ++level; // do not increment if #teams==1, or for the outer fork of the teams;
5156 // increment otherwise
5157 }
5158      // Do not perform the place partition for the inner fork of the teams;
5159      // wait until a nested parallel region is encountered inside the teams construct
5160 if ((master->th.th_teams_size.nteams == 1 &&
5161 master->th.th_teams_level >= team->t.t_level) ||
5162 (team->t.t_pkfn == (microtask_t)__kmp_teams_master))
5163 do_place_partition = 0;
5164 }
5165 hot_teams = master->th.th_hot_teams;
5166 if (level < __kmp_hot_teams_max_level && hot_teams &&
5167 hot_teams[level].hot_team) {
5168 // hot team has already been allocated for given level
5169 use_hot_team = 1;
5170 } else {
5171 use_hot_team = 0;
5172 }
5173 } else {
5174 // check we won't access uninitialized hot_teams, just in case
5175 KMP_DEBUG_ASSERT(new_nproc == 1)if (!(new_nproc == 1)) { __kmp_debug_assert("new_nproc == 1",
"openmp/runtime/src/kmp_runtime.cpp", 5175); }
;
5176 }
5177#endif
5178 // Optimization to use a "hot" team
5179 if (use_hot_team && new_nproc > 1) {
5180 KMP_DEBUG_ASSERT(new_nproc <= max_nproc)if (!(new_nproc <= max_nproc)) { __kmp_debug_assert("new_nproc <= max_nproc"
, "openmp/runtime/src/kmp_runtime.cpp", 5180); }
;
5181#if KMP_NESTED_HOT_TEAMS1
5182 team = hot_teams[level].hot_team;
5183#else
5184 team = root->r.r_hot_team;
5185#endif
5186#if KMP_DEBUG1
5187 if (__kmp_tasking_mode != tskm_immediate_exec) {
5188 KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: hot team task_team[0] = %p "
"task_team[1] = %p before reinit\n", team->t.t_task_team[
0], team->t.t_task_team[1]); }
5189 "task_team[1] = %p before reinit\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: hot team task_team[0] = %p "
"task_team[1] = %p before reinit\n", team->t.t_task_team[
0], team->t.t_task_team[1]); }
5190 team->t.t_task_team[0], team->t.t_task_team[1]))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: hot team task_team[0] = %p "
"task_team[1] = %p before reinit\n", team->t.t_task_team[
0], team->t.t_task_team[1]); }
;
5191 }
5192#endif
5193
5194 if (team->t.t_nproc != new_nproc &&
5195 __kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5196 // Distributed barrier may need a resize
5197 int old_nthr = team->t.t_nproc;
5198 __kmp_resize_dist_barrier(team, old_nthr, new_nproc);
5199 }
5200
5201 // If not doing the place partition, then reset the team's proc bind
5202 // to indicate that partitioning of all threads still needs to take place
5203 if (do_place_partition == 0)
5204 team->t.t_proc_bind = proc_bind_default;
5205 // Has the number of threads changed?
5206 /* Let's assume the most common case is that the number of threads is
5207 unchanged, and put that case first. */
5208 if (team->t.t_nproc == new_nproc) { // Check changes in number of threads
5209 KA_TRACE(20, ("__kmp_allocate_team: reusing hot team\n"))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: reusing hot team\n"
); }
;
5210 // This case can mean that omp_set_num_threads() was called and the hot
5211 // team size was already reduced, so we check the special flag
5212 if (team->t.t_size_changed == -1) {
5213 team->t.t_size_changed = 1;
5214 } else {
5215 KMP_CHECK_UPDATE(team->t.t_size_changed, 0)if ((team->t.t_size_changed) != (0)) (team->t.t_size_changed
) = (0)
;
5216 }
5217
5218 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
5219 kmp_r_sched_t new_sched = new_icvs->sched;
5220 // set primary thread's schedule as new run-time schedule
5221 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched)if ((team->t.t_sched.sched) != (new_sched.sched)) (team->
t.t_sched.sched) = (new_sched.sched)
;
5222
5223 __kmp_reinitialize_team(team, new_icvs,
5224 root->r.r_uber_thread->th.th_ident);
5225
5226 KF_TRACE(10, ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", 0,if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n"
, 0, team->t.t_threads[0], team); }
5227 team->t.t_threads[0], team))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n"
, 0, team->t.t_threads[0], team); }
;
5228 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
5229
5230#if KMP_AFFINITY_SUPPORTED1
5231 if ((team->t.t_size_changed == 0) &&
5232 (team->t.t_proc_bind == new_proc_bind)) {
5233 if (new_proc_bind == proc_bind_spread) {
5234 if (do_place_partition) {
5235 // add flag to update only master for spread
5236 __kmp_partition_places(team, 1);
5237 }
5238 }
5239 KA_TRACE(200, ("__kmp_allocate_team: reusing hot team #%d bindings: "if (kmp_a_debug >= 200) { __kmp_debug_printf ("__kmp_allocate_team: reusing hot team #%d bindings: "
"proc_bind = %d, partition = [%d,%d]\n", team->t.t_id, new_proc_bind
, team->t.t_first_place, team->t.t_last_place); }
5240 "proc_bind = %d, partition = [%d,%d]\n",if (kmp_a_debug >= 200) { __kmp_debug_printf ("__kmp_allocate_team: reusing hot team #%d bindings: "
"proc_bind = %d, partition = [%d,%d]\n", team->t.t_id, new_proc_bind
, team->t.t_first_place, team->t.t_last_place); }
5241 team->t.t_id, new_proc_bind, team->t.t_first_place,if (kmp_a_debug >= 200) { __kmp_debug_printf ("__kmp_allocate_team: reusing hot team #%d bindings: "
"proc_bind = %d, partition = [%d,%d]\n", team->t.t_id, new_proc_bind
, team->t.t_first_place, team->t.t_last_place); }
5242 team->t.t_last_place))if (kmp_a_debug >= 200) { __kmp_debug_printf ("__kmp_allocate_team: reusing hot team #%d bindings: "
"proc_bind = %d, partition = [%d,%d]\n", team->t.t_id, new_proc_bind
, team->t.t_first_place, team->t.t_last_place); }
;
5243 } else {
5244 if (do_place_partition) {
5245 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind)if ((team->t.t_proc_bind) != (new_proc_bind)) (team->t.
t_proc_bind) = (new_proc_bind)
;
5246 __kmp_partition_places(team);
5247 }
5248 }
5249#else
5250 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind)if ((team->t.t_proc_bind) != (new_proc_bind)) (team->t.
t_proc_bind) = (new_proc_bind)
;
5251#endif /* KMP_AFFINITY_SUPPORTED */
5252 } else if (team->t.t_nproc > new_nproc) {
5253 KA_TRACE(20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: decreasing hot team thread count to %d\n"
, new_nproc); }
5254 ("__kmp_allocate_team: decreasing hot team thread count to %d\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: decreasing hot team thread count to %d\n"
, new_nproc); }
5255 new_nproc))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: decreasing hot team thread count to %d\n"
, new_nproc); }
;
5256
5257 team->t.t_size_changed = 1;
5258 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5259 // Barrier size already reduced earlier in this function
5260 // Activate team threads via th_used_in_team
5261 __kmp_add_threads_to_team(team, new_nproc);
5262 }
5263#if KMP_NESTED_HOT_TEAMS1
5264 if (__kmp_hot_teams_mode == 0) {
5265 // AC: saved number of threads should correspond to team's value in this
5266 // mode, can be bigger in mode 1, when hot team has threads in reserve
5267 KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc)if (!(hot_teams[level].hot_team_nth == team->t.t_nproc)) {
__kmp_debug_assert("hot_teams[level].hot_team_nth == team->t.t_nproc"
, "openmp/runtime/src/kmp_runtime.cpp", 5267); }
;
5268 hot_teams[level].hot_team_nth = new_nproc;
5269#endif // KMP_NESTED_HOT_TEAMS
5270 /* release the extra threads we don't need any more */
5271 for (f = new_nproc; f < team->t.t_nproc; f++) {
5272 KMP_DEBUG_ASSERT(team->t.t_threads[f])if (!(team->t.t_threads[f])) { __kmp_debug_assert("team->t.t_threads[f]"
, "openmp/runtime/src/kmp_runtime.cpp", 5272); }
;
5273 if (__kmp_tasking_mode != tskm_immediate_exec) {
5274 // When decreasing team size, threads no longer in the team should
5275 // unref task team.
5276 team->t.t_threads[f]->th.th_task_team = NULL__null;
5277 }
5278 __kmp_free_thread(team->t.t_threads[f]);
5279 team->t.t_threads[f] = NULL__null;
5280 }
5281#if KMP_NESTED_HOT_TEAMS1
5282 } // (__kmp_hot_teams_mode == 0)
5283 else {
5284 // When keeping extra threads in team, switch threads to wait on own
5285 // b_go flag
5286 for (f = new_nproc; f < team->t.t_nproc; ++f) {
5287 KMP_DEBUG_ASSERT(team->t.t_threads[f])if (!(team->t.t_threads[f])) { __kmp_debug_assert("team->t.t_threads[f]"
, "openmp/runtime/src/kmp_runtime.cpp", 5287); }
;
5288 kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar;
5289 for (int b = 0; b < bs_last_barrier; ++b) {
5290 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG2) {
5291 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG3;
5292 }
5293 KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0)if ((balign[b].bb.leaf_kids) != (0)) (balign[b].bb.leaf_kids)
= (0)
;
5294 }
5295 }
5296 }
5297#endif // KMP_NESTED_HOT_TEAMS
5298 team->t.t_nproc = new_nproc;
5299 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
5300 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_icvs->sched.sched)if ((team->t.t_sched.sched) != (new_icvs->sched.sched))
(team->t.t_sched.sched) = (new_icvs->sched.sched)
;
5301 __kmp_reinitialize_team(team, new_icvs,
5302 root->r.r_uber_thread->th.th_ident);
5303
5304 // Update remaining threads
5305 for (f = 0; f < new_nproc; ++f) {
5306 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
5307 }
5308
5309 // restore the current task state of the primary thread: should be the
5310 // implicit task
5311 KF_TRACE(10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 0,if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n"
, 0, team->t.t_threads[0], team); }
5312 team->t.t_threads[0], team))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n"
, 0, team->t.t_threads[0], team); }
;
5313
5314 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
5315
5316#ifdef KMP_DEBUG1
5317 for (f = 0; f < team->t.t_nproc; f++) {
5318 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&if (!(team->t.t_threads[f] && team->t.t_threads
[f]->th.th_team_nproc == team->t.t_nproc)) { __kmp_debug_assert
("team->t.t_threads[f] && team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc"
, "openmp/runtime/src/kmp_runtime.cpp", 5320); }
5319 team->t.t_threads[f]->th.th_team_nproc ==if (!(team->t.t_threads[f] && team->t.t_threads
[f]->th.th_team_nproc == team->t.t_nproc)) { __kmp_debug_assert
("team->t.t_threads[f] && team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc"
, "openmp/runtime/src/kmp_runtime.cpp", 5320); }
5320 team->t.t_nproc)if (!(team->t.t_threads[f] && team->t.t_threads
[f]->th.th_team_nproc == team->t.t_nproc)) { __kmp_debug_assert
("team->t.t_threads[f] && team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc"
, "openmp/runtime/src/kmp_runtime.cpp", 5320); }
;
5321 }
5322#endif
5323
5324 if (do_place_partition) {
5325 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind)if ((team->t.t_proc_bind) != (new_proc_bind)) (team->t.
t_proc_bind) = (new_proc_bind)
;
5326#if KMP_AFFINITY_SUPPORTED1
5327 __kmp_partition_places(team);
5328#endif
5329 }
5330 } else { // team->t.t_nproc < new_nproc
5331#if (KMP_OS_LINUX1 || KMP_OS_FREEBSD0) && KMP_AFFINITY_SUPPORTED1
5332 kmp_affin_mask_t *old_mask;
5333 if (KMP_AFFINITY_CAPABLE()(__kmp_affin_mask_size > 0)) {
5334 KMP_CPU_ALLOC(old_mask)(old_mask = __kmp_affinity_dispatch->allocate_mask());
5335 }
5336#endif
5337
5338 KA_TRACE(20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: increasing hot team thread count to %d\n"
, new_nproc); }
5339 ("__kmp_allocate_team: increasing hot team thread count to %d\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: increasing hot team thread count to %d\n"
, new_nproc); }
5340 new_nproc))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: increasing hot team thread count to %d\n"
, new_nproc); }
;
5341 int old_nproc = team->t.t_nproc; // save old value and use to update only
5342 team->t.t_size_changed = 1;
5343
5344#if KMP_NESTED_HOT_TEAMS1
5345 int avail_threads = hot_teams[level].hot_team_nth;
5346 if (new_nproc < avail_threads)
5347 avail_threads = new_nproc;
5348 kmp_info_t **other_threads = team->t.t_threads;
5349 for (f = team->t.t_nproc; f < avail_threads; ++f) {
5350 // Adjust barrier data of reserved threads (if any) of the team
5351 // Other data will be set in __kmp_initialize_info() below.
5352 int b;
5353 kmp_balign_t *balign = other_threads[f]->th.th_bar;
5354 for (b = 0; b < bs_last_barrier; ++b) {
5355 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5356 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG)if (!(balign[b].bb.wait_flag != 2)) { __kmp_debug_assert("balign[b].bb.wait_flag != 2"
, "openmp/runtime/src/kmp_runtime.cpp", 5356); }
;
5357#if USE_DEBUGGER0
5358 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5359#endif
5360 }
5361 }
5362 if (hot_teams[level].hot_team_nth >= new_nproc) {
5363 // we have all needed threads in reserve, no need to allocate any
5364      // this is only possible in mode 1; there cannot be reserved threads in mode 0
5365 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1)if (!(__kmp_hot_teams_mode == 1)) { __kmp_debug_assert("__kmp_hot_teams_mode == 1"
, "openmp/runtime/src/kmp_runtime.cpp", 5365); }
;
5366 team->t.t_nproc = new_nproc; // just get reserved threads involved
5367 } else {
5368 // We may have some threads in reserve, but not enough;
5369 // get reserved threads involved if any.
5370 team->t.t_nproc = hot_teams[level].hot_team_nth;
5371 hot_teams[level].hot_team_nth = new_nproc; // adjust hot team max size
5372#endif // KMP_NESTED_HOT_TEAMS
5373 if (team->t.t_max_nproc < new_nproc) {
5374 /* reallocate larger arrays */
5375 __kmp_reallocate_team_arrays(team, new_nproc);
5376 __kmp_reinitialize_team(team, new_icvs, NULL__null);
5377 }
5378
5379#if (KMP_OS_LINUX1 || KMP_OS_FREEBSD0) && KMP_AFFINITY_SUPPORTED1
5380 /* Temporarily set full mask for primary thread before creation of
5381 workers. The reason is that workers inherit the affinity from the
5382 primary thread, so if a lot of workers are created on the single
5383 core quickly, they don't get a chance to set their own affinity for
5384 a long time. */
5385 __kmp_set_thread_affinity_mask_full_tmp(old_mask);
5386#endif
5387
5388 /* allocate new threads for the hot team */
5389 for (f = team->t.t_nproc; f < new_nproc; f++) {
5390 kmp_info_t *new_worker = __kmp_allocate_thread(root, team, f);
5391 KMP_DEBUG_ASSERT(new_worker)if (!(new_worker)) { __kmp_debug_assert("new_worker", "openmp/runtime/src/kmp_runtime.cpp"
, 5391); }
;
5392 team->t.t_threads[f] = new_worker;
5393
5394 KA_TRACE(20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: team %d init T#%d arrived: "
"join=%llu, plain=%llu\n", team->t.t_id, __kmp_gtid_from_tid
(f, team), team->t.t_id, f, team->t.t_bar[bs_forkjoin_barrier
].b_arrived, team->t.t_bar[bs_plain_barrier].b_arrived); }
5395 ("__kmp_allocate_team: team %d init T#%d arrived: "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: team %d init T#%d arrived: "
"join=%llu, plain=%llu\n", team->t.t_id, __kmp_gtid_from_tid
(f, team), team->t.t_id, f, team->t.t_bar[bs_forkjoin_barrier
].b_arrived, team->t.t_bar[bs_plain_barrier].b_arrived); }
5396 "join=%llu, plain=%llu\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: team %d init T#%d arrived: "
"join=%llu, plain=%llu\n", team->t.t_id, __kmp_gtid_from_tid
(f, team), team->t.t_id, f, team->t.t_bar[bs_forkjoin_barrier
].b_arrived, team->t.t_bar[bs_plain_barrier].b_arrived); }
5397 team->t.t_id, __kmp_gtid_from_tid(f, team), team->t.t_id, f,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: team %d init T#%d arrived: "
"join=%llu, plain=%llu\n", team->t.t_id, __kmp_gtid_from_tid
(f, team), team->t.t_id, f, team->t.t_bar[bs_forkjoin_barrier
].b_arrived, team->t.t_bar[bs_plain_barrier].b_arrived); }
5398 team->t.t_bar[bs_forkjoin_barrier].b_arrived,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: team %d init T#%d arrived: "
"join=%llu, plain=%llu\n", team->t.t_id, __kmp_gtid_from_tid
(f, team), team->t.t_id, f, team->t.t_bar[bs_forkjoin_barrier
].b_arrived, team->t.t_bar[bs_plain_barrier].b_arrived); }
5399 team->t.t_bar[bs_plain_barrier].b_arrived))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: team %d init T#%d arrived: "
"join=%llu, plain=%llu\n", team->t.t_id, __kmp_gtid_from_tid
(f, team), team->t.t_id, f, team->t.t_bar[bs_forkjoin_barrier
].b_arrived, team->t.t_bar[bs_plain_barrier].b_arrived); }
;
5400
5401 { // Initialize barrier data for new threads.
5402 int b;
5403 kmp_balign_t *balign = new_worker->th.th_bar;
5404 for (b = 0; b < bs_last_barrier; ++b) {
5405 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5406 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag !=if (!(balign[b].bb.wait_flag != 2)) { __kmp_debug_assert("balign[b].bb.wait_flag != 2"
, "openmp/runtime/src/kmp_runtime.cpp", 5407); }
5407 KMP_BARRIER_PARENT_FLAG)if (!(balign[b].bb.wait_flag != 2)) { __kmp_debug_assert("balign[b].bb.wait_flag != 2"
, "openmp/runtime/src/kmp_runtime.cpp", 5407); }
;
5408#if USE_DEBUGGER0
5409 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5410#endif
5411 }
5412 }
5413 }
5414
5415#if (KMP_OS_LINUX1 || KMP_OS_FREEBSD0) && KMP_AFFINITY_SUPPORTED1
5416 if (KMP_AFFINITY_CAPABLE()(__kmp_affin_mask_size > 0)) {
5417 /* Restore initial primary thread's affinity mask */
5418 __kmp_set_system_affinity(old_mask, TRUE)(old_mask)->set_system_affinity((!0));
5419 KMP_CPU_FREE(old_mask)__kmp_affinity_dispatch->deallocate_mask(old_mask);
5420 }
5421#endif
5422#if KMP_NESTED_HOT_TEAMS1
5423 } // end of check of t_nproc vs. new_nproc vs. hot_team_nth
5424#endif // KMP_NESTED_HOT_TEAMS
5425 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5426 // Barrier size already increased earlier in this function
5427 // Activate team threads via th_used_in_team
5428 __kmp_add_threads_to_team(team, new_nproc);
5429 }
5430    /* make sure everyone is synchronized */
5431 // new threads below
5432 __kmp_initialize_team(team, new_nproc, new_icvs,
5433 root->r.r_uber_thread->th.th_ident);
5434
5435 /* reinitialize the threads */
5436 KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc)if (!(team->t.t_nproc == new_nproc)) { __kmp_debug_assert(
"team->t.t_nproc == new_nproc", "openmp/runtime/src/kmp_runtime.cpp"
, 5436); }
;
5437 for (f = 0; f < team->t.t_nproc; ++f)
5438 __kmp_initialize_info(team->t.t_threads[f], team, f,
5439 __kmp_gtid_from_tid(f, team));
5440
5441 if (level) { // set th_task_state for new threads in nested hot team
5442 // __kmp_initialize_info() no longer zeroes th_task_state, so we should
5443 // only need to set the th_task_state for the new threads. th_task_state
5444 // for primary thread will not be accurate until after this in
5445 // __kmp_fork_call(), so we look to the primary thread's memo_stack to
5446 // get the correct value.
5447 for (f = old_nproc; f < team->t.t_nproc; ++f)
5448 team->t.t_threads[f]->th.th_task_state =
5449 team->t.t_threads[0]->th.th_task_state_memo_stack[level];
5450 } else { // set th_task_state for new threads in non-nested hot team
5451 // copy primary thread's state
5452 kmp_uint8 old_state = team->t.t_threads[0]->th.th_task_state;
5453 for (f = old_nproc; f < team->t.t_nproc; ++f)
5454 team->t.t_threads[f]->th.th_task_state = old_state;
5455 }
5456
5457#ifdef KMP_DEBUG1
5458 for (f = 0; f < team->t.t_nproc; ++f) {
5459 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&if (!(team->t.t_threads[f] && team->t.t_threads
[f]->th.th_team_nproc == team->t.t_nproc)) { __kmp_debug_assert
("team->t.t_threads[f] && team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc"
, "openmp/runtime/src/kmp_runtime.cpp", 5461); }
5460 team->t.t_threads[f]->th.th_team_nproc ==if (!(team->t.t_threads[f] && team->t.t_threads
[f]->th.th_team_nproc == team->t.t_nproc)) { __kmp_debug_assert
("team->t.t_threads[f] && team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc"
, "openmp/runtime/src/kmp_runtime.cpp", 5461); }
5461 team->t.t_nproc)if (!(team->t.t_threads[f] && team->t.t_threads
[f]->th.th_team_nproc == team->t.t_nproc)) { __kmp_debug_assert
("team->t.t_threads[f] && team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc"
, "openmp/runtime/src/kmp_runtime.cpp", 5461); }
;
5462 }
5463#endif
5464
5465 if (do_place_partition) {
5466 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind)if ((team->t.t_proc_bind) != (new_proc_bind)) (team->t.
t_proc_bind) = (new_proc_bind)
;
5467#if KMP_AFFINITY_SUPPORTED1
5468 __kmp_partition_places(team);
5469#endif
5470 }
5471 } // Check changes in number of threads
5472
5473 kmp_info_t *master = team->t.t_threads[0];
5474 if (master->th.th_teams_microtask) {
5475 for (f = 1; f < new_nproc; ++f) {
5476 // propagate teams construct specific info to workers
5477 kmp_info_t *thr = team->t.t_threads[f];
5478 thr->th.th_teams_microtask = master->th.th_teams_microtask;
5479 thr->th.th_teams_level = master->th.th_teams_level;
5480 thr->th.th_teams_size = master->th.th_teams_size;
5481 }
5482 }
5483#if KMP_NESTED_HOT_TEAMS1
5484 if (level) {
5485 // Sync barrier state for nested hot teams, not needed for outermost hot
5486 // team.
5487 for (f = 1; f < new_nproc; ++f) {
5488 kmp_info_t *thr = team->t.t_threads[f];
5489 int b;
5490 kmp_balign_t *balign = thr->th.th_bar;
5491 for (b = 0; b < bs_last_barrier; ++b) {
5492 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5493 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG)if (!(balign[b].bb.wait_flag != 2)) { __kmp_debug_assert("balign[b].bb.wait_flag != 2"
, "openmp/runtime/src/kmp_runtime.cpp", 5493); }
;
5494#if USE_DEBUGGER0
5495 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5496#endif
5497 }
5498 }
5499 }
5500#endif // KMP_NESTED_HOT_TEAMS
5501
5502 /* reallocate space for arguments if necessary */
5503 __kmp_alloc_argv_entries(argc, team, TRUE(!0));
5504 KMP_CHECK_UPDATE(team->t.t_argc, argc)if ((team->t.t_argc) != (argc)) (team->t.t_argc) = (argc
)
;
5505 // The hot team re-uses the previous task team,
5506 // if untouched during the previous release->gather phase.
5507
5508 KF_TRACE(10, (" hot_team = %p\n", team))if (kmp_f_debug >= 10) { __kmp_debug_printf (" hot_team = %p\n"
, team); }
;
5509
5510#if KMP_DEBUG1
5511 if (__kmp_tasking_mode != tskm_immediate_exec) {
5512 KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: hot team task_team[0] = %p "
"task_team[1] = %p after reinit\n", team->t.t_task_team[0
], team->t.t_task_team[1]); }
5513 "task_team[1] = %p after reinit\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: hot team task_team[0] = %p "
"task_team[1] = %p after reinit\n", team->t.t_task_team[0
], team->t.t_task_team[1]); }
5514 team->t.t_task_team[0], team->t.t_task_team[1]))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: hot team task_team[0] = %p "
"task_team[1] = %p after reinit\n", team->t.t_task_team[0
], team->t.t_task_team[1]); }
;
5515 }
5516#endif
5517
5518#if OMPT_SUPPORT1
5519 __ompt_team_assign_id(team, ompt_parallel_data);
5520#endif
5521
5522 KMP_MB();
5523
5524 return team;
5525 }
5526
5527 /* next, let's try to take one from the team pool */
5528 KMP_MB();
5529 for (team = CCAST(kmp_team_t *, __kmp_team_pool)const_cast<kmp_team_t *>(__kmp_team_pool); (team);) {
5530 /* TODO: consider resizing undersized teams instead of reaping them, now
5531 that we have a resizing mechanism */
5532 if (team->t.t_max_nproc >= max_nproc) {
5533 /* take this team from the team pool */
5534 __kmp_team_pool = team->t.t_next_pool;
5535
5536 if (max_nproc > 1 &&
5537 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5538 if (!team->t.b) { // Allocate barrier structure
5539 team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
5540 }
5541 }
5542
5543 /* setup the team for fresh use */
5544 __kmp_initialize_team(team, new_nproc, new_icvs, NULL__null);
5545
5546 KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: setting task_team[0] %p and "
"task_team[1] %p to NULL\n", &team->t.t_task_team[0],
&team->t.t_task_team[1]); }
5547 "task_team[1] %p to NULL\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: setting task_team[0] %p and "
"task_team[1] %p to NULL\n", &team->t.t_task_team[0],
&team->t.t_task_team[1]); }
5548 &team->t.t_task_team[0], &team->t.t_task_team[1]))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: setting task_team[0] %p and "
"task_team[1] %p to NULL\n", &team->t.t_task_team[0],
&team->t.t_task_team[1]); }
;
5549 team->t.t_task_team[0] = NULL__null;
5550 team->t.t_task_team[1] = NULL__null;
5551
5552 /* reallocate space for arguments if necessary */
5553 __kmp_alloc_argv_entries(argc, team, TRUE(!0));
5554 KMP_CHECK_UPDATE(team->t.t_argc, argc)if ((team->t.t_argc) != (argc)) (team->t.t_argc) = (argc
)
;
5555
5556 KA_TRACE(if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n"
, team->t.t_id, 0, 0); }
5557 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n"
, team->t.t_id, 0, 0); }
5558 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n"
, team->t.t_id, 0, 0); }
;
5559 { // Initialize barrier data.
5560 int b;
5561 for (b = 0; b < bs_last_barrier; ++b) {
5562 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE0;
5563#if USE_DEBUGGER0
5564 team->t.t_bar[b].b_master_arrived = 0;
5565 team->t.t_bar[b].b_team_arrived = 0;
5566#endif
5567 }
5568 }
5569
5570 team->t.t_proc_bind = new_proc_bind;
5571
5572 KA_TRACE(20, ("__kmp_allocate_team: using team from pool %d.\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: using team from pool %d.\n"
, team->t.t_id); }
5573 team->t.t_id))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: using team from pool %d.\n"
, team->t.t_id); }
;
5574
5575#if OMPT_SUPPORT1
5576 __ompt_team_assign_id(team, ompt_parallel_data);
5577#endif
5578
5579 KMP_MB();
5580
5581 return team;
5582 }
5583
5584 /* reap team if it is too small, then loop back and check the next one */
5585    // not sure if this is wise, but this will be redone during the hot-teams
5586 // rewrite.
5587 /* TODO: Use technique to find the right size hot-team, don't reap them */
5588 team = __kmp_reap_team(team);
5589 __kmp_team_pool = team;
5590 }
5591
5592 /* nothing available in the pool, no matter, make a new team! */
5593 KMP_MB();
5594 team = (kmp_team_t *)__kmp_allocate(sizeof(kmp_team_t))___kmp_allocate((sizeof(kmp_team_t)), "openmp/runtime/src/kmp_runtime.cpp"
, 5594)
;
5595
5596 /* and set it up */
5597 team->t.t_max_nproc = max_nproc;
5598 if (max_nproc > 1 &&
5599 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5600 // Allocate barrier structure
5601 team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
5602 }
5603
5604 /* NOTE well, for some reason allocating one big buffer and dividing it up
5605     seems to really hurt performance a lot on the P4, so let's not use this */
5606 __kmp_allocate_team_arrays(team, max_nproc);
5607
5608 KA_TRACE(20, ("__kmp_allocate_team: making a new team\n"))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: making a new team\n"
); }
;
5609 __kmp_initialize_team(team, new_nproc, new_icvs, NULL__null);
5610
5611 KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and task_team[1] "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: setting task_team[0] %p and task_team[1] "
"%p to NULL\n", &team->t.t_task_team[0], &team->
t.t_task_team[1]); }
5612 "%p to NULL\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: setting task_team[0] %p and task_team[1] "
"%p to NULL\n", &team->t.t_task_team[0], &team->
t.t_task_team[1]); }
5613 &team->t.t_task_team[0], &team->t.t_task_team[1]))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: setting task_team[0] %p and task_team[1] "
"%p to NULL\n", &team->t.t_task_team[0], &team->
t.t_task_team[1]); }
;
5614 team->t.t_task_team[0] = NULL__null; // to be removed, as __kmp_allocate zeroes
5615 // memory, no need to duplicate
5616 team->t.t_task_team[1] = NULL__null; // to be removed, as __kmp_allocate zeroes
5617 // memory, no need to duplicate
5618
5619 if (__kmp_storage_map) {
5620 __kmp_print_team_storage_map("team", team, team->t.t_id, new_nproc);
5621 }
5622
5623 /* allocate space for arguments */
5624 __kmp_alloc_argv_entries(argc, team, FALSE0);
5625 team->t.t_argc = argc;
5626
5627 KA_TRACE(20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n"
, team->t.t_id, 0, 0); }
5628 ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n"
, team->t.t_id, 0, 0); }
5629 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n"
, team->t.t_id, 0, 0); }
;
5630 { // Initialize barrier data.
5631 int b;
5632 for (b = 0; b < bs_last_barrier; ++b) {
5633 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE0;
5634#if USE_DEBUGGER0
5635 team->t.t_bar[b].b_master_arrived = 0;
5636 team->t.t_bar[b].b_team_arrived = 0;
5637#endif
5638 }
5639 }
5640
5641 team->t.t_proc_bind = new_proc_bind;
5642
5643#if OMPT_SUPPORT1
5644 __ompt_team_assign_id(team, ompt_parallel_data);
5645 team->t.ompt_serialized_team_info = NULL__null;
5646#endif
5647
5648 KMP_MB();
5649
5650 KA_TRACE(20, ("__kmp_allocate_team: done creating a new team %d.\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: done creating a new team %d.\n"
, team->t.t_id); }
5651 team->t.t_id))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: done creating a new team %d.\n"
, team->t.t_id); }
;
5652
5653 return team;
5654}
5655
5656/* TODO implement hot-teams at all levels */
5657/* TODO implement lazy thread release on demand (disband request) */
5658
5659/* free the team. return it to the team pool. release all the threads
5660 * associated with it */
5661void __kmp_free_team(kmp_root_t *root,
5662                     kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master)) {
5663 int f;
5664 KA_TRACE(20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(),if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_free_team: T#%d freeing team %d\n"
, __kmp_get_global_thread_id(), team->t.t_id); }
5665 team->t.t_id))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_free_team: T#%d freeing team %d\n"
, __kmp_get_global_thread_id(), team->t.t_id); }
;
5666
5667 /* verify state */
5668 KMP_DEBUG_ASSERT(root)if (!(root)) { __kmp_debug_assert("root", "openmp/runtime/src/kmp_runtime.cpp"
, 5668); }
;
5669 KMP_DEBUG_ASSERT(team)if (!(team)) { __kmp_debug_assert("team", "openmp/runtime/src/kmp_runtime.cpp"
, 5669); }
;
5670 KMP_DEBUG_ASSERT(team->t.t_nproc <= team->t.t_max_nproc)if (!(team->t.t_nproc <= team->t.t_max_nproc)) { __kmp_debug_assert
("team->t.t_nproc <= team->t.t_max_nproc", "openmp/runtime/src/kmp_runtime.cpp"
, 5670); }
;
5671 KMP_DEBUG_ASSERT(team->t.t_threads)if (!(team->t.t_threads)) { __kmp_debug_assert("team->t.t_threads"
, "openmp/runtime/src/kmp_runtime.cpp", 5671); }
;
5672
5673 int use_hot_team = team == root->r.r_hot_team;
5674#if KMP_NESTED_HOT_TEAMS1
5675 int level;
5676 if (master) {
5677 level = team->t.t_active_level - 1;
5678 if (master->th.th_teams_microtask) { // in teams construct?
5679 if (master->th.th_teams_size.nteams > 1) {
5680 ++level; // level was not increased in teams construct for
5681 // team_of_masters
5682 }
5683 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
5684 master->th.th_teams_level == team->t.t_level) {
5685 ++level; // level was not increased in teams construct for
5686 // team_of_workers before the parallel
5687 } // team->t.t_level will be increased inside parallel
5688 }
5689#if KMP_DEBUG1
5690 kmp_hot_team_ptr_t *hot_teams = master->th.th_hot_teams;
5691#endif
5692 if (level < __kmp_hot_teams_max_level) {
5693 KMP_DEBUG_ASSERT(team == hot_teams[level].hot_team)if (!(team == hot_teams[level].hot_team)) { __kmp_debug_assert
("team == hot_teams[level].hot_team", "openmp/runtime/src/kmp_runtime.cpp"
, 5693); }
;
5694 use_hot_team = 1;
5695 }
5696 }
5697#endif // KMP_NESTED_HOT_TEAMS
5698
5699 /* team is done working */
5700 TCW_SYNC_PTR(team->t.t_pkfn,((team->t.t_pkfn)) = ((__null))
5701 NULL)((team->t.t_pkfn)) = ((__null)); // Important for Debugging Support Library.
5702#if KMP_OS_WINDOWS0
5703 team->t.t_copyin_counter = 0; // init counter for possible reuse
5704#endif
5705 // Do not reset pointer to parent team to NULL for hot teams.
5706
5707 /* if we are non-hot team, release our threads */
5708 if (!use_hot_team) {
5709 if (__kmp_tasking_mode != tskm_immediate_exec) {
5710 // Wait for threads to reach reapable state
5711 for (f = 1; f < team->t.t_nproc; ++f) {
5712 KMP_DEBUG_ASSERT(team->t.t_threads[f])if (!(team->t.t_threads[f])) { __kmp_debug_assert("team->t.t_threads[f]"
, "openmp/runtime/src/kmp_runtime.cpp", 5712); }
;
5713 kmp_info_t *th = team->t.t_threads[f];
5714 volatile kmp_uint32 *state = &th->th.th_reap_state;
5715 while (*state != KMP_SAFE_TO_REAP1) {
5716#if KMP_OS_WINDOWS0
5717 // On Windows a thread can be killed at any time, check this
5718 DWORD ecode;
5719 if (!__kmp_is_thread_alive(th, &ecode)) {
5720 *state = KMP_SAFE_TO_REAP1; // reset the flag for dead thread
5721 break;
5722 }
5723#endif
5724 // first check if thread is sleeping
5725 kmp_flag_64<> fl(&th->th.th_bar[bs_forkjoin_barrier].bb.b_go, th);
5726 if (fl.is_sleeping())
5727 fl.resume(__kmp_gtid_from_thread(th));
5728 KMP_CPU_PAUSE()__kmp_x86_pause();
5729 }
5730 }
5731
5732 // Delete task teams
5733 int tt_idx;
5734 for (tt_idx = 0; tt_idx < 2; ++tt_idx) {
5735 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
5736 if (task_team != NULL__null) {
5737 for (f = 0; f < team->t.t_nproc; ++f) { // threads unref task teams
5738 KMP_DEBUG_ASSERT(team->t.t_threads[f])if (!(team->t.t_threads[f])) { __kmp_debug_assert("team->t.t_threads[f]"
, "openmp/runtime/src/kmp_runtime.cpp", 5738); }
;
5739 team->t.t_threads[f]->th.th_task_team = NULL__null;
5740 }
5741 KA_TRACE(if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_free_team: T#%d deactivating task_team %p on team %d\n"
, __kmp_get_global_thread_id(), task_team, team->t.t_id); }
5742 20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_free_team: T#%d deactivating task_team %p on team %d\n"
, __kmp_get_global_thread_id(), task_team, team->t.t_id); }
5743 ("__kmp_free_team: T#%d deactivating task_team %p on team %d\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_free_team: T#%d deactivating task_team %p on team %d\n"
, __kmp_get_global_thread_id(), task_team, team->t.t_id); }
5744 __kmp_get_gtid(), task_team, team->t.t_id))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_free_team: T#%d deactivating task_team %p on team %d\n"
, __kmp_get_global_thread_id(), task_team, team->t.t_id); }
;
5745#if KMP_NESTED_HOT_TEAMS1
5746 __kmp_free_task_team(master, task_team);
5747#endif
5748 team->t.t_task_team[tt_idx] = NULL__null;
5749 }
5750 }
5751 }
5752
5753 // Reset pointer to parent team only for non-hot teams.
5754 team->t.t_parent = NULL__null;
5755 team->t.t_level = 0;
5756 team->t.t_active_level = 0;
5757
5758 /* free the worker threads */
5759 for (f = 1; f < team->t.t_nproc; ++f) {
5760 KMP_DEBUG_ASSERT(team->t.t_threads[f])if (!(team->t.t_threads[f])) { __kmp_debug_assert("team->t.t_threads[f]"
, "openmp/runtime/src/kmp_runtime.cpp", 5760); }
;
5761 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5762 KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team),__sync_bool_compare_and_swap((volatile kmp_uint32 *)(&(team
->t.t_threads[f]->th.th_used_in_team)), (kmp_uint32)(1)
, (kmp_uint32)(2))
5763 1, 2)__sync_bool_compare_and_swap((volatile kmp_uint32 *)(&(team
->t.t_threads[f]->th.th_used_in_team)), (kmp_uint32)(1)
, (kmp_uint32)(2))
;
5764 }
5765 __kmp_free_thread(team->t.t_threads[f]);
5766 }
5767
5768 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5769 if (team->t.b) {
5770 // wake up thread at old location
5771 team->t.b->go_release();
5772 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME(2147483647)) {
5773 for (f = 1; f < team->t.t_nproc; ++f) {
5774 if (team->t.b->sleep[f].sleep) {
5775 __kmp_atomic_resume_64(
5776 team->t.t_threads[f]->th.th_info.ds.ds_gtid,
5777 (kmp_atomic_flag_64<> *)NULL__null);
5778 }
5779 }
5780 }
5781 // Wait for threads to be removed from team
5782 for (int f = 1; f < team->t.t_nproc; ++f) {
5783 while (team->t.t_threads[f]->th.th_used_in_team.load() != 0)
5784 KMP_CPU_PAUSE()__kmp_x86_pause();
5785 }
5786 }
5787 }
5788
5789 for (f = 1; f < team->t.t_nproc; ++f) {
5790 team->t.t_threads[f] = NULL__null;
5791 }
5792
5793 if (team->t.t_max_nproc > 1 &&
5794 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5795 distributedBarrier::deallocate(team->t.b);
5796 team->t.b = NULL__null;
5797 }
5798 /* put the team back in the team pool */
5799 /* TODO limit size of team pool, call reap_team if pool too large */
5800 team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool)const_cast<kmp_team_t *>(__kmp_team_pool);
5801 __kmp_team_pool = (volatile kmp_team_t *)team;
5802 } else { // Check if team was created for primary threads in teams construct
5803 // See if first worker is a CG root
5804 KMP_DEBUG_ASSERT(team->t.t_threads[1] &&if (!(team->t.t_threads[1] && team->t.t_threads
[1]->th.th_cg_roots)) { __kmp_debug_assert("team->t.t_threads[1] && team->t.t_threads[1]->th.th_cg_roots"
, "openmp/runtime/src/kmp_runtime.cpp", 5805); }
5805 team->t.t_threads[1]->th.th_cg_roots)if (!(team->t.t_threads[1] && team->t.t_threads
[1]->th.th_cg_roots)) { __kmp_debug_assert("team->t.t_threads[1] && team->t.t_threads[1]->th.th_cg_roots"
, "openmp/runtime/src/kmp_runtime.cpp", 5805); }
;
5806 if (team->t.t_threads[1]->th.th_cg_roots->cg_root == team->t.t_threads[1]) {
5807 // Clean up the CG root nodes on workers so that this team can be re-used
5808 for (f = 1; f < team->t.t_nproc; ++f) {
5809 kmp_info_t *thr = team->t.t_threads[f];
5810 KMP_DEBUG_ASSERT(thr && thr->th.th_cg_roots &&if (!(thr && thr->th.th_cg_roots && thr->
th.th_cg_roots->cg_root == thr)) { __kmp_debug_assert("thr && thr->th.th_cg_roots && thr->th.th_cg_roots->cg_root == thr"
, "openmp/runtime/src/kmp_runtime.cpp", 5811); }
5811 thr->th.th_cg_roots->cg_root == thr)if (!(thr && thr->th.th_cg_roots && thr->
th.th_cg_roots->cg_root == thr)) { __kmp_debug_assert("thr && thr->th.th_cg_roots && thr->th.th_cg_roots->cg_root == thr"
, "openmp/runtime/src/kmp_runtime.cpp", 5811); }
;
5812 // Pop current CG root off list
5813 kmp_cg_root_t *tmp = thr->th.th_cg_roots;
5814 thr->th.th_cg_roots = tmp->up;
5815 KA_TRACE(100, ("__kmp_free_team: Thread %p popping node %p and moving"if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_free_team: Thread %p popping node %p and moving"
" up to node %p. cg_nthreads was %d\n", thr, tmp, thr->th
.th_cg_roots, tmp->cg_nthreads); }
5816 " up to node %p. cg_nthreads was %d\n",if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_free_team: Thread %p popping node %p and moving"
" up to node %p. cg_nthreads was %d\n", thr, tmp, thr->th
.th_cg_roots, tmp->cg_nthreads); }
5817 thr, tmp, thr->th.th_cg_roots, tmp->cg_nthreads))if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_free_team: Thread %p popping node %p and moving"
" up to node %p. cg_nthreads was %d\n", thr, tmp, thr->th
.th_cg_roots, tmp->cg_nthreads); }
;
5818 int i = tmp->cg_nthreads--;
5819 if (i == 1) {
5820 __kmp_free(tmp)___kmp_free((tmp), "openmp/runtime/src/kmp_runtime.cpp", 5820
)
; // free CG if we are the last thread in it
5821 }
5822 // Restore current task's thread_limit from CG root
5823 if (thr->th.th_cg_roots)
5824 thr->th.th_current_task->td_icvs.thread_limit =
5825 thr->th.th_cg_roots->cg_thread_limit;
5826 }
5827 }
5828 }
5829
5830 KMP_MB();
5831}
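// Illustrative sketch (not part of kmp_runtime.cpp): the CG-root cleanup in
// __kmp_free_team above is a pop from a per-thread intrusive stack of
// contention-group nodes, where cg_nthreads acts as a reference count and the
// node is freed only by the last thread that leaves it. The names below
// (cg_node, cg_pop) are hypothetical simplifications of kmp_cg_root_t and
// th_cg_roots.
#include <cstdlib>

struct cg_node {
  cg_node *up;      // enclosing contention group
  int nthreads;     // threads still referencing this node
  int thread_limit; // ICV restored when popping back to this node
};

// Pop the top node for one thread; free it when the last reference goes away.
// Returns the new top so the caller can restore its thread_limit ICV from it.
static cg_node *cg_pop(cg_node **top) {
  cg_node *node = *top;
  *top = node->up;
  if (--node->nthreads == 0)
    std::free(node); // assumes nodes were obtained with std::malloc
  return *top;
}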
5832
5833/* reap the team. destroy it, reclaim all its resources and free its memory */
5834kmp_team_t *__kmp_reap_team(kmp_team_t *team) {
5835 kmp_team_t *next_pool = team->t.t_next_pool;
5836
5837 KMP_DEBUG_ASSERT(team)if (!(team)) { __kmp_debug_assert("team", "openmp/runtime/src/kmp_runtime.cpp"
, 5837); }
;
5838 KMP_DEBUG_ASSERT(team->t.t_dispatch)if (!(team->t.t_dispatch)) { __kmp_debug_assert("team->t.t_dispatch"
, "openmp/runtime/src/kmp_runtime.cpp", 5838); }
;
5839 KMP_DEBUG_ASSERT(team->t.t_disp_buffer)if (!(team->t.t_disp_buffer)) { __kmp_debug_assert("team->t.t_disp_buffer"
, "openmp/runtime/src/kmp_runtime.cpp", 5839); }
;
5840 KMP_DEBUG_ASSERT(team->t.t_threads)if (!(team->t.t_threads)) { __kmp_debug_assert("team->t.t_threads"
, "openmp/runtime/src/kmp_runtime.cpp", 5840); }
;
5841 KMP_DEBUG_ASSERT(team->t.t_argv)if (!(team->t.t_argv)) { __kmp_debug_assert("team->t.t_argv"
, "openmp/runtime/src/kmp_runtime.cpp", 5841); }
;
5842
5843 /* TODO clean the threads that are a part of this? */
5844
5845 /* free stuff */
5846 __kmp_free_team_arrays(team);
5847 if (team->t.t_argv != &team->t.t_inline_argv[0])
5848 __kmp_free((void *)team->t.t_argv)___kmp_free(((void *)team->t.t_argv), "openmp/runtime/src/kmp_runtime.cpp"
, 5848)
;
5849 __kmp_free(team)___kmp_free((team), "openmp/runtime/src/kmp_runtime.cpp", 5849
)
;
5850
5851 KMP_MB();
5852 return next_pool;
5853}
5854
5855// Free the thread. Don't reap it, just place it on the pool of available
5856// threads.
5857//
5858// Changes for Quad issue 527845: We need a predictable OMP tid <-> gtid
5859// binding for the affinity mechanism to be useful.
5860//
5861// Now, we always keep the free list (__kmp_thread_pool) sorted by gtid.
5862// However, we want to avoid a potential performance problem by always
5863// scanning through the list to find the correct point at which to insert
5864// the thread (potential N**2 behavior). To do this we keep track of the
5865// last place a thread struct was inserted (__kmp_thread_pool_insert_pt).
5866// With single-level parallelism, threads will always be added to the tail
5867// of the list, kept track of by __kmp_thread_pool_insert_pt. With nested
5868// parallelism, all bets are off and we may need to scan through the entire
5869// free list.
5870//
5871// This change also has a potentially large performance benefit, for some
5872// applications. Previously, as threads were freed from the hot team, they
5873// would be placed back on the free list in inverse order. If the hot team
5874 // grew back to its original size, then the freed thread would be placed
5875// back on the hot team in reverse order. This could cause bad cache
5876// locality problems on programs where the size of the hot team regularly
5877// grew and shrunk.
5878//
5879// Now, for single-level parallelism, the OMP tid is always == gtid.
5880void __kmp_free_thread(kmp_info_t *this_th) {
5881 int gtid;
5882 kmp_info_t **scan;
5883
5884 KA_TRACE(20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n"
, __kmp_get_global_thread_id(), this_th->th.th_info.ds.ds_gtid
); }
5885 __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n"
, __kmp_get_global_thread_id(), this_th->th.th_info.ds.ds_gtid
); }
;
5886
5887 KMP_DEBUG_ASSERT(this_th)if (!(this_th)) { __kmp_debug_assert("this_th", "openmp/runtime/src/kmp_runtime.cpp"
, 5887); }
;
5888
5889 // When moving a thread to the pool, switch it to wait on its own b_go flag
5890 // and on an uninitialized (NULL) team.
5891 int b;
5892 kmp_balign_t *balign = this_th->th.th_bar;
5893 for (b = 0; b < bs_last_barrier; ++b) {
5894 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG2)
5895 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG3;
5896 balign[b].bb.team = NULL__null;
5897 balign[b].bb.leaf_kids = 0;
5898 }
5899 this_th->th.th_task_state = 0;
5900 this_th->th.th_reap_state = KMP_SAFE_TO_REAP1;
5901
5902 /* put thread back on the free pool */
5903 TCW_PTR(this_th->th.th_team, NULL)((this_th->th.th_team)) = ((__null));
5904 TCW_PTR(this_th->th.th_root, NULL)((this_th->th.th_root)) = ((__null));
5905 TCW_PTR(this_th->th.th_dispatch, NULL)((this_th->th.th_dispatch)) = ((__null)); /* NOT NEEDED */
5906
5907 while (this_th->th.th_cg_roots) {
5908 this_th->th.th_cg_roots->cg_nthreads--;
5909 KA_TRACE(100, ("__kmp_free_thread: Thread %p decrement cg_nthreads on node"if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_free_thread: Thread %p decrement cg_nthreads on node"
" %p of thread %p to %d\n", this_th, this_th->th.th_cg_roots
, this_th->th.th_cg_roots->cg_root, this_th->th.th_cg_roots
->cg_nthreads); }
5910 " %p of thread %p to %d\n",if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_free_thread: Thread %p decrement cg_nthreads on node"
" %p of thread %p to %d\n", this_th, this_th->th.th_cg_roots
, this_th->th.th_cg_roots->cg_root, this_th->th.th_cg_roots
->cg_nthreads); }
5911 this_th, this_th->th.th_cg_roots,if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_free_thread: Thread %p decrement cg_nthreads on node"
" %p of thread %p to %d\n", this_th, this_th->th.th_cg_roots
, this_th->th.th_cg_roots->cg_root, this_th->th.th_cg_roots
->cg_nthreads); }
5912 this_th->th.th_cg_roots->cg_root,if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_free_thread: Thread %p decrement cg_nthreads on node"
" %p of thread %p to %d\n", this_th, this_th->th.th_cg_roots
, this_th->th.th_cg_roots->cg_root, this_th->th.th_cg_roots
->cg_nthreads); }
5913 this_th->th.th_cg_roots->cg_nthreads))if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_free_thread: Thread %p decrement cg_nthreads on node"
" %p of thread %p to %d\n", this_th, this_th->th.th_cg_roots
, this_th->th.th_cg_roots->cg_root, this_th->th.th_cg_roots
->cg_nthreads); }
;
5914 kmp_cg_root_t *tmp = this_th->th.th_cg_roots;
5915 if (tmp->cg_root == this_th) { // Thread is a cg_root
5916 KMP_DEBUG_ASSERT(tmp->cg_nthreads == 0)if (!(tmp->cg_nthreads == 0)) { __kmp_debug_assert("tmp->cg_nthreads == 0"
, "openmp/runtime/src/kmp_runtime.cpp", 5916); }
;
5917 KA_TRACE(if (kmp_a_debug >= 5) { __kmp_debug_printf ("__kmp_free_thread: Thread %p freeing node %p\n"
, this_th, tmp); }
5918 5, ("__kmp_free_thread: Thread %p freeing node %p\n", this_th, tmp))if (kmp_a_debug >= 5) { __kmp_debug_printf ("__kmp_free_thread: Thread %p freeing node %p\n"
, this_th, tmp); }
;
5919 this_th->th.th_cg_roots = tmp->up;
5920 __kmp_free(tmp)___kmp_free((tmp), "openmp/runtime/src/kmp_runtime.cpp", 5920
)
;
5921 } else { // Worker thread
5922 if (tmp->cg_nthreads == 0) { // last thread leaves contention group
5923 __kmp_free(tmp)___kmp_free((tmp), "openmp/runtime/src/kmp_runtime.cpp", 5923
)
;
5924 }
5925 this_th->th.th_cg_roots = NULL__null;
5926 break;
5927 }
5928 }
5929
5930 /* If the implicit task assigned to this thread can be used by other threads
5931 * -> multiple threads can share the data and try to free the task at
5932 * __kmp_reap_thread at exit. This duplicate use of the task data can happen
5933 * with higher probability when the hot team is disabled but can occur even when
5934 * the hot team is enabled */
5935 __kmp_free_implicit_task(this_th);
5936 this_th->th.th_current_task = NULL__null;
5937
5938 // If the __kmp_thread_pool_insert_pt is already past the new insert
5939 // point, then we need to re-scan the entire list.
5940 gtid = this_th->th.th_info.ds.ds_gtid;
5941 if (__kmp_thread_pool_insert_pt != NULL__null) {
5942 KMP_DEBUG_ASSERT(__kmp_thread_pool != NULL)if (!(__kmp_thread_pool != __null)) { __kmp_debug_assert("__kmp_thread_pool != __null"
, "openmp/runtime/src/kmp_runtime.cpp", 5942); }
;
5943 if (__kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid) {
5944 __kmp_thread_pool_insert_pt = NULL__null;
5945 }
5946 }
5947
5948 // Scan down the list to find the place to insert the thread.
5949 // scan is the address of a link in the list, possibly the address of
5950 // __kmp_thread_pool itself.
5951 //
5952 // In the absence of nested parallelism, the for loop will have 0 iterations.
5953 if (__kmp_thread_pool_insert_pt != NULL__null) {
5954 scan = &(__kmp_thread_pool_insert_pt->th.th_next_pool);
5955 } else {
5956 scan = CCAST(kmp_info_t **, &__kmp_thread_pool)const_cast<kmp_info_t **>(&__kmp_thread_pool);
5957 }
5958 for (; (*scan != NULL__null) && ((*scan)->th.th_info.ds.ds_gtid < gtid);
5959 scan = &((*scan)->th.th_next_pool))
5960 ;
5961
5962 // Insert the new element on the list, and set __kmp_thread_pool_insert_pt
5963 // to its address.
5964 TCW_PTR(this_th->th.th_next_pool, *scan)((this_th->th.th_next_pool)) = ((*scan));
5965 __kmp_thread_pool_insert_pt = *scan = this_th;
5966 KMP_DEBUG_ASSERT((this_th->th.th_next_pool == NULL) ||if (!((this_th->th.th_next_pool == __null) || (this_th->
th.th_info.ds.ds_gtid < this_th->th.th_next_pool->th
.th_info.ds.ds_gtid))) { __kmp_debug_assert("(this_th->th.th_next_pool == __null) || (this_th->th.th_info.ds.ds_gtid < this_th->th.th_next_pool->th.th_info.ds.ds_gtid)"
, "openmp/runtime/src/kmp_runtime.cpp", 5968); }
5967 (this_th->th.th_info.ds.ds_gtid <if (!((this_th->th.th_next_pool == __null) || (this_th->
th.th_info.ds.ds_gtid < this_th->th.th_next_pool->th
.th_info.ds.ds_gtid))) { __kmp_debug_assert("(this_th->th.th_next_pool == __null) || (this_th->th.th_info.ds.ds_gtid < this_th->th.th_next_pool->th.th_info.ds.ds_gtid)"
, "openmp/runtime/src/kmp_runtime.cpp", 5968); }
5968 this_th->th.th_next_pool->th.th_info.ds.ds_gtid))if (!((this_th->th.th_next_pool == __null) || (this_th->
th.th_info.ds.ds_gtid < this_th->th.th_next_pool->th
.th_info.ds.ds_gtid))) { __kmp_debug_assert("(this_th->th.th_next_pool == __null) || (this_th->th.th_info.ds.ds_gtid < this_th->th.th_next_pool->th.th_info.ds.ds_gtid)"
, "openmp/runtime/src/kmp_runtime.cpp", 5968); }
;
5969 TCW_4(this_th->th.th_in_pool, TRUE)(this_th->th.th_in_pool) = ((!0));
5970 __kmp_suspend_initialize_thread(this_th);
5971 __kmp_lock_suspend_mx(this_th);
5972 if (this_th->th.th_active == TRUE(!0)) {
5973 KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth)(&__kmp_thread_pool_active_nth)->fetch_add(1, std::memory_order_acq_rel
)
;
5974 this_th->th.th_active_in_pool = TRUE(!0);
5975 }
5976#if KMP_DEBUG1
5977 else {
5978 KMP_DEBUG_ASSERT(this_th->th.th_active_in_pool == FALSE)if (!(this_th->th.th_active_in_pool == 0)) { __kmp_debug_assert
("this_th->th.th_active_in_pool == 0", "openmp/runtime/src/kmp_runtime.cpp"
, 5978); }
;
5979 }
5980#endif
5981 __kmp_unlock_suspend_mx(this_th);
5982
5983 TCW_4(__kmp_nth, __kmp_nth - 1)(__kmp_nth) = (__kmp_nth - 1);
5984
5985#ifdef KMP_ADJUST_BLOCKTIME1
5986 /* Adjust blocktime back to user setting or default if necessary */
5987 /* Middle initialization might never have occurred */
5988 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
5989 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0)if (!(__kmp_avail_proc > 0)) { __kmp_debug_assert("__kmp_avail_proc > 0"
, "openmp/runtime/src/kmp_runtime.cpp", 5989); }
;
5990 if (__kmp_nth <= __kmp_avail_proc) {
5991 __kmp_zero_bt = FALSE0;
5992 }
5993 }
5994#endif /* KMP_ADJUST_BLOCKTIME */
5995
5996 KMP_MB();
5997}
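// Illustrative sketch (not part of kmp_runtime.cpp): the free-thread pool used
// by __kmp_free_thread above is a singly-linked list kept sorted by gtid, with
// a remembered insertion point so the common single-level case appends at the
// tail in O(1) instead of rescanning the whole list. The names below
// (pool_node, pool_insert_sorted, g_pool, g_insert_pt) are hypothetical
// stand-ins for kmp_info_t, __kmp_thread_pool and __kmp_thread_pool_insert_pt.
struct pool_node {
  int gtid;
  pool_node *next;
};

static pool_node *g_pool = nullptr;      // head of the gtid-sorted free list
static pool_node *g_insert_pt = nullptr; // hint: where the last insert landed

static void pool_insert_sorted(pool_node *n) {
  // If the hint is already past the new gtid, fall back to a full scan.
  if (g_insert_pt != nullptr && g_insert_pt->gtid > n->gtid)
    g_insert_pt = nullptr;
  pool_node **scan = g_insert_pt ? &g_insert_pt->next : &g_pool;
  // With single-level parallelism this loop does zero iterations.
  while (*scan != nullptr && (*scan)->gtid < n->gtid)
    scan = &(*scan)->next;
  n->next = *scan;
  *scan = n;
  g_insert_pt = n; // remember the insertion point for the next call
}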
5998
5999/* ------------------------------------------------------------------------ */
6000
6001void *__kmp_launch_thread(kmp_info_t *this_thr) {
6002#if OMP_PROFILING_SUPPORT0
6003 ProfileTraceFile = getenv("LIBOMPTARGET_PROFILE");
6004 // TODO: add a configuration option for time granularity
6005 if (ProfileTraceFile)
6006 llvm::timeTraceProfilerInitialize(500 /* us */, "libomptarget");
6007#endif
6008
6009 int gtid = this_thr->th.th_info.ds.ds_gtid;
6010 /* void *stack_data;*/
6011 kmp_team_t **volatile pteam;
6012
6013 KMP_MB();
6014 KA_TRACE(10, ("__kmp_launch_thread: T#%d start\n", gtid))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_launch_thread: T#%d start\n"
, gtid); }
;
6015
6016 if (__kmp_env_consistency_check) {
6017 this_thr->th.th_cons = __kmp_allocate_cons_stack(gtid); // ATT: Memory leak?
6018 }
6019
6020#if OMPD_SUPPORT1
6021 if (ompd_state & OMPD_ENABLE_BP0x1)
6022 ompd_bp_thread_begin();
6023#endif
6024
6025#if OMPT_SUPPORT1
6026 ompt_data_t *thread_data = nullptr;
6027 if (ompt_enabled.enabled) {
6028 thread_data = &(this_thr->th.ompt_thread_info.thread_data);
6029 *thread_data = ompt_data_none{0};
6030
6031 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6032 this_thr->th.ompt_thread_info.wait_id = 0;
6033 this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0)__builtin_frame_address(0);
6034 this_thr->th.ompt_thread_info.parallel_flags = 0;
6035 if (ompt_enabled.ompt_callback_thread_begin) {
6036 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)ompt_callback_thread_begin_callback(
6037 ompt_thread_worker, thread_data);
6038 }
6039 this_thr->th.ompt_thread_info.state = ompt_state_idle;
6040 }
6041#endif
6042
6043 /* This is the place where threads wait for work */
6044 while (!TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done)) {
6045 KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid])if (!(this_thr == __kmp_threads[gtid])) { __kmp_debug_assert(
"this_thr == __kmp_threads[gtid]", "openmp/runtime/src/kmp_runtime.cpp"
, 6045); }
;
6046 KMP_MB();
6047
6048 /* wait for work to do */
6049 KA_TRACE(20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_launch_thread: T#%d waiting for work\n"
, gtid); }
;
6050
6051 /* No tid yet since not part of a team */
6052 __kmp_fork_barrier(gtid, KMP_GTID_DNE(-2));
6053
6054#if OMPT_SUPPORT1
6055 if (ompt_enabled.enabled) {
6056 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6057 }
6058#endif
6059
6060 pteam = &this_thr->th.th_team;
6061
6062 /* have we been allocated? */
6063 if (TCR_SYNC_PTR(*pteam)((void *)(*pteam)) && !TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done)) {
6064 /* we were just woken up, so run our new task */
6065 if (TCR_SYNC_PTR((*pteam)->t.t_pkfn)((void *)((*pteam)->t.t_pkfn)) != NULL__null) {
6066 int rc;
6067 KA_TRACE(20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n"
, gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam
)->t.t_pkfn); }
6068 ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n"
, gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam
)->t.t_pkfn); }
6069 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n"
, gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam
)->t.t_pkfn); }
6070 (*pteam)->t.t_pkfn))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n"
, gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam
)->t.t_pkfn); }
;
6071
6072 updateHWFPControl(*pteam);
6073
6074#if OMPT_SUPPORT1
6075 if (ompt_enabled.enabled) {
6076 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
6077 }
6078#endif
6079
6080 rc = (*pteam)->t.t_invoke(gtid);
6081 KMP_ASSERT(rc)if (!(rc)) { __kmp_debug_assert("rc", "openmp/runtime/src/kmp_runtime.cpp"
, 6081); }
;
6082
6083 KMP_MB();
6084 KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n"
, gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam
)->t.t_pkfn); }
6085 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n"
, gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam
)->t.t_pkfn); }
6086 (*pteam)->t.t_pkfn))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n"
, gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam
)->t.t_pkfn); }
;
6087 }
6088#if OMPT_SUPPORT1
6089 if (ompt_enabled.enabled) {
6090 /* no frame set while outside task */
6091 __ompt_get_task_info_object(0)->frame.exit_frame = ompt_data_none{0};
6092
6093 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6094 }
6095#endif
6096 /* join barrier after parallel region */
6097 __kmp_join_barrier(gtid);
6098 }
6099 }
6100 TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done)((void *)((intptr_t)__kmp_global.g.g_done));
6101
6102#if OMPD_SUPPORT1
6103 if (ompd_state & OMPD_ENABLE_BP0x1)
6104 ompd_bp_thread_end();
6105#endif
6106
6107#if OMPT_SUPPORT1
6108 if (ompt_enabled.ompt_callback_thread_end) {
6109 ompt_callbacks.ompt_callback(ompt_callback_thread_end)ompt_callback_thread_end_callback(thread_data);
6110 }
6111#endif
6112
6113 this_thr->th.th_task_team = NULL__null;
6114 /* run the destructors for the threadprivate data for this thread */
6115 __kmp_common_destroy_gtid(gtid);
6116
6117 KA_TRACE(10, ("__kmp_launch_thread: T#%d done\n", gtid))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_launch_thread: T#%d done\n"
, gtid); }
;
6118 KMP_MB();
6119
6120#if OMP_PROFILING_SUPPORT0
6121 llvm::timeTraceProfilerFinishThread();
6122#endif
6123 return this_thr;
6124}
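// ---------------------------------------------------------------------------
// Editor's note (illustrative sketch, not part of kmp_runtime.cpp): the worker
// loop above parks at the fork barrier until it is handed a team with a
// non-NULL microtask or until g_done is raised, invokes the microtask, and
// then joins before parking again. A minimal stand-alone model of that control
// flow, using standard C++ primitives instead of libomp's fork/join barriers
// (worker_slot, post_work and launch_worker are hypothetical names):
#include <condition_variable>
#include <functional>
#include <mutex>

struct worker_slot {
  std::mutex mx;
  std::condition_variable cv;
  std::function<void()> task; // plays the role of (*pteam)->t.t_pkfn
  bool done = false;          // plays the role of __kmp_global.g.g_done
};

void post_work(worker_slot *slot, std::function<void()> fn) {
  {
    std::lock_guard<std::mutex> lk(slot->mx);
    slot->task = std::move(fn);
  }
  slot->cv.notify_one(); // analogous to releasing the fork barrier
}

void launch_worker(worker_slot *slot) {
  for (;;) {
    std::unique_lock<std::mutex> lk(slot->mx);
    // "fork barrier": sleep until released with work or told to shut down.
    slot->cv.wait(lk, [&] { return slot->done || slot->task != nullptr; });
    if (slot->done)
      return; // mirrors the g_done exit path
    std::function<void()> work = std::move(slot->task);
    slot->task = nullptr;
    lk.unlock();
    work(); // mirrors rc = (*pteam)->t.t_invoke(gtid)
    // a real runtime would now enter the join barrier with its team
  }
}
// ---------------------------------------------------------------------------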
6125
6126/* ------------------------------------------------------------------------ */
6127
6128void __kmp_internal_end_dest(void *specific_gtid) {
6129 // Make sure no significant bits are lost
6130 int gtid;
6131 __kmp_type_convert((kmp_intptr_t)specific_gtid - 1, &gtid);
6132
6133 KA_TRACE(30, ("__kmp_internal_end_dest: T#%d\n", gtid))if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_internal_end_dest: T#%d\n"
, gtid); }
;
6134 /* NOTE: the gtid is stored as gtid+1 in the thread-local-storage
6135 * this is because 0 is reserved for the nothing-stored case */
6136
6137 __kmp_internal_end_thread(gtid);
6138}
6139
6140#if KMP_OS_UNIX1 && KMP_DYNAMIC_LIB1
6141
6142__attribute__((destructor)) void __kmp_internal_end_dtor(void) {
6143 __kmp_internal_end_atexit();
6144}
6145
6146#endif
6147
6148/* [Windows] josh: when the atexit handler is called, there may still be more
6149 than one thread alive */
6150void __kmp_internal_end_atexit(void) {
6151 KA_TRACE(30, ("__kmp_internal_end_atexit\n"))if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_internal_end_atexit\n"
); }
;
6152 /* [Windows]
6153 josh: ideally, we want to completely shut down the library in this atexit
6154 handler, but stat code that depends on thread specific data for gtid fails
6155 because that data becomes unavailable at some point during the shutdown, so
6156 we call __kmp_internal_end_thread instead. We should eventually remove the
6157 dependency on __kmp_get_specific_gtid in the stat code and use
6158 __kmp_internal_end_library to cleanly shut down the library.
6159
6160 // TODO: Can some of this comment about GVS be removed?
6161 I suspect that the offending stat code is executed when the calling thread
6162 tries to clean up a dead root thread's data structures, resulting in GVS
6163 code trying to close the GVS structures for that thread, but since the stat
6164 code uses __kmp_get_specific_gtid to get the gtid with the assumption that
6165 the calling thread is cleaning up itself instead of another thread, it gets
6166 confused. This happens because allowing a thread to unregister and clean up
6167 another thread is a recent modification for addressing an issue.
6168 Based on the current design (20050722), a thread may end up
6169 trying to unregister another thread only if thread death does not trigger
6170 the calling of __kmp_internal_end_thread. For Linux* OS, there is the
6171 thread specific data destructor function to detect thread death. For
6172 Windows dynamic, there is DllMain(THREAD_DETACH). For Windows static, there
6173 is nothing. Thus, the workaround is applicable only for Windows static
6174 stat library. */
6175 __kmp_internal_end_library(-1);
6176#if KMP_OS_WINDOWS0
6177 __kmp_close_console();
6178#endif
6179}
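// ---------------------------------------------------------------------------
// Editor's note (illustrative sketch, not part of kmp_runtime.cpp): the two
// shutdown entry points above rely on standard process-teardown hooks -- an
// atexit() handler for the static library and an ELF destructor for the
// dynamic one (Windows uses DllMain instead). A tiny stand-alone program
// showing both hook mechanisms (the demo_* names are hypothetical):
#include <cstdio>
#include <cstdlib>

static void demo_atexit_hook(void) { std::puts("teardown via atexit()"); }

__attribute__((destructor)) static void demo_so_destructor(void) {
  std::puts("teardown via __attribute__((destructor))");
}

int main() {
  std::atexit(demo_atexit_hook); // both hooks run during process teardown
  return 0;
}
// ---------------------------------------------------------------------------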
6180
6181static void __kmp_reap_thread(kmp_info_t *thread, int is_root) {
6182 // It is assumed __kmp_forkjoin_lock is acquired.
6183
6184 int gtid;
6185
6186 KMP_DEBUG_ASSERT(thread != NULL)if (!(thread != __null)) { __kmp_debug_assert("thread != __null"
, "openmp/runtime/src/kmp_runtime.cpp", 6186); }
;
6187
6188 gtid = thread->th.th_info.ds.ds_gtid;
6189
6190 if (!is_root) {
6191 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME(2147483647)) {
6192 /* Assume the threads are at the fork barrier here */
6193 KA_TRACE(if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n"
, gtid); }
6194 20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n"
, gtid); }
6195 gtid))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n"
, gtid); }
;
6196 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
6197 while (
6198 !KMP_COMPARE_AND_STORE_ACQ32(&(thread->th.th_used_in_team), 0, 3)__sync_bool_compare_and_swap((volatile kmp_uint32 *)(&(thread
->th.th_used_in_team)), (kmp_uint32)(0), (kmp_uint32)(3))
)
6199 KMP_CPU_PAUSE()__kmp_x86_pause();
6200 __kmp_resume_32(gtid, (kmp_flag_32<false, false> *)NULL__null);
6201 } else {
6202 /* Need release fence here to prevent seg faults for tree forkjoin
6203 barrier (GEH) */
6204 kmp_flag_64<> flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
6205 thread);
6206 __kmp_release_64(&flag);
6207 }
6208 }
6209
6210 // Terminate OS thread.
6211 __kmp_reap_worker(thread);
6212
6213 // The thread was killed asynchronously. If it was actively
6214 // spinning in the thread pool, decrement the global count.
6215 //
6216 // There is a small timing hole here - if the worker thread was just waking
6217 // up after sleeping in the pool, had reset its th_active_in_pool flag but
6218 // not decremented the global counter __kmp_thread_pool_active_nth yet, then
6219 // the global counter might not get updated.
6220 //
6221 // Currently, this can only happen as the library is unloaded,
6222 // so there are no harmful side effects.
6223 if (thread->th.th_active_in_pool) {
6224 thread->th.th_active_in_pool = FALSE0;
6225 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth)(&__kmp_thread_pool_active_nth)->fetch_sub(1, std::memory_order_acq_rel
)
;
6226 KMP_DEBUG_ASSERT(__kmp_thread_pool_active_nth >= 0)if (!(__kmp_thread_pool_active_nth >= 0)) { __kmp_debug_assert
("__kmp_thread_pool_active_nth >= 0", "openmp/runtime/src/kmp_runtime.cpp"
, 6226); }
;
6227 }
6228 }
6229
6230 __kmp_free_implicit_task(thread);
6231
6232// Free the fast memory for tasking
6233#if USE_FAST_MEMORY3
6234 __kmp_free_fast_memory(thread);
6235#endif /* USE_FAST_MEMORY */
6236
6237 __kmp_suspend_uninitialize_thread(thread);
6238
6239 KMP_DEBUG_ASSERT(__kmp_threads[gtid] == thread)if (!(__kmp_threads[gtid] == thread)) { __kmp_debug_assert("__kmp_threads[gtid] == thread"
, "openmp/runtime/src/kmp_runtime.cpp", 6239); }
;
6240 TCW_SYNC_PTR(__kmp_threads[gtid], NULL)((__kmp_threads[gtid])) = ((__null));
6241
6242 --__kmp_all_nth;
6243 // __kmp_nth was decremented when thread is added to the pool.
6244
6245#ifdef KMP_ADJUST_BLOCKTIME1
6246 /* Adjust blocktime back to user setting or default if necessary */
6247 /* Middle initialization might never have occurred */
6248 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
6249 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0)if (!(__kmp_avail_proc > 0)) { __kmp_debug_assert("__kmp_avail_proc > 0"
, "openmp/runtime/src/kmp_runtime.cpp", 6249); }
;
6250 if (__kmp_nth <= __kmp_avail_proc) {
6251 __kmp_zero_bt = FALSE0;
6252 }
6253 }
6254#endif /* KMP_ADJUST_BLOCKTIME */
6255
6256 /* free the memory being used */
6257 if (__kmp_env_consistency_check) {
6258 if (thread->th.th_cons) {
6259 __kmp_free_cons_stack(thread->th.th_cons);
6260 thread->th.th_cons = NULL__null;
6261 }
6262 }
6263
6264 if (thread->th.th_pri_common != NULL__null) {
6265 __kmp_free(thread->th.th_pri_common)___kmp_free((thread->th.th_pri_common), "openmp/runtime/src/kmp_runtime.cpp"
, 6265)
;
6266 thread->th.th_pri_common = NULL__null;
6267 }
6268
6269 if (thread->th.th_task_state_memo_stack != NULL__null) {
6270 __kmp_free(thread->th.th_task_state_memo_stack)___kmp_free((thread->th.th_task_state_memo_stack), "openmp/runtime/src/kmp_runtime.cpp"
, 6270)
;
6271 thread->th.th_task_state_memo_stack = NULL__null;
6272 }
6273
6274#if KMP_USE_BGET1
6275 if (thread->th.th_local.bget_data != NULL__null) {
6276 __kmp_finalize_bget(thread);
6277 }
6278#endif
6279
6280#if KMP_AFFINITY_SUPPORTED1
6281 if (thread->th.th_affin_mask != NULL__null) {
6282 KMP_CPU_FREE(thread->th.th_affin_mask)__kmp_affinity_dispatch->deallocate_mask(thread->th.th_affin_mask
)
;
6283 thread->th.th_affin_mask = NULL__null;
6284 }
6285#endif /* KMP_AFFINITY_SUPPORTED */
6286
6287#if KMP_USE_HIER_SCHED0
6288 if (thread->th.th_hier_bar_data != NULL__null) {
6289 __kmp_free(thread->th.th_hier_bar_data)___kmp_free((thread->th.th_hier_bar_data), "openmp/runtime/src/kmp_runtime.cpp"
, 6289)
;
6290 thread->th.th_hier_bar_data = NULL__null;
6291 }
6292#endif
6293
6294 __kmp_reap_team(thread->th.th_serial_team);
6295 thread->th.th_serial_team = NULL__null;
6296 __kmp_free(thread)___kmp_free((thread), "openmp/runtime/src/kmp_runtime.cpp", 6296
)
;
6297
6298 KMP_MB();
6299
6300} // __kmp_reap_thread
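// ---------------------------------------------------------------------------
// Editor's note (illustrative sketch, not part of kmp_runtime.cpp): the
// th_active_in_pool / __kmp_thread_pool_active_nth pairing above is a
// per-thread flag guarding a global atomic count, decremented at most once
// per thread so the count cannot go negative. A reduced model of that
// bookkeeping with std::atomic in place of the KMP_ATOMIC_* macros
// (pooled_thread, enter_pool and reap_from_pool are hypothetical names):
#include <atomic>
#include <cassert>

static std::atomic<int> pool_active_count{0}; // ~ __kmp_thread_pool_active_nth

struct pooled_thread {
  bool active_in_pool = false; // ~ th.th_active_in_pool
};

void enter_pool(pooled_thread &t) {
  t.active_in_pool = true;
  pool_active_count.fetch_add(1, std::memory_order_acq_rel);
}

void reap_from_pool(pooled_thread &t) {
  // Only a thread still marked active contributes to the global count;
  // clearing the flag first keeps the decrement from happening twice.
  if (t.active_in_pool) {
    t.active_in_pool = false;
    pool_active_count.fetch_sub(1, std::memory_order_acq_rel);
    assert(pool_active_count.load() >= 0);
  }
}
// ---------------------------------------------------------------------------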
6301
6302static void __kmp_itthash_clean(kmp_info_t *th) {
6303#if USE_ITT_NOTIFY1
6304 if (__kmp_itt_region_domains.count > 0) {
6305 for (int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
6306 kmp_itthash_entry_t *bucket = __kmp_itt_region_domains.buckets[i];
6307 while (bucket) {
6308 kmp_itthash_entry_t *next = bucket->next_in_bucket;
6309 __kmp_thread_free(th, bucket)___kmp_thread_free((th), (bucket), "openmp/runtime/src/kmp_runtime.cpp"
, 6309)
;
6310 bucket = next;
6311 }
6312 }
6313 }
6314 if (__kmp_itt_barrier_domains.count > 0) {
6315 for (int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
6316 kmp_itthash_entry_t *bucket = __kmp_itt_barrier_domains.buckets[i];
6317 while (bucket) {
6318 kmp_itthash_entry_t *next = bucket->next_in_bucket;
6319 __kmp_thread_free(th, bucket)___kmp_thread_free((th), (bucket), "openmp/runtime/src/kmp_runtime.cpp"
, 6319)
;
6320 bucket = next;
6321 }
6322 }
6323 }
6324#endif
6325}
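// ---------------------------------------------------------------------------
// Editor's note (illustrative sketch, not part of kmp_runtime.cpp): both loops
// above free a bucket's singly linked chain by saving the next pointer before
// releasing the current node -- the standard way to tear down an intrusive
// list. Reduced model (hash_node is a hypothetical type, plain delete stands
// in for __kmp_thread_free, and nodes are assumed to be allocated with new):
struct hash_node {
  hash_node *next_in_bucket;
};

static void free_bucket_chain(hash_node *bucket) {
  while (bucket) {
    hash_node *next = bucket->next_in_bucket; // grab next before freeing
    delete bucket;
    bucket = next;
  }
}
// ---------------------------------------------------------------------------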
6326
6327static void __kmp_internal_end(void) {
6328 int i;
6329
6330 /* First, unregister the library */
6331 __kmp_unregister_library();
6332
6333#if KMP_OS_WINDOWS0
6334 /* In Win static library, we can't tell when a root actually dies, so we
6335 reclaim the data structures for any root threads that have died but not
6336 unregistered themselves, in order to shut down cleanly.
6337 In Win dynamic library we also can't tell when a thread dies. */
6338 __kmp_reclaim_dead_roots(); // AC: moved here to always clean resources of
6339// dead roots
6340#endif
6341
6342 for (i = 0; i < __kmp_threads_capacity; i++)
6343 if (__kmp_root[i])
6344 if (__kmp_root[i]->r.r_active)
6345 break;
6346 KMP_MB(); /* Flush all pending memory write invalidates. */
6347 TCW_SYNC_4(__kmp_global.g.g_done, TRUE)(__kmp_global.g.g_done) = ((!0));
6348
6349 if (i < __kmp_threads_capacity) {
6350#if KMP_USE_MONITOR
6351 // 2009-09-08 (lev): Other alive roots found. Why do we kill the monitor??
6352 KMP_MB(); /* Flush all pending memory write invalidates. */
6353
6354 // Need to check that monitor was initialized before reaping it. If we are
6355 // called from __kmp_atfork_child (which sets __kmp_init_parallel = 0), then
6356 // __kmp_monitor will appear to contain valid data, but it is only valid in
6357 // the parent process, not the child.
6358 // New behavior (201008): instead of keying off of the flag
6359 // __kmp_init_parallel, the monitor thread creation is keyed off
6360 // of the new flag __kmp_init_monitor.
6361 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6362 if (TCR_4(__kmp_init_monitor)(__kmp_init_monitor)) {
6363 __kmp_reap_monitor(&__kmp_monitor);
6364 TCW_4(__kmp_init_monitor, 0)(__kmp_init_monitor) = (0);
6365 }
6366 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
6367 KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end: monitor reaped\n"
); }
;
6368#endif // KMP_USE_MONITOR
6369 } else {
6370/* TODO move this to cleanup code */
6371#ifdef KMP_DEBUG1
6372 /* make sure that everything has properly ended */
6373 for (i = 0; i < __kmp_threads_capacity; i++) {
6374 if (__kmp_root[i]) {
6375 // KMP_ASSERT( ! KMP_UBER_GTID( i ) ); // AC:
6376 // there can be uber threads alive here
6377 KMP_ASSERT(!__kmp_root[i]->r.r_active)if (!(!__kmp_root[i]->r.r_active)) { __kmp_debug_assert("!__kmp_root[i]->r.r_active"
, "openmp/runtime/src/kmp_runtime.cpp", 6377); }
; // TODO: can they be active?
6378 }
6379 }
6380#endif
6381
6382 KMP_MB();
6383
6384 // Reap the worker threads.
6385 // This is valid for now, but be careful if threads are reaped sooner.
6386 while (__kmp_thread_pool != NULL__null) { // Loop thru all the threads in the pool.
6387 // Get the next thread from the pool.
6388 kmp_info_t *thread = CCAST(kmp_info_t *, __kmp_thread_pool)const_cast<kmp_info_t *>(__kmp_thread_pool);
6389 __kmp_thread_pool = thread->th.th_next_pool;
6390 // Reap it.
6391 KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP)if (!(thread->th.th_reap_state == 1)) { __kmp_debug_assert
("thread->th.th_reap_state == 1", "openmp/runtime/src/kmp_runtime.cpp"
, 6391); }
;
6392 thread->th.th_next_pool = NULL__null;
6393 thread->th.th_in_pool = FALSE0;
6394 __kmp_reap_thread(thread, 0);
6395 }
6396 __kmp_thread_pool_insert_pt = NULL__null;
6397
6398 // Reap teams.
6399 while (__kmp_team_pool != NULL__null) { // Loop thru all the teams in the pool.
6400 // Get the next team from the pool.
6401 kmp_team_t *team = CCAST(kmp_team_t *, __kmp_team_pool)const_cast<kmp_team_t *>(__kmp_team_pool);
6402 __kmp_team_pool = team->t.t_next_pool;
6403 // Reap it.
6404 team->t.t_next_pool = NULL__null;
6405 __kmp_reap_team(team);
6406 }
6407
6408 __kmp_reap_task_teams();
6409
6410#if KMP_OS_UNIX1
6411 // Threads that are not reaped should not access any resources since they
6412 // are going to be deallocated soon, so the shutdown sequence should wait
6413 // until all threads either exit the final spin-waiting loop or begin
6414 // sleeping after the given blocktime.
6415 for (i = 0; i < __kmp_threads_capacity; i++) {
6416 kmp_info_t *thr = __kmp_threads[i];
6417 while (thr && KMP_ATOMIC_LD_ACQ(&thr->th.th_blocking)(&thr->th.th_blocking)->load(std::memory_order_acquire
)
)
6418 KMP_CPU_PAUSE()__kmp_x86_pause();
6419 }
6420#endif
6421
6422 for (i = 0; i < __kmp_threads_capacity; ++i) {
6423 // TBD: Add some checking...
6424 // Something like KMP_DEBUG_ASSERT( __kmp_thread[ i ] == NULL );
6425 }
6426
6427 /* Make sure all threadprivate destructors get run by joining with all
6428 worker threads before resetting this flag */
6429 TCW_SYNC_4(__kmp_init_common, FALSE)(__kmp_init_common) = (0);
6430
6431 KA_TRACE(10, ("__kmp_internal_end: all workers reaped\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end: all workers reaped\n"
); }
;
6432 KMP_MB();
6433
6434#if KMP_USE_MONITOR
6435 // See note above: One of the possible fixes for CQ138434 / CQ140126
6436 //
6437 // FIXME: push both code fragments down and CSE them?
6438 // push them into __kmp_cleanup() ?
6439 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6440 if (TCR_4(__kmp_init_monitor)(__kmp_init_monitor)) {
6441 __kmp_reap_monitor(&__kmp_monitor);
6442 TCW_4(__kmp_init_monitor, 0)(__kmp_init_monitor) = (0);
6443 }
6444 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
6445 KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end: monitor reaped\n"
); }
;
6446#endif
6447 } /* else !__kmp_global.t_active */
6448 TCW_4(__kmp_init_gtid, FALSE)(__kmp_init_gtid) = (0);
6449 KMP_MB(); /* Flush all pending memory write invalidates. */
6450
6451 __kmp_cleanup();
6452#if OMPT_SUPPORT1
6453 ompt_fini();
6454#endif
6455}
6456
6457void __kmp_internal_end_library(int gtid_req) {
6458 /* if we have already cleaned up, don't try again, it wouldn't be pretty */
6459 /* this shouldn't be a race condition because __kmp_internal_end() is the
6460 only place to clear __kmp_serial_init */
6461 /* we'll check this later too, after we get the lock */
6462 // 2009-09-06: We do not set g_abort without setting g_done. This check looks
6463 // redundant, because the next check will work in any case.
6464 if (__kmp_global.g.g_abort) {
6465 KA_TRACE(11, ("__kmp_internal_end_library: abort, exiting\n"))if (kmp_a_debug >= 11) { __kmp_debug_printf ("__kmp_internal_end_library: abort, exiting\n"
); }
;
6466 /* TODO abort? */
6467 return;
6468 }
6469 if (TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done) || !__kmp_init_serial) {
6470 KA_TRACE(10, ("__kmp_internal_end_library: already finished\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: already finished\n"
); }
;
6471 return;
6472 }
6473
6474 // If hidden helper team has been initialized, we need to deinit it
6475 if (TCR_4(__kmp_init_hidden_helper)(__kmp_init_hidden_helper) &&
6476 !TCR_4(__kmp_hidden_helper_team_done)(__kmp_hidden_helper_team_done)) {
6477 TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE)(__kmp_hidden_helper_team_done) = ((!0));
6478 // First release the main thread to let it continue its work
6479 __kmp_hidden_helper_main_thread_release();
6480 // Wait until the hidden helper team has been destroyed
6481 __kmp_hidden_helper_threads_deinitz_wait();
6482 }
6483
6484 KMP_MB(); /* Flush all pending memory write invalidates. */
6485 /* find out who we are and what we should do */
6486 {
6487 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6488 KA_TRACE(if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: enter T#%d (%d)\n"
, gtid, gtid_req); }
6489 10, ("__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: enter T#%d (%d)\n"
, gtid, gtid_req); }
;
6490 if (gtid == KMP_GTID_SHUTDOWN(-3)) {
6491 KA_TRACE(10, ("__kmp_internal_end_library: !__kmp_init_runtime, system "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: !__kmp_init_runtime, system "
"already shutdown\n"); }
6492 "already shutdown\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: !__kmp_init_runtime, system "
"already shutdown\n"); }
;
6493 return;
6494 } else if (gtid == KMP_GTID_MONITOR(-4)) {
6495 KA_TRACE(10, ("__kmp_internal_end_library: monitor thread, gtid not "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: monitor thread, gtid not "
"registered, or system shutdown\n"); }
6496 "registered, or system shutdown\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: monitor thread, gtid not "
"registered, or system shutdown\n"); }
;
6497 return;
6498 } else if (gtid == KMP_GTID_DNE(-2)) {
6499 KA_TRACE(10, ("__kmp_internal_end_library: gtid not registered or system "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: gtid not registered or system "
"shutdown\n"); }
6500 "shutdown\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: gtid not registered or system "
"shutdown\n"); }
;
6501 /* we don't know who we are, but we may still shut down the library */
6502 } else if (KMP_UBER_GTID(gtid)) {
6503 /* unregister ourselves as an uber thread. gtid is no longer valid */
6504 if (__kmp_root[gtid]->r.r_active) {
6505 __kmp_global.g.g_abort = -1;
6506 TCW_SYNC_4(__kmp_global.g.g_done, TRUE)(__kmp_global.g.g_done) = ((!0));
6507 __kmp_unregister_library();
6508 KA_TRACE(10,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: root still active, abort T#%d\n"
, gtid); }
6509 ("__kmp_internal_end_library: root still active, abort T#%d\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: root still active, abort T#%d\n"
, gtid); }
6510 gtid))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: root still active, abort T#%d\n"
, gtid); }
;
6511 return;
6512 } else {
6513 __kmp_itthash_clean(__kmp_threads[gtid]);
6514 KA_TRACE(if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: unregistering sibling T#%d\n"
, gtid); }
6515 10,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: unregistering sibling T#%d\n"
, gtid); }
6516 ("__kmp_internal_end_library: unregistering sibling T#%d\n", gtid))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: unregistering sibling T#%d\n"
, gtid); }
;
6517 __kmp_unregister_root_current_thread(gtid);
6518 }
6519 } else {
6520/* worker threads may call this function through the atexit handler, if they
6521 * call exit() */
6522/* For now, skip the usual subsequent processing and just dump the debug buffer.
6523 TODO: do a thorough shutdown instead */
6524#ifdef DUMP_DEBUG_ON_EXIT
6525 if (__kmp_debug_buf)
6526 __kmp_dump_debug_buffer();
6527#endif
6528 // added the unregister-library call here when we switched to shm on Linux
6529 // if we don't, it will leave lots of files in /dev/shm
6530 // cleanup shared memory file before exiting.
6531 __kmp_unregister_library();
6532 return;
6533 }
6534 }
6535 /* synchronize the termination process */
6536 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6537
6538 /* have we already finished */
6539 if (__kmp_global.g.g_abort) {
6540 KA_TRACE(10, ("__kmp_internal_end_library: abort, exiting\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: abort, exiting\n"
); }
;
6541 /* TODO abort? */
6542 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6543 return;
6544 }
6545 if (TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done) || !__kmp_init_serial) {
6546 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6547 return;
6548 }
6549
6550 /* We need this lock to enforce mutex between this reading of
6551 __kmp_threads_capacity and the writing by __kmp_register_root.
6552 Alternatively, we can use a counter of roots that is atomically updated by
6553 __kmp_get_global_thread_id_reg, __kmp_do_serial_initialize and
6554 __kmp_internal_end_*. */
6555 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
6556
6557 /* now we can safely conduct the actual termination */
6558 __kmp_internal_end();
6559
6560 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6561 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6562
6563 KA_TRACE(10, ("__kmp_internal_end_library: exit\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_library: exit\n"
); }
;
6564
6565#ifdef DUMP_DEBUG_ON_EXIT
6566 if (__kmp_debug_buf)
6567 __kmp_dump_debug_buffer();
6568#endif
6569
6570#if KMP_OS_WINDOWS0
6571 __kmp_close_console();
6572#endif
6573
6574 __kmp_fini_allocator();
6575
6576} // __kmp_internal_end_library
6577
6578void __kmp_internal_end_thread(int gtid_req) {
6579 int i;
6580
6581 /* if we have already cleaned up, don't try again, it wouldn't be pretty */
6582 /* this shouldn't be a race condition because __kmp_internal_end() is the
6583 * only place to clear __kmp_serial_init */
6584 /* we'll check this later too, after we get the lock */
6585 // 2009-09-06: We do not set g_abort without setting g_done. This check looks
6586 // redundant, because the next check will work in any case.
6587 if (__kmp_global.g.g_abort) {
6588 KA_TRACE(11, ("__kmp_internal_end_thread: abort, exiting\n"))if (kmp_a_debug >= 11) { __kmp_debug_printf ("__kmp_internal_end_thread: abort, exiting\n"
); }
;
6589 /* TODO abort? */
6590 return;
6591 }
6592 if (TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done) || !__kmp_init_serial) {
6593 KA_TRACE(10, ("__kmp_internal_end_thread: already finished\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: already finished\n"
); }
;
6594 return;
6595 }
6596
6597 // If hidden helper team has been initialized, we need to deinit it
6598 if (TCR_4(__kmp_init_hidden_helper)(__kmp_init_hidden_helper) &&
6599 !TCR_4(__kmp_hidden_helper_team_done)(__kmp_hidden_helper_team_done)) {
6600 TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE)(__kmp_hidden_helper_team_done) = ((!0));
6601 // First release the main thread to let it continue its work
6602 __kmp_hidden_helper_main_thread_release();
6603 // Wait until the hidden helper team has been destroyed
6604 __kmp_hidden_helper_threads_deinitz_wait();
6605 }
6606
6607 KMP_MB(); /* Flush all pending memory write invalidates. */
6608
6609 /* find out who we are and what we should do */
6610 {
6611 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6612 KA_TRACE(10,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: enter T#%d (%d)\n"
, gtid, gtid_req); }
6613 ("__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: enter T#%d (%d)\n"
, gtid, gtid_req); }
;
6614 if (gtid == KMP_GTID_SHUTDOWN(-3)) {
6615 KA_TRACE(10, ("__kmp_internal_end_thread: !__kmp_init_runtime, system "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: !__kmp_init_runtime, system "
"already shutdown\n"); }
6616 "already shutdown\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: !__kmp_init_runtime, system "
"already shutdown\n"); }
;
6617 return;
6618 } else if (gtid == KMP_GTID_MONITOR(-4)) {
6619 KA_TRACE(10, ("__kmp_internal_end_thread: monitor thread, gtid not "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: monitor thread, gtid not "
"registered, or system shutdown\n"); }
6620 "registered, or system shutdown\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: monitor thread, gtid not "
"registered, or system shutdown\n"); }
;
6621 return;
6622 } else if (gtid == KMP_GTID_DNE(-2)) {
6623 KA_TRACE(10, ("__kmp_internal_end_thread: gtid not registered or system "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: gtid not registered or system "
"shutdown\n"); }
6624 "shutdown\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: gtid not registered or system "
"shutdown\n"); }
;
6625 return;
6626 /* we don't know who we are */
6627 } else if (KMP_UBER_GTID(gtid)) {
6628 /* unregister ourselves as an uber thread. gtid is no longer valid */
6629 if (__kmp_root[gtid]->r.r_active) {
6630 __kmp_global.g.g_abort = -1;
6631 TCW_SYNC_4(__kmp_global.g.g_done, TRUE)(__kmp_global.g.g_done) = ((!0));
6632 KA_TRACE(10,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: root still active, abort T#%d\n"
, gtid); }
6633 ("__kmp_internal_end_thread: root still active, abort T#%d\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: root still active, abort T#%d\n"
, gtid); }
6634 gtid))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: root still active, abort T#%d\n"
, gtid); }
;
6635 return;
6636 } else {
6637 KA_TRACE(10, ("__kmp_internal_end_thread: unregistering sibling T#%d\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: unregistering sibling T#%d\n"
, gtid); }
6638 gtid))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: unregistering sibling T#%d\n"
, gtid); }
;
6639 __kmp_unregister_root_current_thread(gtid);
6640 }
6641 } else {
6642 /* just a worker thread, let's leave */
6643 KA_TRACE(10, ("__kmp_internal_end_thread: worker thread T#%d\n", gtid))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: worker thread T#%d\n"
, gtid); }
;
6644
6645 if (gtid >= 0) {
6646 __kmp_threads[gtid]->th.th_task_team = NULL__null;
6647 }
6648
6649 KA_TRACE(10,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n"
, gtid); }
6650 ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n"
, gtid); }
6651 gtid))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n"
, gtid); }
;
6652 return;
6653 }
6654 }
6655#if KMP_DYNAMIC_LIB1
6656 if (__kmp_pause_status != kmp_hard_paused)
6657 // AC: let's not shut down the dynamic library at the exit of the uber thread,
6658 // because it is better to shut down later in the library destructor.
6659 {
6660 KA_TRACE(10, ("__kmp_internal_end_thread: exiting T#%d\n", gtid_req))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: exiting T#%d\n"
, gtid_req); }
;
6661 return;
6662 }
6663#endif
6664 /* synchronize the termination process */
6665 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6666
6667 /* have we already finished */
6668 if (__kmp_global.g.g_abort) {
6669 KA_TRACE(10, ("__kmp_internal_end_thread: abort, exiting\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: abort, exiting\n"
); }
;
6670 /* TODO abort? */
6671 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6672 return;
6673 }
6674 if (TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done) || !__kmp_init_serial) {
6675 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6676 return;
6677 }
6678
6679 /* We need this lock to enforce mutex between this reading of
6680 __kmp_threads_capacity and the writing by __kmp_register_root.
6681 Alternatively, we can use a counter of roots that is atomically updated by
6682 __kmp_get_global_thread_id_reg, __kmp_do_serial_initialize and
6683 __kmp_internal_end_*. */
6684
6685 /* should we finish the run-time? are all siblings done? */
6686 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
6687
6688 for (i = 0; i < __kmp_threads_capacity; ++i) {
6689 if (KMP_UBER_GTID(i)) {
6690 KA_TRACE(if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n"
, i); }
6691 10,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n"
, i); }
6692 ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n"
, i); }
;
6693 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6694 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6695 return;
6696 }
6697 }
6698
6699 /* now we can safely conduct the actual termination */
6700
6701 __kmp_internal_end();
6702
6703 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6704 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6705
6706 KA_TRACE(10, ("__kmp_internal_end_thread: exit T#%d\n", gtid_req))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_internal_end_thread: exit T#%d\n"
, gtid_req); }
;
6707
6708#ifdef DUMP_DEBUG_ON_EXIT
6709 if (__kmp_debug_buf)
6710 __kmp_dump_debug_buffer();
6711#endif
6712} // __kmp_internal_end_thread
6713
6714// -----------------------------------------------------------------------------
6715// Library registration stuff.
6716
6717static long __kmp_registration_flag = 0;
6718// Random value used to indicate library initialization.
6719static char *__kmp_registration_str = NULL__null;
6720// Value to be saved in env var __KMP_REGISTERED_LIB_<pid>.
6721
6722static inline char *__kmp_reg_status_name() {
6723/* On RHEL 3u5 if linked statically, getpid() returns different values in
6724 each thread. If registration and unregistration go in different threads
6725 (omp_misc_other_root_exit.cpp test case), the name of registered_lib_env
6726 env var cannot be found, because the name will contain a different pid. */
6727// macOS* complains about name being too long with additional getuid()
6728#if KMP_OS_UNIX1 && !KMP_OS_DARWIN0 && KMP_DYNAMIC_LIB1
6729 return __kmp_str_format("__KMP_REGISTERED_LIB_%d_%d", (int)getpid(),
6730 (int)getuid());
6731#else
6732 return __kmp_str_format("__KMP_REGISTERED_LIB_%d", (int)getpid());
6733#endif
6734} // __kmp_reg_status_name
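// ---------------------------------------------------------------------------
// Editor's note (illustrative sketch, not part of kmp_runtime.cpp): the
// registration key built above is keyed by pid (and, on most Unix dynamic
// builds, uid), and the value later stored under it has the shape
// "<flag address>-<flag value>-<library file>". A stand-alone approximation
// of building such a key/value pair with snprintf instead of __kmp_str_format
// (demo_flag is a hypothetical stand-in for __kmp_registration_flag):
#include <cstdio>
#include <unistd.h>

static unsigned long demo_flag = 0xCAFE0000UL;

int main() {
  char name[64], value[128];
  std::snprintf(name, sizeof(name), "__KMP_REGISTERED_LIB_%d_%d",
                (int)getpid(), (int)getuid());
  std::snprintf(value, sizeof(value), "%p-%lx-%s",
                (void *)&demo_flag, demo_flag, "libomp.so.5");
  std::printf("%s=%s\n", name, value);
  return 0;
}
// ---------------------------------------------------------------------------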
6735
6736void __kmp_register_library_startup(void) {
6737
6738 char *name = __kmp_reg_status_name(); // Name of the environment variable.
6739 int done = 0;
6740 union {
6741 double dtime;
6742 long ltime;
6743 } time;
6744#if KMP_ARCH_X860 || KMP_ARCH_X86_641
6745 __kmp_initialize_system_tick();
6746#endif
6747 __kmp_read_system_time(&time.dtime);
6748 __kmp_registration_flag = 0xCAFE0000L | (time.ltime & 0x0000FFFFL);
6749 __kmp_registration_str =
6750 __kmp_str_format("%p-%lx-%s", &__kmp_registration_flag,
6751 __kmp_registration_flag, KMP_LIBRARY_FILE"libomp.so.5");
6752
6753 KA_TRACE(50, ("__kmp_register_library_startup: %s=\"%s\"\n", name,if (kmp_a_debug >= 50) { __kmp_debug_printf ("__kmp_register_library_startup: %s=\"%s\"\n"
, name, __kmp_registration_str); }
6754 __kmp_registration_str))if (kmp_a_debug >= 50) { __kmp_debug_printf ("__kmp_register_library_startup: %s=\"%s\"\n"
, name, __kmp_registration_str); }
;
6755
6756 while (!done) {
6757
6758 char *value = NULL__null; // Actual value of the environment variable.
6759
6760#if defined(KMP_USE_SHM)
6761 char *shm_name = __kmp_str_format("/%s", name);
6762 int shm_preexist = 0;
6763 char *data1;
6764 int fd1 = shm_open(shm_name, O_CREAT0100 | O_EXCL0200 | O_RDWR02, 0666);
6765 if ((fd1 == -1) && (errno(*__errno_location ()) == EEXIST17)) {
6766 // file didn't open because it already exists.
6767 // try opening existing file
6768 fd1 = shm_open(shm_name, O_RDWR02, 0666);
6769 if (fd1 == -1) { // file didn't open
6770 // error out here
6771 __kmp_fatal(KMP_MSG(FunctionError, "Can't open SHM")__kmp_msg_format(kmp_i18n_msg_FunctionError, "Can't open SHM"
)
, KMP_ERR(0)__kmp_msg_error_code(0),
6772 __kmp_msg_null);
6773 } else {
6774 // able to open existing file
6775 shm_preexist = 1;
6776 }
6777 } else if (fd1 == -1) { // SHM didn't open; it was due to error other than
6778 // already exists.
6779 // error out here.
6780 __kmp_fatal(KMP_MSG(FunctionError, "Can't open SHM2")__kmp_msg_format(kmp_i18n_msg_FunctionError, "Can't open SHM2"
)
, KMP_ERR(errno)__kmp_msg_error_code((*__errno_location ())),
6781 __kmp_msg_null);
6782 }
6783 if (shm_preexist == 0) {
6784 // we created the SHM, now set its size
6785 if (ftruncate(fd1, SHM_SIZE1024) == -1) {
6786 // error occurred setting size;
6787 __kmp_fatal(KMP_MSG(FunctionError, "Can't set size of SHM")__kmp_msg_format(kmp_i18n_msg_FunctionError, "Can't set size of SHM"
)
,
6788 KMP_ERR(errno)__kmp_msg_error_code((*__errno_location ())), __kmp_msg_null);
6789 }
6790 }
6791 data1 =
6792 (char *)mmap(0, SHM_SIZE1024, PROT_READ0x1 | PROT_WRITE0x2, MAP_SHARED0x01, fd1, 0);
6793 if (data1 == MAP_FAILED((void *) -1)) {
6794 // failed to map shared memory
6795 __kmp_fatal(KMP_MSG(FunctionError, "Can't map SHM")__kmp_msg_format(kmp_i18n_msg_FunctionError, "Can't map SHM"), KMP_ERR(errno)__kmp_msg_error_code((*__errno_location ())),
6796 __kmp_msg_null);
6797 }
6798 if (shm_preexist == 0) { // set data to SHM, set value
6799 KMP_STRCPY_S(data1, SHM_SIZE, __kmp_registration_str)strcpy(data1, __kmp_registration_str);
6800 }
6801 // Read value from either what we just wrote or existing file.
6802 value = __kmp_str_format("%s", data1); // read value from SHM
6803 munmap(data1, SHM_SIZE1024);
6804 close(fd1);
6805#else // Windows and unix with static library
6806 // Set environment variable, but do not overwrite if it already exists.
6807 __kmp_env_set(name, __kmp_registration_str, 0);
6808 // read value to see if it got set
6809 value = __kmp_env_get(name);
6810#endif
6811
6812 if (value != NULL__null && strcmp(value, __kmp_registration_str) == 0) {
6813 done = 1; // Ok, environment variable set successfully, exit the loop.
6814 } else {
6815 // Oops. Write failed. Another copy of OpenMP RTL is in memory.
6816 // Check whether it is alive or dead.
6817 int neighbor = 0; // 0 -- unknown status, 1 -- alive, 2 -- dead.
6818 char *tail = value;
6819 char *flag_addr_str = NULL__null;
6820 char *flag_val_str = NULL__null;
6821 char const *file_name = NULL__null;
6822 __kmp_str_split(tail, '-', &flag_addr_str, &tail);
6823 __kmp_str_split(tail, '-', &flag_val_str, &tail);
6824 file_name = tail;
6825 if (tail != NULL__null) {
6826 unsigned long *flag_addr = 0;
6827 unsigned long flag_val = 0;
6828 KMP_SSCANFsscanf(flag_addr_str, "%p", RCAST(void **, &flag_addr)reinterpret_cast<void **>(&flag_addr));
6829 KMP_SSCANFsscanf(flag_val_str, "%lx", &flag_val);
6830 if (flag_addr != 0 && flag_val != 0 && strcmp(file_name, "") != 0) {
6831 // First, check whether environment-encoded address is mapped into
6832 // addr space.
6833 // If so, dereference it to see if it still has the right value.
6834 if (__kmp_is_address_mapped(flag_addr) && *flag_addr == flag_val) {
6835 neighbor = 1;
6836 } else {
6837 // If not, then we know the other copy of the library is no longer
6838 // running.
6839 neighbor = 2;
6840 }
6841 }
6842 }
6843 switch (neighbor) {
6844 case 0: // Cannot parse environment variable -- neighbor status unknown.
6845 // Assume it is the incompatible format of a future version of the
6846 // library. Assume the other library is alive.
6847 // WARN( ... ); // TODO: Issue a warning.
6848 file_name = "unknown library";
6849 KMP_FALLTHROUGH()[[fallthrough]];
6850 // Attention! Falling through to the next case. That's intentional.
6851 case 1: { // Neighbor is alive.
6852 // Check it is allowed.
6853 char *duplicate_ok = __kmp_env_get("KMP_DUPLICATE_LIB_OK");
6854 if (!__kmp_str_match_true(duplicate_ok)) {
6855 // That's not allowed. Issue fatal error.
6856 __kmp_fatal(KMP_MSG(DuplicateLibrary, KMP_LIBRARY_FILE, file_name)__kmp_msg_format(kmp_i18n_msg_DuplicateLibrary, "libomp.so.5"
, file_name)
,
6857 KMP_HNT(DuplicateLibrary)__kmp_msg_format(kmp_i18n_hnt_DuplicateLibrary), __kmp_msg_null);
6858 }
6859 KMP_INTERNAL_FREE(duplicate_ok)free(duplicate_ok);
6860 __kmp_duplicate_library_ok = 1;
6861 done = 1; // Exit the loop.
6862 } break;
6863 case 2: { // Neighbor is dead.
6864
6865#if defined(KMP_USE_SHM)
6866 // close shared memory.
6867 shm_unlink(shm_name); // this removes file in /dev/shm
6868#else
6869 // Clear the variable and try to register library again.
6870 __kmp_env_unset(name);
6871#endif
6872 } break;
6873 default: {
6874 KMP_DEBUG_ASSERT(0)if (!(0)) { __kmp_debug_assert("0", "openmp/runtime/src/kmp_runtime.cpp"
, 6874); }
;
6875 } break;
6876 }
6877 }
6878 KMP_INTERNAL_FREE((void *)value)free((void *)value);
6879#if defined(KMP_USE_SHM)
6880 KMP_INTERNAL_FREE((void *)shm_name)free((void *)shm_name);
6881#endif
6882 } // while
6883 KMP_INTERNAL_FREE((void *)name)free((void *)name);
6884
6885} // func __kmp_register_library_startup
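// ---------------------------------------------------------------------------
// Editor's note (illustrative sketch, not part of kmp_runtime.cpp): the
// "neighbor" classification above parses the previously stored value back into
// a flag address, a flag value and a file name, and only dereferences the
// address after confirming it is still mapped. A reduced, stand-alone version
// of that parse-and-classify logic; is_mapped() is a stub here, whereas libomp
// uses its platform-specific __kmp_is_address_mapped():
#include <cstdio>

static bool is_mapped(const void *) { return false; } // stub for illustration

// Returns 0 = unknown, 1 = other copy alive, 2 = other copy dead.
static int classify_neighbor(const char *value) {
  void *flag_addr = nullptr;
  unsigned long flag_val = 0;
  char file_name[256] = "";
  if (std::sscanf(value, "%p-%lx-%255s", &flag_addr, &flag_val, file_name) != 3)
    return 0; // unparsable: treat the neighbor's status as unknown
  if (!flag_addr || !flag_val || !file_name[0])
    return 0;
  // Dereference only after the mapping check, exactly as the code above does.
  if (is_mapped(flag_addr) && *(unsigned long *)flag_addr == flag_val)
    return 1; // the flag is still live in some process
  return 2;   // stale entry: the previous owner has gone away
}
// ---------------------------------------------------------------------------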
6886
6887void __kmp_unregister_library(void) {
6888
6889 char *name = __kmp_reg_status_name();
6890 char *value = NULL__null;
6891
6892#if defined(KMP_USE_SHM)
6893 char *shm_name = __kmp_str_format("/%s", name);
6894 int fd1 = shm_open(shm_name, O_RDONLY00, 0666);
6895 if (fd1 == -1) {
6896 // file did not open. return.
6897 return;
6898 }
6899 char *data1 = (char *)mmap(0, SHM_SIZE1024, PROT_READ0x1, MAP_SHARED0x01, fd1, 0);
6900 if (data1 != MAP_FAILED((void *) -1)) {
6901 value = __kmp_str_format("%s", data1); // read value from SHM
6902 munmap(data1, SHM_SIZE1024);
6903 }
6904 close(fd1);
6905#else
6906 value = __kmp_env_get(name);
6907#endif
6908
6909 KMP_DEBUG_ASSERT(__kmp_registration_flag != 0)if (!(__kmp_registration_flag != 0)) { __kmp_debug_assert("__kmp_registration_flag != 0"
, "openmp/runtime/src/kmp_runtime.cpp", 6909); }
;
6910 KMP_DEBUG_ASSERT(__kmp_registration_str != NULL)if (!(__kmp_registration_str != __null)) { __kmp_debug_assert
("__kmp_registration_str != __null", "openmp/runtime/src/kmp_runtime.cpp"
, 6910); }
;
6911 if (value != NULL__null && strcmp(value, __kmp_registration_str) == 0) {
6912// Ok, this is our variable. Delete it.
6913#if defined(KMP_USE_SHM)
6914 shm_unlink(shm_name); // this removes file in /dev/shm
6915#else
6916 __kmp_env_unset(name);
6917#endif
6918 }
6919
6920#if defined(KMP_USE_SHM)
6921 KMP_INTERNAL_FREE(shm_name)free(shm_name);
6922#endif
6923
6924 KMP_INTERNAL_FREE(__kmp_registration_str)free(__kmp_registration_str);
6925 KMP_INTERNAL_FREE(value)free(value);
6926 KMP_INTERNAL_FREE(name)free(name);
6927
6928 __kmp_registration_flag = 0;
6929 __kmp_registration_str = NULL__null;
6930
6931} // __kmp_unregister_library
6932
6933// End of Library registration stuff.
6934// -----------------------------------------------------------------------------
6935
6936#if KMP_MIC_SUPPORTED((0 || 1) && (1 || 0))
6937
6938static void __kmp_check_mic_type() {
6939 kmp_cpuid_t cpuid_state = {0};
6940 kmp_cpuid_t *cs_p = &cpuid_state;
6941 __kmp_x86_cpuid(1, 0, cs_p);
6942 // We don't support mic1 at the moment
6943 if ((cs_p->eax & 0xff0) == 0xB10) {
6944 __kmp_mic_type = mic2;
6945 } else if ((cs_p->eax & 0xf0ff0) == 0x50670) {
6946 __kmp_mic_type = mic3;
6947 } else {
6948 __kmp_mic_type = non_mic;
6949 }
6950}
6951
6952#endif /* KMP_MIC_SUPPORTED */
6953
6954#if KMP_HAVE_UMWAIT((0 || 1) && (1 || 0) && !0)
6955static void __kmp_user_level_mwait_init() {
6956 struct kmp_cpuid buf;
6957 __kmp_x86_cpuid(7, 0, &buf);
6958 __kmp_waitpkg_enabled = ((buf.ecx >> 5) & 1);
6959 __kmp_umwait_enabled = __kmp_waitpkg_enabled && __kmp_user_level_mwait;
6960 __kmp_tpause_enabled = __kmp_waitpkg_enabled && (__kmp_tpause_state > 0);
6961 KF_TRACE(30, ("__kmp_user_level_mwait_init: __kmp_umwait_enabled = %d\n",if (kmp_f_debug >= 30) { __kmp_debug_printf ("__kmp_user_level_mwait_init: __kmp_umwait_enabled = %d\n"
, __kmp_umwait_enabled); }
6962 __kmp_umwait_enabled))if (kmp_f_debug >= 30) { __kmp_debug_printf ("__kmp_user_level_mwait_init: __kmp_umwait_enabled = %d\n"
, __kmp_umwait_enabled); }
;
6963}
6964#elif KMP_HAVE_MWAIT((0 || 1) && (1 || 0) && !0)
6965#ifndef AT_INTELPHIUSERMWAIT
6966// Spurious, non-existent value that should always fail to return anything.
6967// Will be replaced with the correct value when we know that.
6968#define AT_INTELPHIUSERMWAIT 10000
6969#endif
6970// getauxval() function is available in RHEL7 and SLES12. If a system with an
6971// earlier OS is used to build the RTL, we'll use the following internal
6972// function when the entry is not found.
6973unsigned long getauxval(unsigned long) KMP_WEAK_ATTRIBUTE_EXTERNAL;
6974unsigned long getauxval(unsigned long) { return 0; }
6975
6976static void __kmp_user_level_mwait_init() {
6977 // When getauxval() and correct value of AT_INTELPHIUSERMWAIT are available
6978 // use them to find if the user-level mwait is enabled. Otherwise, forcibly
6979 // set __kmp_mwait_enabled=TRUE on Intel MIC if the environment variable
6980 // KMP_USER_LEVEL_MWAIT was set to TRUE.
6981 if (__kmp_mic_type == mic3) {
6982 unsigned long res = getauxval(AT_INTELPHIUSERMWAIT);
6983 if ((res & 0x1) || __kmp_user_level_mwait) {
6984 __kmp_mwait_enabled = TRUE(!0);
6985 if (__kmp_user_level_mwait) {
6986 KMP_INFORM(EnvMwaitWarn)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_EnvMwaitWarn
), __kmp_msg_null)
;
6987 }
6988 } else {
6989 __kmp_mwait_enabled = FALSE0;
6990 }
6991 }
6992 KF_TRACE(30, ("__kmp_user_level_mwait_init: __kmp_mic_type = %d, "if (kmp_f_debug >= 30) { __kmp_debug_printf ("__kmp_user_level_mwait_init: __kmp_mic_type = %d, "
"__kmp_mwait_enabled = %d\n", __kmp_mic_type, __kmp_mwait_enabled
); }
6993 "__kmp_mwait_enabled = %d\n",if (kmp_f_debug >= 30) { __kmp_debug_printf ("__kmp_user_level_mwait_init: __kmp_mic_type = %d, "
"__kmp_mwait_enabled = %d\n", __kmp_mic_type, __kmp_mwait_enabled
); }
6994 __kmp_mic_type, __kmp_mwait_enabled))if (kmp_f_debug >= 30) { __kmp_debug_printf ("__kmp_user_level_mwait_init: __kmp_mic_type = %d, "
"__kmp_mwait_enabled = %d\n", __kmp_mic_type, __kmp_mwait_enabled
); }
;
6995}
6996#endif /* KMP_HAVE_UMWAIT */
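// ---------------------------------------------------------------------------
// Editor's note (illustrative sketch, not part of kmp_runtime.cpp):
// __kmp_user_level_mwait_init() above reads CPUID leaf 7, subleaf 0, and tests
// ECX bit 5 -- the WAITPKG feature bit that gates umwait/tpause. The same
// probe written with GCC/Clang's <cpuid.h> helper (x86 only; the has_waitpkg
// name is hypothetical):
#include <cpuid.h>

static bool has_waitpkg() {
  unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;
  if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx))
    return false;          // CPUID leaf 7 not available on this CPU
  return (ecx >> 5) & 1;   // CPUID.(EAX=7,ECX=0):ECX[5] == WAITPKG
}
// ---------------------------------------------------------------------------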
6997
6998static void __kmp_do_serial_initialize(void) {
6999 int i, gtid;
7000 size_t size;
7001
7002 KA_TRACE(10, ("__kmp_do_serial_initialize: enter\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_do_serial_initialize: enter\n"
); }
;
7003
7004 KMP_DEBUG_ASSERT(sizeof(kmp_int32) == 4)if (!(sizeof(kmp_int32) == 4)) { __kmp_debug_assert("sizeof(kmp_int32) == 4"
, "openmp/runtime/src/kmp_runtime.cpp", 7004); }
;
7005 KMP_DEBUG_ASSERT(sizeof(kmp_uint32) == 4)if (!(sizeof(kmp_uint32) == 4)) { __kmp_debug_assert("sizeof(kmp_uint32) == 4"
, "openmp/runtime/src/kmp_runtime.cpp", 7005); }
;
7006 KMP_DEBUG_ASSERT(sizeof(kmp_int64) == 8)if (!(sizeof(kmp_int64) == 8)) { __kmp_debug_assert("sizeof(kmp_int64) == 8"
, "openmp/runtime/src/kmp_runtime.cpp", 7006); }
;
7007 KMP_DEBUG_ASSERT(sizeof(kmp_uint64) == 8)if (!(sizeof(kmp_uint64) == 8)) { __kmp_debug_assert("sizeof(kmp_uint64) == 8"
, "openmp/runtime/src/kmp_runtime.cpp", 7007); }
;
7008 KMP_DEBUG_ASSERT(sizeof(kmp_intptr_t) == sizeof(void *))if (!(sizeof(kmp_intptr_t) == sizeof(void *))) { __kmp_debug_assert
("sizeof(kmp_intptr_t) == sizeof(void *)", "openmp/runtime/src/kmp_runtime.cpp"
, 7008); }
;
7009
7010#if OMPT_SUPPORT1
7011 ompt_pre_init();
7012#endif
7013#if OMPD_SUPPORT1
7014 __kmp_env_dump();
7015 ompd_init();
7016#endif
7017
7018 __kmp_validate_locks();
7019
7020 /* Initialize internal memory allocator */
7021 __kmp_init_allocator();
7022
7023 /* Register the library startup via an environment variable or via mapped
7024 shared memory file and check to see whether another copy of the library is
7025 already registered. Since a forked child process is often terminated, we
7026 postpone the registration till middle initialization in the child */
7027 if (__kmp_need_register_serial)
7028 __kmp_register_library_startup();
7029
7030 /* TODO reinitialization of library */
7031 if (TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done)) {
7032 KA_TRACE(10, ("__kmp_do_serial_initialize: reinitialization of library\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_do_serial_initialize: reinitialization of library\n"
); }
;
7033 }
7034
7035 __kmp_global.g.g_abort = 0;
7036 TCW_SYNC_4(__kmp_global.g.g_done, FALSE)(__kmp_global.g.g_done) = (0);
7037
7038/* initialize the locks */
7039#if KMP_USE_ADAPTIVE_LOCKS(0 || 1) && !0
7040#if KMP_DEBUG_ADAPTIVE_LOCKS0
7041 __kmp_init_speculative_stats();
7042#endif
7043#endif
7044#if KMP_STATS_ENABLED0
7045 __kmp_stats_init();
7046#endif
7047 __kmp_init_lock(&__kmp_global_lock);
7048 __kmp_init_queuing_lock(&__kmp_dispatch_lock);
7049 __kmp_init_lock(&__kmp_debug_lock);
7050 __kmp_init_atomic_lock(&__kmp_atomic_lock);
7051 __kmp_init_atomic_lock(&__kmp_atomic_lock_1i);
7052 __kmp_init_atomic_lock(&__kmp_atomic_lock_2i);
7053 __kmp_init_atomic_lock(&__kmp_atomic_lock_4i);
7054 __kmp_init_atomic_lock(&__kmp_atomic_lock_4r);
7055 __kmp_init_atomic_lock(&__kmp_atomic_lock_8i);
7056 __kmp_init_atomic_lock(&__kmp_atomic_lock_8r);
7057 __kmp_init_atomic_lock(&__kmp_atomic_lock_8c);
7058 __kmp_init_atomic_lock(&__kmp_atomic_lock_10r);
7059 __kmp_init_atomic_lock(&__kmp_atomic_lock_16r);
7060 __kmp_init_atomic_lock(&__kmp_atomic_lock_16c);
7061 __kmp_init_atomic_lock(&__kmp_atomic_lock_20c);
7062 __kmp_init_atomic_lock(&__kmp_atomic_lock_32c);
7063 __kmp_init_bootstrap_lock(&__kmp_forkjoin_lock);
7064 __kmp_init_bootstrap_lock(&__kmp_exit_lock);
7065#if KMP_USE_MONITOR
7066 __kmp_init_bootstrap_lock(&__kmp_monitor_lock);
7067#endif
7068 __kmp_init_bootstrap_lock(&__kmp_tp_cached_lock);
7069
7070 /* conduct initialization and initial setup of configuration */
7071
7072 __kmp_runtime_initialize();
7073
7074#if KMP_MIC_SUPPORTED((0 || 1) && (1 || 0))
7075 __kmp_check_mic_type();
7076#endif
7077
7078// Some global variable initialization moved here from kmp_env_initialize()
7079#ifdef KMP_DEBUG1
7080 kmp_diag = 0;
7081#endif
7082 __kmp_abort_delay = 0;
7083
7084 // From __kmp_init_dflt_team_nth()
7085 /* assume the entire machine will be used */
7086 __kmp_dflt_team_nth_ub = __kmp_xproc;
7087 if (__kmp_dflt_team_nth_ub < KMP_MIN_NTH1) {
7088 __kmp_dflt_team_nth_ub = KMP_MIN_NTH1;
7089 }
7090 if (__kmp_dflt_team_nth_ub > __kmp_sys_max_nth) {
7091 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
7092 }
7093 __kmp_max_nth = __kmp_sys_max_nth;
7094 __kmp_cg_max_nth = __kmp_sys_max_nth;
7095 __kmp_teams_max_nth = __kmp_xproc; // set a "reasonable" default
7096 if (__kmp_teams_max_nth > __kmp_sys_max_nth) {
7097 __kmp_teams_max_nth = __kmp_sys_max_nth;
7098 }
7099
7100 // Three vars below moved here from __kmp_env_initialize() "KMP_BLOCKTIME"
7101 // part
7102 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME(__kmp_is_hybrid_cpu() ? (0) : (200));
7103#if KMP_USE_MONITOR
7104 __kmp_monitor_wakeups =
7105 KMP_WAKEUPS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
7106 __kmp_bt_intervals =
7107 KMP_INTERVALS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
7108#endif
7109 // From "KMP_LIBRARY" part of __kmp_env_initialize()
7110 __kmp_library = library_throughput;
7111 // From KMP_SCHEDULE initialization
7112 __kmp_static = kmp_sch_static_balanced;
7113// AC: do not use analytical here, because it is non-monotonic
7114//__kmp_guided = kmp_sch_guided_iterative_chunked;
7115//__kmp_auto = kmp_sch_guided_analytical_chunked; // AC: it is the default, no
7116// need to repeat assignment
7117// Barrier initialization. Moved here from __kmp_env_initialize() Barrier branch
7118// bit control and barrier method control parts
7119#if KMP_FAST_REDUCTION_BARRIER1
7120#define kmp_reduction_barrier_gather_bb ((int)1)
7121#define kmp_reduction_barrier_release_bb ((int)1)
7122#define kmp_reduction_barrier_gather_pat __kmp_barrier_gather_pat_dflt
7123#define kmp_reduction_barrier_release_pat __kmp_barrier_release_pat_dflt
7124#endif // KMP_FAST_REDUCTION_BARRIER
7125 for (i = bs_plain_barrier; i < bs_last_barrier; i++) {
7126 __kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt;
7127 __kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt;
7128 __kmp_barrier_gather_pattern[i] = __kmp_barrier_gather_pat_dflt;
7129 __kmp_barrier_release_pattern[i] = __kmp_barrier_release_pat_dflt;
7130#if KMP_FAST_REDUCTION_BARRIER1
7131 if (i == bs_reduction_barrier) { // tested and confirmed on ALTIX only (
7132 // lin_64 ): hyper,1
7133 __kmp_barrier_gather_branch_bits[i] = kmp_reduction_barrier_gather_bb;
7134 __kmp_barrier_release_branch_bits[i] = kmp_reduction_barrier_release_bb;
7135 __kmp_barrier_gather_pattern[i] = kmp_reduction_barrier_gather_pat;
7136 __kmp_barrier_release_pattern[i] = kmp_reduction_barrier_release_pat;
7137 }
7138#endif // KMP_FAST_REDUCTION_BARRIER
7139 }
7140#if KMP_FAST_REDUCTION_BARRIER1
7141#undef kmp_reduction_barrier_release_pat
7142#undef kmp_reduction_barrier_gather_pat
7143#undef kmp_reduction_barrier_release_bb
7144#undef kmp_reduction_barrier_gather_bb
7145#endif // KMP_FAST_REDUCTION_BARRIER
7146#if KMP_MIC_SUPPORTED((0 || 1) && (1 || 0))
7147 if (__kmp_mic_type == mic2) { // KNC
7148 // AC: plane=3,2, forkjoin=2,1 are optimal for 240 threads on KNC
7149 __kmp_barrier_gather_branch_bits[bs_plain_barrier] = 3; // plain gather
7150 __kmp_barrier_release_branch_bits[bs_forkjoin_barrier] =
7151 1; // forkjoin release
7152 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
7153 __kmp_barrier_release_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
7154 }
7155#if KMP_FAST_REDUCTION_BARRIER1
7156 if (__kmp_mic_type == mic2) { // KNC
7157 __kmp_barrier_gather_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
7158 __kmp_barrier_release_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
7159 }
7160#endif // KMP_FAST_REDUCTION_BARRIER
7161#endif // KMP_MIC_SUPPORTED
7162
7163// From KMP_CHECKS initialization
7164#ifdef KMP_DEBUG1
7165 __kmp_env_checks = TRUE(!0); /* development versions have the extra checks */
7166#else
7167 __kmp_env_checks = FALSE0; /* port versions do not have the extra checks */
7168#endif
7169
7170 // From "KMP_FOREIGN_THREADS_THREADPRIVATE" initialization
7171 __kmp_foreign_tp = TRUE(!0);
7172
7173 __kmp_global.g.g_dynamic = FALSE0;
7174 __kmp_global.g.g_dynamic_mode = dynamic_default;
7175
7176 __kmp_init_nesting_mode();
7177
7178 __kmp_env_initialize(NULL__null);
7179
7180#if KMP_HAVE_MWAIT((0 || 1) && (1 || 0) && !0) || KMP_HAVE_UMWAIT((0 || 1) && (1 || 0) && !0)
7181 __kmp_user_level_mwait_init();
7182#endif
7183// Print all messages in message catalog for testing purposes.
7184#ifdef KMP_DEBUG1
7185 char const *val = __kmp_env_get("KMP_DUMP_CATALOG");
7186 if (__kmp_str_match_true(val)) {
7187 kmp_str_buf_t buffer;
7188 __kmp_str_buf_init(&buffer){ (&buffer)->str = (&buffer)->bulk; (&buffer
)->size = sizeof((&buffer)->bulk); (&buffer)->
used = 0; (&buffer)->bulk[0] = 0; }
;
7189 __kmp_i18n_dump_catalog(&buffer);
7190 __kmp_printf("%s", buffer.str);
7191 __kmp_str_buf_free(&buffer);
7192 }
7193 __kmp_env_free(&val);
7194#endif
7195
7196 __kmp_threads_capacity =
7197 __kmp_initial_threads_capacity(__kmp_dflt_team_nth_ub);
7198 // Moved here from __kmp_env_initialize() "KMP_ALL_THREADPRIVATE" part
7199 __kmp_tp_capacity = __kmp_default_tp_capacity(
7200 __kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
7201
7202 // If the library is shut down properly, both pools must be NULL. Just in
7203 // case, set them to NULL -- some memory may leak, but subsequent code will
7204 // work even if pools are not freed.
7205 KMP_DEBUG_ASSERT(__kmp_thread_pool == NULL)if (!(__kmp_thread_pool == __null)) { __kmp_debug_assert("__kmp_thread_pool == __null"
, "openmp/runtime/src/kmp_runtime.cpp", 7205); }
;
7206 KMP_DEBUG_ASSERT(__kmp_thread_pool_insert_pt == NULL)if (!(__kmp_thread_pool_insert_pt == __null)) { __kmp_debug_assert
("__kmp_thread_pool_insert_pt == __null", "openmp/runtime/src/kmp_runtime.cpp"
, 7206); }
;
7207 KMP_DEBUG_ASSERT(__kmp_team_pool == NULL)if (!(__kmp_team_pool == __null)) { __kmp_debug_assert("__kmp_team_pool == __null"
, "openmp/runtime/src/kmp_runtime.cpp", 7207); }
;
7208 __kmp_thread_pool = NULL__null;
7209 __kmp_thread_pool_insert_pt = NULL__null;
7210 __kmp_team_pool = NULL__null;
7211
7212 /* Allocate all of the variable sized records */
7213 /* NOTE: __kmp_threads_capacity entries are allocated, but the arrays are
7214 * expandable */
7215 /* Since allocation is cache-aligned, just add extra padding at the end */
7216 size =
7217 (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * __kmp_threads_capacity +
7218 CACHE_LINE64;
7219 __kmp_threads = (kmp_info_t **)__kmp_allocate(size)___kmp_allocate((size), "openmp/runtime/src/kmp_runtime.cpp",
7219)
;
7220 __kmp_root = (kmp_root_t **)((char *)__kmp_threads +
7221 sizeof(kmp_info_t *) * __kmp_threads_capacity);
7222
7223 /* init thread counts */
7224 KMP_DEBUG_ASSERT(__kmp_all_nth ==if (!(__kmp_all_nth == 0)) { __kmp_debug_assert("__kmp_all_nth == 0"
, "openmp/runtime/src/kmp_runtime.cpp", 7225); }
7225 0)if (!(__kmp_all_nth == 0)) { __kmp_debug_assert("__kmp_all_nth == 0"
, "openmp/runtime/src/kmp_runtime.cpp", 7225); }
; // Asserts fail if the library is reinitializing and
7226 KMP_DEBUG_ASSERT(__kmp_nth == 0)if (!(__kmp_nth == 0)) { __kmp_debug_assert("__kmp_nth == 0",
"openmp/runtime/src/kmp_runtime.cpp", 7226); }
; // something was wrong in termination.
7227 __kmp_all_nth = 0;
7228 __kmp_nth = 0;
7229
7230 /* setup the uber master thread and hierarchy */
7231 gtid = __kmp_register_root(TRUE(!0));
7232 KA_TRACE(10, ("__kmp_do_serial_initialize T#%d\n", gtid))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_do_serial_initialize T#%d\n"
, gtid); }
;
7233 KMP_ASSERT(KMP_UBER_GTID(gtid))if (!(KMP_UBER_GTID(gtid))) { __kmp_debug_assert("KMP_UBER_GTID(gtid)"
, "openmp/runtime/src/kmp_runtime.cpp", 7233); }
;
7234 KMP_ASSERT(KMP_INITIAL_GTID(gtid))if (!((0 == (gtid)))) { __kmp_debug_assert("KMP_INITIAL_GTID(gtid)"
, "openmp/runtime/src/kmp_runtime.cpp", 7234); }
;
7235
7236 KMP_MB(); /* Flush all pending memory write invalidates. */
7237
7238 __kmp_common_initialize();
7239
7240#if KMP_OS_UNIX1
7241 /* invoke the child fork handler */
7242 __kmp_register_atfork();
7243#endif
7244
7245#if !KMP_DYNAMIC_LIB1
7246 {
7247 /* Invoke the exit handler when the program finishes, only for static
7248 library. For dynamic library, we already have _fini and DllMain. */
7249 int rc = atexit(__kmp_internal_end_atexit);
7250 if (rc != 0) {
7251 __kmp_fatal(KMP_MSG(FunctionError, "atexit()")__kmp_msg_format(kmp_i18n_msg_FunctionError, "atexit()"), KMP_ERR(rc)__kmp_msg_error_code(rc),
7252 __kmp_msg_null);
7253 }
7254 }
7255#endif
7256
7257#if KMP_HANDLE_SIGNALS(1 || 0)
7258#if KMP_OS_UNIX1
7259 /* NOTE: make sure that this is called before the user installs their own
7260 signal handlers so that the user handlers are called first. This way they
7261 can return false, not call our handler, avoid terminating the library, and
7262 continue execution where they left off. */
7263 __kmp_install_signals(FALSE0);
7264#endif /* KMP_OS_UNIX */
7265#if KMP_OS_WINDOWS0
7266 __kmp_install_signals(TRUE(!0));
7267#endif /* KMP_OS_WINDOWS */
7268#endif
7269
7270 /* we have finished the serial initialization */
7271 __kmp_init_counter++;
7272
7273 __kmp_init_serial = TRUE(!0);
7274
7275 if (__kmp_settings) {
7276 __kmp_env_print();
7277 }
7278
7279 if (__kmp_display_env || __kmp_display_env_verbose) {
7280 __kmp_env_print_2();
7281 }
7282
7283#if OMPT_SUPPORT1
7284 ompt_post_init();
7285#endif
7286
7287 KMP_MB();
7288
7289 KA_TRACE(10, ("__kmp_do_serial_initialize: exit\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_do_serial_initialize: exit\n"
); }
;
7290}
7291
7292void __kmp_serial_initialize(void) {
7293 if (__kmp_init_serial) {
7294 return;
7295 }
7296 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7297 if (__kmp_init_serial) {
7298 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7299 return;
7300 }
7301 __kmp_do_serial_initialize();
7302 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7303}
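The locking wrappers in this file (__kmp_serial_initialize above, and __kmp_middle_initialize / __kmp_parallel_initialize below) all follow the same double-checked pattern around __kmp_initz_lock: test the init flag, take the bootstrap lock, re-test, then run the matching __kmp_do_* worker. The following is a minimal stand-alone sketch of that idiom using standard C++ primitives; the names init_done, init_lock and do_initialize are hypothetical stand-ins, not code from kmp_runtime.cpp.

#include <atomic>
#include <mutex>

static std::atomic<bool> init_done{false}; // stands in for __kmp_init_serial
static std::mutex init_lock;               // stands in for __kmp_initz_lock

static void do_initialize() { /* expensive one-time setup (hypothetical) */ }

void ensure_initialized() {
  // Fast path: skip the lock once initialization has already been observed.
  if (init_done.load(std::memory_order_acquire))
    return;
  std::lock_guard<std::mutex> guard(init_lock);
  // Re-check under the lock: another thread may have finished the work while
  // we were waiting -- the same re-test the kmp_ wrappers perform.
  if (init_done.load(std::memory_order_relaxed))
    return;
  do_initialize();
  init_done.store(true, std::memory_order_release);
}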
7304
7305static void __kmp_do_middle_initialize(void) {
7306 int i, j;
7307 int prev_dflt_team_nth;
7308
7309 if (!__kmp_init_serial) {
7310 __kmp_do_serial_initialize();
7311 }
7312
7313 KA_TRACE(10, ("__kmp_middle_initialize: enter\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_middle_initialize: enter\n"
); }
;
7314
7315 if (UNLIKELY(!__kmp_need_register_serial)__builtin_expect(!!(!__kmp_need_register_serial), 0)) {
7316 // We are in a forked child process. The registration was skipped during
7317 // serial initialization in __kmp_atfork_child handler. Do it here.
7318 __kmp_register_library_startup();
7319 }
7320
7321 // Save the previous value for the __kmp_dflt_team_nth so that
7322 // we can avoid some reinitialization if it hasn't changed.
7323 prev_dflt_team_nth = __kmp_dflt_team_nth;
7324
7325#if KMP_AFFINITY_SUPPORTED1
7326 // __kmp_affinity_initialize() will try to set __kmp_ncores to the
7327 // number of cores on the machine.
7328 __kmp_affinity_initialize(__kmp_affinity);
7329
7330#endif /* KMP_AFFINITY_SUPPORTED */
7331
7332 KMP_ASSERT(__kmp_xproc > 0)if (!(__kmp_xproc > 0)) { __kmp_debug_assert("__kmp_xproc > 0"
, "openmp/runtime/src/kmp_runtime.cpp", 7332); }
;
7333 if (__kmp_avail_proc == 0) {
7334 __kmp_avail_proc = __kmp_xproc;
7335 }
7336
7337 // If there were empty places in num_threads list (OMP_NUM_THREADS=,,2,3),
7338 // correct them now
7339 j = 0;
7340 while ((j < __kmp_nested_nth.used) && !__kmp_nested_nth.nth[j]) {
7341 __kmp_nested_nth.nth[j] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub =
7342 __kmp_avail_proc;
7343 j++;
7344 }
7345
7346 if (__kmp_dflt_team_nth == 0) {
7347#ifdef KMP_DFLT_NTH_CORES
7348 // Default #threads = #cores
7349 __kmp_dflt_team_nth = __kmp_ncores;
7350 KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
"__kmp_ncores (%d)\n", __kmp_dflt_team_nth); }
7351 "__kmp_ncores (%d)\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
"__kmp_ncores (%d)\n", __kmp_dflt_team_nth); }
7352 __kmp_dflt_team_nth))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
"__kmp_ncores (%d)\n", __kmp_dflt_team_nth); }
;
7353#else
7354 // Default #threads = #available OS procs
7355 __kmp_dflt_team_nth = __kmp_avail_proc;
7356 KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
"__kmp_avail_proc(%d)\n", __kmp_dflt_team_nth); }
7357 "__kmp_avail_proc(%d)\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
"__kmp_avail_proc(%d)\n", __kmp_dflt_team_nth); }
7358 __kmp_dflt_team_nth))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
"__kmp_avail_proc(%d)\n", __kmp_dflt_team_nth); }
;
7359#endif /* KMP_DFLT_NTH_CORES */
7360 }
7361
7362 if (__kmp_dflt_team_nth < KMP_MIN_NTH1) {
7363 __kmp_dflt_team_nth = KMP_MIN_NTH1;
7364 }
7365 if (__kmp_dflt_team_nth > __kmp_sys_max_nth) {
7366 __kmp_dflt_team_nth = __kmp_sys_max_nth;
7367 }
7368
7369 if (__kmp_nesting_mode > 0)
7370 __kmp_set_nesting_mode_threads();
7371
7372 // There's no harm in continuing if the following check fails,
7373 // but it indicates an error in the previous logic.
7374 KMP_DEBUG_ASSERT(__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub)if (!(__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub)) { __kmp_debug_assert
("__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub", "openmp/runtime/src/kmp_runtime.cpp"
, 7374); }
;
7375
7376 if (__kmp_dflt_team_nth != prev_dflt_team_nth) {
7377 // Run through the __kmp_threads array and set the num threads icv for each
7378 // root thread that is currently registered with the RTL (which has not
7379 // already explicitly set its nthreads-var with a call to
7380 // omp_set_num_threads()).
7381 for (i = 0; i < __kmp_threads_capacity; i++) {
7382 kmp_info_t *thread = __kmp_threads[i];
7383 if (thread == NULL__null)
7384 continue;
7385 if (thread->th.th_current_task->td_icvs.nproc != 0)
7386 continue;
7387
7388 set__nproc(__kmp_threads[i], __kmp_dflt_team_nth)(((__kmp_threads[i])->th.th_current_task->td_icvs.nproc
) = (__kmp_dflt_team_nth))
;
7389 }
7390 }
7391 KA_TRACE(if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n"
, __kmp_dflt_team_nth); }
7392 20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n"
, __kmp_dflt_team_nth); }
7393 ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n"
, __kmp_dflt_team_nth); }
7394 __kmp_dflt_team_nth))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n"
, __kmp_dflt_team_nth); }
;
7395
7396#ifdef KMP_ADJUST_BLOCKTIME1
7397 /* Adjust blocktime to zero if necessary now that __kmp_avail_proc is set */
7398 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
7399 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0)if (!(__kmp_avail_proc > 0)) { __kmp_debug_assert("__kmp_avail_proc > 0"
, "openmp/runtime/src/kmp_runtime.cpp", 7399); }
;
7400 if (__kmp_nth > __kmp_avail_proc) {
7401 __kmp_zero_bt = TRUE(!0);
7402 }
7403 }
7404#endif /* KMP_ADJUST_BLOCKTIME */
7405
7406 /* we have finished middle initialization */
7407 TCW_SYNC_4(__kmp_init_middle, TRUE)(__kmp_init_middle) = ((!0));
7408
7409 KA_TRACE(10, ("__kmp_do_middle_initialize: exit\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_do_middle_initialize: exit\n"
); }
;
7410}
7411
7412void __kmp_middle_initialize(void) {
7413 if (__kmp_init_middle) {
7414 return;
7415 }
7416 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7417 if (__kmp_init_middle) {
7418 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7419 return;
7420 }
7421 __kmp_do_middle_initialize();
7422 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7423}
7424
7425void __kmp_parallel_initialize(void) {
7426 int gtid = __kmp_entry_gtid()__kmp_get_global_thread_id_reg(); // this might be a new root
7427
7428 /* synchronize parallel initialization (for sibling) */
7429 if (TCR_4(__kmp_init_parallel)(__kmp_init_parallel))
7430 return;
7431 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7432 if (TCR_4(__kmp_init_parallel)(__kmp_init_parallel)) {
7433 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7434 return;
7435 }
7436
7437 /* TODO reinitialization after we have already shut down */
7438 if (TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done)) {
7439 KA_TRACE(if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_parallel_initialize: attempt to init while shutting down\n"
); }
7440 10,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_parallel_initialize: attempt to init while shutting down\n"
); }
7441 ("__kmp_parallel_initialize: attempt to init while shutting down\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_parallel_initialize: attempt to init while shutting down\n"
); }
;
7442 __kmp_infinite_loop();
7443 }
7444
7445 /* jc: The lock __kmp_initz_lock is already held, so calling
7446 __kmp_serial_initialize or __kmp_middle_initialize would cause a deadlock.
7447 So we call __kmp_do_middle_initialize directly. */
7448 if (!__kmp_init_middle) {
7449 __kmp_do_middle_initialize();
7450 }
7451 __kmp_assign_root_init_mask();
7452 __kmp_resume_if_hard_paused();
7453
7454 /* begin initialization */
7455 KA_TRACE(10, ("__kmp_parallel_initialize: enter\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_parallel_initialize: enter\n"
); }
;
7456 KMP_ASSERT(KMP_UBER_GTID(gtid))if (!(KMP_UBER_GTID(gtid))) { __kmp_debug_assert("KMP_UBER_GTID(gtid)"
, "openmp/runtime/src/kmp_runtime.cpp", 7456); }
;
7457
7458#if KMP_ARCH_X860 || KMP_ARCH_X86_641
7459 // Save the FP control regs.
7460 // Worker threads will set theirs to these values at thread startup.
7461 __kmp_store_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word);
7462 __kmp_store_mxcsr(&__kmp_init_mxcsr);
7463 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK0xffffffc0;
7464#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
7465
7466#if KMP_OS_UNIX1
7467#if KMP_HANDLE_SIGNALS(1 || 0)
7468 /* must be after __kmp_serial_initialize */
7469 __kmp_install_signals(TRUE(!0));
7470#endif
7471#endif
7472
7473 __kmp_suspend_initialize();
7474
7475#if defined(USE_LOAD_BALANCE1)
7476 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
7477 __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
7478 }
7479#else
7480 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
7481 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
7482 }
7483#endif
7484
7485 if (__kmp_version) {
7486 __kmp_print_version_2();
7487 }
7488
7489 /* we have finished parallel initialization */
7490 TCW_SYNC_4(__kmp_init_parallel, TRUE)(__kmp_init_parallel) = ((!0));
7491
7492 KMP_MB();
7493 KA_TRACE(10, ("__kmp_parallel_initialize: exit\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_parallel_initialize: exit\n"
); }
;
7494
7495 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7496}
7497
7498void __kmp_hidden_helper_initialize() {
7499 if (TCR_4(__kmp_init_hidden_helper)(__kmp_init_hidden_helper))
7500 return;
7501
7502 // __kmp_parallel_initialize is required before we initialize hidden helper
7503 if (!TCR_4(__kmp_init_parallel)(__kmp_init_parallel))
7504 __kmp_parallel_initialize();
7505
7506 // Double check. Note that this double check should not be placed before
7507 // __kmp_parallel_initialize as it would cause a deadlock.
7508 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7509 if (TCR_4(__kmp_init_hidden_helper)(__kmp_init_hidden_helper)) {
7510 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7511 return;
7512 }
7513
7514#if KMP_AFFINITY_SUPPORTED1
7515 // Initialize hidden helper affinity settings.
7516 // The above __kmp_parallel_initialize() will initialize
7517 // regular affinity (and topology) if not already done.
7518 if (!__kmp_hh_affinity.flags.initialized)
7519 __kmp_affinity_initialize(__kmp_hh_affinity);
7520#endif
7521
7522 // Set the count of hidden helper tasks to be executed to zero
7523 KMP_ATOMIC_ST_REL(&__kmp_unexecuted_hidden_helper_tasks, 0)(&__kmp_unexecuted_hidden_helper_tasks)->store(0, std::
memory_order_release)
;
7524
7525 // Set the global variable indicating that we're initializing hidden helper
7526 // team/threads
7527 TCW_SYNC_4(__kmp_init_hidden_helper_threads, TRUE)(__kmp_init_hidden_helper_threads) = ((!0));
7528
7529 // Platform independent initialization
7530 __kmp_do_initialize_hidden_helper_threads();
7531
7532 // Wait here for the finish of initialization of hidden helper teams
7533 __kmp_hidden_helper_threads_initz_wait();
7534
7535 // We have finished hidden helper initialization
7536 TCW_SYNC_4(__kmp_init_hidden_helper, TRUE)(__kmp_init_hidden_helper) = ((!0));
7537
7538 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7539}
7540
7541/* ------------------------------------------------------------------------ */
7542
7543void __kmp_run_before_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
7544 kmp_team_t *team) {
7545 kmp_disp_t *dispatch;
7546
7547 KMP_MB();
7548
7549 /* none of the threads have encountered any constructs, yet. */
7550 this_thr->th.th_local.this_construct = 0;
7551#if KMP_CACHE_MANAGE
7552 KMP_CACHE_PREFETCH(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_arrived);
7553#endif /* KMP_CACHE_MANAGE */
7554 dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch)((void *)(this_thr->th.th_dispatch));
7555 KMP_DEBUG_ASSERT(dispatch)if (!(dispatch)) { __kmp_debug_assert("dispatch", "openmp/runtime/src/kmp_runtime.cpp"
, 7555); }
;
7556 KMP_DEBUG_ASSERT(team->t.t_dispatch)if (!(team->t.t_dispatch)) { __kmp_debug_assert("team->t.t_dispatch"
, "openmp/runtime/src/kmp_runtime.cpp", 7556); }
;
7557 // KMP_DEBUG_ASSERT( this_thr->th.th_dispatch == &team->t.t_dispatch[
7558 // this_thr->th.th_info.ds.ds_tid ] );
7559
7560 dispatch->th_disp_index = 0; /* reset the dispatch buffer counter */
7561 dispatch->th_doacross_buf_idx = 0; // reset doacross dispatch buffer counter
7562 if (__kmp_env_consistency_check)
7563 __kmp_push_parallel(gtid, team->t.t_ident);
7564
7565 KMP_MB(); /* Flush all pending memory write invalidates. */
7566}
7567
7568void __kmp_run_after_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
7569 kmp_team_t *team) {
7570 if (__kmp_env_consistency_check)
7571 __kmp_pop_parallel(gtid, team->t.t_ident);
7572
7573 __kmp_finish_implicit_task(this_thr);
7574}
7575
7576int __kmp_invoke_task_func(int gtid) {
7577 int rc;
7578 int tid = __kmp_tid_from_gtid(gtid);
7579 kmp_info_t *this_thr = __kmp_threads[gtid];
7580 kmp_team_t *team = this_thr->th.th_team;
7581
7582 __kmp_run_before_invoked_task(gtid, tid, this_thr, team);
7583#if USE_ITT_BUILD1
7584 if (__itt_stack_caller_create_ptr__kmp_itt_stack_caller_create_ptr__3_0) {
7585 // inform ittnotify about entering user's code
7586 if (team->t.t_stack_id != NULL__null) {
7587 __kmp_itt_stack_callee_enter((__itt_caller)team->t.t_stack_id);
7588 } else {
7589 KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL)if (!(team->t.t_parent->t.t_stack_id != __null)) { __kmp_debug_assert
("team->t.t_parent->t.t_stack_id != __null", "openmp/runtime/src/kmp_runtime.cpp"
, 7589); }
;
7590 __kmp_itt_stack_callee_enter(
7591 (__itt_caller)team->t.t_parent->t.t_stack_id);
7592 }
7593 }
7594#endif /* USE_ITT_BUILD */
7595#if INCLUDE_SSC_MARKS(1 && 1)
7596 SSC_MARK_INVOKING()__asm__ __volatile__("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 "
::"i"(0xd695) : "%ebx")
;
7597#endif
7598
7599#if OMPT_SUPPORT1
7600 void *dummy;
7601 void **exit_frame_p;
7602 ompt_data_t *my_task_data;
7603 ompt_data_t *my_parallel_data;
7604 int ompt_team_size;
7605
7606 if (ompt_enabled.enabled) {
7607 exit_frame_p = &(team->t.t_implicit_task_taskdata[tid]
7608 .ompt_task_info.frame.exit_frame.ptr);
7609 } else {
7610 exit_frame_p = &dummy;
7611 }
7612
7613 my_task_data =
7614 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data);
7615 my_parallel_data = &(team->t.ompt_team_info.parallel_data);
7616 if (ompt_enabled.ompt_callback_implicit_task) {
7617 ompt_team_size = team->t.t_nproc;
7618 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)ompt_callback_implicit_task_callback(
7619 ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size,
7620 __kmp_tid_from_gtid(gtid), ompt_task_implicit);
7621 OMPT_CUR_TASK_INFO(this_thr)(&(this_thr->th.th_current_task->ompt_task_info))->thread_num = __kmp_tid_from_gtid(gtid);
7622 }
7623#endif
7624
7625#if KMP_STATS_ENABLED0
7626 stats_state_e previous_state = KMP_GET_THREAD_STATE()((void)0);
7627 if (previous_state == stats_state_e::TEAMS_REGION) {
7628 KMP_PUSH_PARTITIONED_TIMER(OMP_teams)((void)0);
7629 } else {
7630 KMP_PUSH_PARTITIONED_TIMER(OMP_parallel)((void)0);
7631 }
7632 KMP_SET_THREAD_STATE(IMPLICIT_TASK)((void)0);
7633#endif
7634
7635 rc = __kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn)((void *)(team->t.t_pkfn)), gtid,
7636 tid, (int)team->t.t_argc, (void **)team->t.t_argv
7637#if OMPT_SUPPORT1
7638 ,
7639 exit_frame_p
7640#endif
7641 );
7642#if OMPT_SUPPORT1
7643 *exit_frame_p = NULL__null;
7644 this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_team;
7645#endif
7646
7647#if KMP_STATS_ENABLED0
7648 if (previous_state == stats_state_e::TEAMS_REGION) {
7649 KMP_SET_THREAD_STATE(previous_state)((void)0);
7650 }
7651 KMP_POP_PARTITIONED_TIMER()((void)0);
7652#endif
7653
7654#if USE_ITT_BUILD1
7655 if (__itt_stack_caller_create_ptr__kmp_itt_stack_caller_create_ptr__3_0) {
7656 // inform ittnotify about leaving user's code
7657 if (team->t.t_stack_id != NULL__null) {
7658 __kmp_itt_stack_callee_leave((__itt_caller)team->t.t_stack_id);
7659 } else {
7660 KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL)if (!(team->t.t_parent->t.t_stack_id != __null)) { __kmp_debug_assert
("team->t.t_parent->t.t_stack_id != __null", "openmp/runtime/src/kmp_runtime.cpp"
, 7660); }
;
7661 __kmp_itt_stack_callee_leave(
7662 (__itt_caller)team->t.t_parent->t.t_stack_id);
7663 }
7664 }
7665#endif /* USE_ITT_BUILD */
7666 __kmp_run_after_invoked_task(gtid, tid, this_thr, team);
7667
7668 return rc;
7669}
7670
7671void __kmp_teams_master(int gtid) {
7672 // This routine is called by all primary threads in teams construct
7673 kmp_info_t *thr = __kmp_threads[gtid];
7674 kmp_team_t *team = thr->th.th_team;
7675 ident_t *loc = team->t.t_ident;
7676 thr->th.th_set_nproc = thr->th.th_teams_size.nth;
7677 KMP_DEBUG_ASSERT(thr->th.th_teams_microtask)if (!(thr->th.th_teams_microtask)) { __kmp_debug_assert("thr->th.th_teams_microtask"
, "openmp/runtime/src/kmp_runtime.cpp", 7677); }
;
7678 KMP_DEBUG_ASSERT(thr->th.th_set_nproc)if (!(thr->th.th_set_nproc)) { __kmp_debug_assert("thr->th.th_set_nproc"
, "openmp/runtime/src/kmp_runtime.cpp", 7678); }
;
7679 KA_TRACE(20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n", gtid,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n"
, gtid, __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask
); }
7680 __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n"
, gtid, __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask
); }
;
7681
7682 // This thread is a new CG root. Set up the proper variables.
7683 kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(sizeof(kmp_cg_root_t))___kmp_allocate((sizeof(kmp_cg_root_t)), "openmp/runtime/src/kmp_runtime.cpp"
, 7683)
;
7684 tmp->cg_root = thr; // Make thr the CG root
7685 // Init to thread limit stored when league primary threads were forked
7686 tmp->cg_thread_limit = thr->th.th_current_task->td_icvs.thread_limit;
7687 tmp->cg_nthreads = 1; // Init counter to one active thread, this one
7688 KA_TRACE(100, ("__kmp_teams_master: Thread %p created node %p and init"if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_teams_master: Thread %p created node %p and init"
" cg_nthreads to 1\n", thr, tmp); }
7689 " cg_nthreads to 1\n",if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_teams_master: Thread %p created node %p and init"
" cg_nthreads to 1\n", thr, tmp); }
7690 thr, tmp))if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_teams_master: Thread %p created node %p and init"
" cg_nthreads to 1\n", thr, tmp); }
;
7691 tmp->up = thr->th.th_cg_roots;
7692 thr->th.th_cg_roots = tmp;
7693
7694// Launch the league of teams now, but do not let the workers execute
7695// (they wait on the fork barrier until the next parallel region)
7696#if INCLUDE_SSC_MARKS(1 && 1)
7697 SSC_MARK_FORKING()__asm__ __volatile__("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 "
::"i"(0xd693) : "%ebx")
;
7698#endif
7699 __kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc,
7700 (microtask_t)thr->th.th_teams_microtask, // "wrapped" task
7701 VOLATILE_CAST(launch_t)(launch_t) __kmp_invoke_task_func, NULL__null);
7702#if INCLUDE_SSC_MARKS(1 && 1)
7703 SSC_MARK_JOINING()__asm__ __volatile__("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 "
::"i"(0xd694) : "%ebx")
;
7704#endif
7705 // If the team size was reduced from the limit, set it to the new size
7706 if (thr->th.th_team_nproc < thr->th.th_teams_size.nth)
7707 thr->th.th_teams_size.nth = thr->th.th_team_nproc;
7708 // AC: last parameter "1" eliminates join barrier which won't work because
7709 // worker threads are in a fork barrier waiting for more parallel regions
7710 __kmp_join_call(loc, gtid
7711#if OMPT_SUPPORT1
7712 ,
7713 fork_context_intel
7714#endif
7715 ,
7716 1);
7717}
7718
7719int __kmp_invoke_teams_master(int gtid) {
7720 kmp_info_t *this_thr = __kmp_threads[gtid];
7721 kmp_team_t *team = this_thr->th.th_team;
7722#if KMP_DEBUG1
7723 if (!__kmp_threads[gtid]->th.th_team->t.t_serialized)
7724 KMP_DEBUG_ASSERT((void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn ==if (!((void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn
== (void *)__kmp_teams_master)) { __kmp_debug_assert("(void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn == (void *)__kmp_teams_master"
, "openmp/runtime/src/kmp_runtime.cpp", 7725); }
7725 (void *)__kmp_teams_master)if (!((void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn
== (void *)__kmp_teams_master)) { __kmp_debug_assert("(void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn == (void *)__kmp_teams_master"
, "openmp/runtime/src/kmp_runtime.cpp", 7725); }
;
7726#endif
7727 __kmp_run_before_invoked_task(gtid, 0, this_thr, team);
7728#if OMPT_SUPPORT1
7729 int tid = __kmp_tid_from_gtid(gtid);
7730 ompt_data_t *task_data =
7731 &team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data;
7732 ompt_data_t *parallel_data = &team->t.ompt_team_info.parallel_data;
7733 if (ompt_enabled.ompt_callback_implicit_task) {
7734 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)ompt_callback_implicit_task_callback(
7735 ompt_scope_begin, parallel_data, task_data, team->t.t_nproc, tid,
7736 ompt_task_initial);
7737 OMPT_CUR_TASK_INFO(this_thr)(&(this_thr->th.th_current_task->ompt_task_info))->thread_num = tid;
7738 }
7739#endif
7740 __kmp_teams_master(gtid);
7741#if OMPT_SUPPORT1
7742 this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_league;
7743#endif
7744 __kmp_run_after_invoked_task(gtid, 0, this_thr, team);
7745 return 1;
7746}
7747
7748/* This sets the requested number of threads for the next parallel region
7749 encountered by this team. Since this should be enclosed in the fork/join
7750 critical section, it should avoid race conditions with asymmetrical nested
7751 parallelism. */
7752
7753void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
7754 kmp_info_t *thr = __kmp_threads[gtid];
7755
7756 if (num_threads > 0)
7757 thr->th.th_set_nproc = num_threads;
7758}
7759
7760static void __kmp_push_thread_limit(kmp_info_t *thr, int num_teams,
7761 int num_threads) {
7762 KMP_DEBUG_ASSERT(thr)if (!(thr)) { __kmp_debug_assert("thr", "openmp/runtime/src/kmp_runtime.cpp"
, 7762); }
;
7763 // Remember the number of threads for inner parallel regions
7764 if (!TCR_4(__kmp_init_middle)(__kmp_init_middle))
7765 __kmp_middle_initialize(); // get internal globals calculated
7766 __kmp_assign_root_init_mask();
7767 KMP_DEBUG_ASSERT(__kmp_avail_proc)if (!(__kmp_avail_proc)) { __kmp_debug_assert("__kmp_avail_proc"
, "openmp/runtime/src/kmp_runtime.cpp", 7767); }
;
7768 KMP_DEBUG_ASSERT(__kmp_dflt_team_nth)if (!(__kmp_dflt_team_nth)) { __kmp_debug_assert("__kmp_dflt_team_nth"
, "openmp/runtime/src/kmp_runtime.cpp", 7768); }
;
7769
7770 if (num_threads == 0) {
7771 if (__kmp_teams_thread_limit > 0) {
7772 num_threads = __kmp_teams_thread_limit;
7773 } else {
7774 num_threads = __kmp_avail_proc / num_teams;
7775 }
7776 // adjust num_threads w/o warning as it is not a user setting
7777 // num_threads = min(num_threads, nthreads-var, thread-limit-var)
7778 // no thread_limit clause specified - do not change thread-limit-var ICV
7779 if (num_threads > __kmp_dflt_team_nth) {
7780 num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
7781 }
7782 if (num_threads > thr->th.th_current_task->td_icvs.thread_limit) {
7783 num_threads = thr->th.th_current_task->td_icvs.thread_limit;
7784 } // prevent the team size from exceeding thread-limit-var
7785 if (num_teams * num_threads > __kmp_teams_max_nth) {
7786 num_threads = __kmp_teams_max_nth / num_teams;
7787 }
7788 if (num_threads == 0) {
7789 num_threads = 1;
7790 }
7791 } else {
7792 if (num_threads < 0) {
7793 __kmp_msg(kmp_ms_warning, KMP_MSG(CantFormThrTeam, num_threads, 1)__kmp_msg_format(kmp_i18n_msg_CantFormThrTeam, num_threads, 1
)
,
7794 __kmp_msg_null);
7795 num_threads = 1;
7796 }
7797 // This thread will be the primary thread of the league primary threads
7798 // Store new thread limit; old limit is saved in th_cg_roots list
7799 thr->th.th_current_task->td_icvs.thread_limit = num_threads;
7800 // num_threads = min(num_threads, nthreads-var)
7801 if (num_threads > __kmp_dflt_team_nth) {
7802 num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
7803 }
7804 if (num_teams * num_threads > __kmp_teams_max_nth) {
7805 int new_threads = __kmp_teams_max_nth / num_teams;
7806 if (new_threads == 0) {
7807 new_threads = 1;
7808 }
7809 if (new_threads != num_threads) {
7810 if (!__kmp_reserve_warn) { // user asked for too many threads
7811 __kmp_reserve_warn = 1; // conflicts with KMP_TEAMS_THREAD_LIMIT
7812 __kmp_msg(kmp_ms_warning,
7813 KMP_MSG(CantFormThrTeam, num_threads, new_threads)__kmp_msg_format(kmp_i18n_msg_CantFormThrTeam, num_threads, new_threads
)
,
7814 KMP_HNT(Unset_ALL_THREADS)__kmp_msg_format(kmp_i18n_hnt_Unset_ALL_THREADS), __kmp_msg_null);
7815 }
7816 }
7817 num_threads = new_threads;
7818 }
7819 }
7820 thr->th.th_teams_size.nth = num_threads;
7821}
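To make the clamping order in the num_threads == 0 branch of __kmp_push_thread_limit above easier to follow, here is a small, self-contained sketch of the same arithmetic. The parameter names (teams_thread_limit, avail_proc, dflt_team_nth, task_thread_limit, teams_max_nth) are stand-ins for the corresponding __kmp_* globals and ICVs; this is an illustration, not code from kmp_runtime.cpp.

#include <algorithm>

// Sketch of the "no thread_limit clause" path of __kmp_push_thread_limit.
// Assumes num_teams >= 1, as the caller guarantees.
int clamp_team_size(int num_teams, int teams_thread_limit, int avail_proc,
                    int dflt_team_nth, int task_thread_limit,
                    int teams_max_nth) {
  // Start from KMP_TEAMS_THREAD_LIMIT if set, else divide the procs evenly.
  int n = (teams_thread_limit > 0) ? teams_thread_limit
                                   : avail_proc / num_teams;
  n = std::min(n, dflt_team_nth);     // honor the nthreads-var ICV
  n = std::min(n, task_thread_limit); // honor the thread-limit-var ICV
  if (num_teams * n > teams_max_nth)  // keep the league within teams_max_nth
    n = teams_max_nth / num_teams;
  return (n > 0) ? n : 1;             // never report zero threads per team
}

// Example: clamp_team_size(8, 0, 64, 16, 12, 224) starts from 64 / 8 = 8,
// which already satisfies 8 <= 16, 8 <= 12 and 8 * 8 = 64 <= 224, so 8.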
7822
7823/* this sets the requested number of teams for the teams region and/or
7824 the number of threads for the next parallel region encountered */
7825void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
7826 int num_threads) {
7827 kmp_info_t *thr = __kmp_threads[gtid];
7828 if (num_teams < 0) {
7829 // OpenMP specification requires requested values to be positive,
7830 // but people can send us any value, so we'd better check
7831 __kmp_msg(kmp_ms_warning, KMP_MSG(NumTeamsNotPositive, num_teams, 1)__kmp_msg_format(kmp_i18n_msg_NumTeamsNotPositive, num_teams,
1)
,
7832 __kmp_msg_null);
7833 num_teams = 1;
7834 }
7835 if (num_teams == 0) {
7836 if (__kmp_nteams > 0) {
7837 num_teams = __kmp_nteams;
7838 } else {
7839 num_teams = 1; // default number of teams is 1.
7840 }
7841 }
7842 if (num_teams > __kmp_teams_max_nth) { // too many teams requested?
7843 if (!__kmp_reserve_warn) {
7844 __kmp_reserve_warn = 1;
7845 __kmp_msg(kmp_ms_warning,
7846 KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth)__kmp_msg_format(kmp_i18n_msg_CantFormThrTeam, num_teams, __kmp_teams_max_nth
)
,
7847 KMP_HNT(Unset_ALL_THREADS)__kmp_msg_format(kmp_i18n_hnt_Unset_ALL_THREADS), __kmp_msg_null);
7848 }
7849 num_teams = __kmp_teams_max_nth;
7850 }
7851 // Set number of teams (number of threads in the outer "parallel" of the
7852 // teams)
7853 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
7854
7855 __kmp_push_thread_limit(thr, num_teams, num_threads);
7856}
7857
7858/* This sets the requested number of teams for the teams region and/or
7859 the number of threads for the next parallel region encountered */
7860void __kmp_push_num_teams_51(ident_t *id, int gtid, int num_teams_lb,
7861 int num_teams_ub, int num_threads) {
7862 kmp_info_t *thr = __kmp_threads[gtid];
7863 KMP_DEBUG_ASSERT(num_teams_lb >= 0 && num_teams_ub >= 0)if (!(num_teams_lb >= 0 && num_teams_ub >= 0)) {
__kmp_debug_assert("num_teams_lb >= 0 && num_teams_ub >= 0"
, "openmp/runtime/src/kmp_runtime.cpp", 7863); }
;
7864 KMP_DEBUG_ASSERT(num_teams_ub >= num_teams_lb)if (!(num_teams_ub >= num_teams_lb)) { __kmp_debug_assert(
"num_teams_ub >= num_teams_lb", "openmp/runtime/src/kmp_runtime.cpp"
, 7864); }
;
7865 KMP_DEBUG_ASSERT(num_threads >= 0)if (!(num_threads >= 0)) { __kmp_debug_assert("num_threads >= 0"
, "openmp/runtime/src/kmp_runtime.cpp", 7865); }
;
7866
7867 if (num_teams_lb > num_teams_ub) {
7868 __kmp_fatal(KMP_MSG(FailedToCreateTeam, num_teams_lb, num_teams_ub)__kmp_msg_format(kmp_i18n_msg_FailedToCreateTeam, num_teams_lb
, num_teams_ub)
,
7869 KMP_HNT(SetNewBound, __kmp_teams_max_nth)__kmp_msg_format(kmp_i18n_hnt_SetNewBound, __kmp_teams_max_nth
)
, __kmp_msg_null);
7870 }
7871
7872 int num_teams = 1; // default number of teams is 1.
7873
7874 if (num_teams_lb == 0 && num_teams_ub > 0)
7875 num_teams_lb = num_teams_ub;
7876
7877 if (num_teams_lb == 0 && num_teams_ub == 0) { // no num_teams clause
7878 num_teams = (__kmp_nteams > 0) ? __kmp_nteams : num_teams;
7879 if (num_teams > __kmp_teams_max_nth) {
7880 if (!__kmp_reserve_warn) {
7881 __kmp_reserve_warn = 1;
7882 __kmp_msg(kmp_ms_warning,
7883 KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth)__kmp_msg_format(kmp_i18n_msg_CantFormThrTeam, num_teams, __kmp_teams_max_nth
)
,
7884 KMP_HNT(Unset_ALL_THREADS)__kmp_msg_format(kmp_i18n_hnt_Unset_ALL_THREADS), __kmp_msg_null);
7885 }
7886 num_teams = __kmp_teams_max_nth;
7887 }
7888 } else if (num_teams_lb == num_teams_ub) { // requires exact number of teams
7889 num_teams = num_teams_ub;
7890 } else { // num_teams_lb <= num_teams <= num_teams_ub
7891 if (num_threads <= 0) {
7892 if (num_teams_ub > __kmp_teams_max_nth) {
7893 num_teams = num_teams_lb;
7894 } else {
7895 num_teams = num_teams_ub;
7896 }
7897 } else {
7898 num_teams = (num_threads > __kmp_teams_max_nth)
7899 ? num_teams
7900 : __kmp_teams_max_nth / num_threads;
7901 if (num_teams < num_teams_lb) {
7902 num_teams = num_teams_lb;
7903 } else if (num_teams > num_teams_ub) {
7904 num_teams = num_teams_ub;
7905 }
7906 }
7907 }
7908 // Set number of teams (number of threads in the outer "parallel" of the
7909 // teams)
7910 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
7911
7912 __kmp_push_thread_limit(thr, num_teams, num_threads);
7913}
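The team-count selection in __kmp_push_num_teams_51 above takes a few branches; the sketch below is a condensed, behavior-equivalent restatement under the asserted precondition num_teams_lb <= num_teams_ub. All names are stand-ins for the __kmp_* globals, and the result would feed __kmp_push_thread_limit just as in the real routine.

// Sketch only: team-count selection mirroring __kmp_push_num_teams_51.
int choose_num_teams(int lb, int ub, int num_threads, int nteams_env,
                     int teams_max_nth) {
  if (lb == 0 && ub > 0)
    lb = ub; // a bare upper bound pins the lower bound to it
  if (lb == 0 && ub == 0) { // no num_teams clause: use OMP_NUM_TEAMS or 1
    int n = (nteams_env > 0) ? nteams_env : 1;
    return (n > teams_max_nth) ? teams_max_nth : n;
  }
  if (lb == ub) // an exact team count was requested
    return ub;
  if (num_threads <= 0) {
    // No thread budget: take the upper bound unless it exceeds the cap.
    return (ub > teams_max_nth) ? lb : ub;
  }
  // A thread budget was given: fit as many teams as the cap allows
  // (falling back to the default of 1 when even one team is too wide)...
  int n = (num_threads > teams_max_nth) ? 1 : teams_max_nth / num_threads;
  // ...then clamp the result into the requested [lb, ub] range.
  if (n < lb) n = lb;
  if (n > ub) n = ub;
  return n;
}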
7914
7915// Set the proc_bind var to use in the following parallel region.
7916void __kmp_push_proc_bind(ident_t *id, int gtid, kmp_proc_bind_t proc_bind) {
7917 kmp_info_t *thr = __kmp_threads[gtid];
7918 thr->th.th_set_proc_bind = proc_bind;
7919}
7920
7921/* Launch the worker threads into the microtask. */
7922
7923void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team) {
7924 kmp_info_t *this_thr = __kmp_threads[gtid];
7925
7926#ifdef KMP_DEBUG1
7927 int f;
7928#endif /* KMP_DEBUG */
7929
7930 KMP_DEBUG_ASSERT(team)if (!(team)) { __kmp_debug_assert("team", "openmp/runtime/src/kmp_runtime.cpp"
, 7930); }
;
7931 KMP_DEBUG_ASSERT(this_thr->th.th_team == team)if (!(this_thr->th.th_team == team)) { __kmp_debug_assert(
"this_thr->th.th_team == team", "openmp/runtime/src/kmp_runtime.cpp"
, 7931); }
;
7932 KMP_ASSERT(KMP_MASTER_GTID(gtid))if (!((0 == __kmp_tid_from_gtid((gtid))))) { __kmp_debug_assert
("KMP_MASTER_GTID(gtid)", "openmp/runtime/src/kmp_runtime.cpp"
, 7932); }
;
7933 KMP_MB(); /* Flush all pending memory write invalidates. */
7934
7935 team->t.t_construct = 0; /* no single directives seen yet */
7936 team->t.t_ordered.dt.t_value =
7937 0; /* thread 0 enters the ordered section first */
7938
7939 /* Reset the identifiers on the dispatch buffer */
7940 KMP_DEBUG_ASSERT(team->t.t_disp_buffer)if (!(team->t.t_disp_buffer)) { __kmp_debug_assert("team->t.t_disp_buffer"
, "openmp/runtime/src/kmp_runtime.cpp", 7940); }
;
7941 if (team->t.t_max_nproc > 1) {
7942 int i;
7943 for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
7944 team->t.t_disp_buffer[i].buffer_index = i;
7945 team->t.t_disp_buffer[i].doacross_buf_idx = i;
7946 }
7947 } else {
7948 team->t.t_disp_buffer[0].buffer_index = 0;
7949 team->t.t_disp_buffer[0].doacross_buf_idx = 0;
7950 }
7951
7952 KMP_MB(); /* Flush all pending memory write invalidates. */
7953 KMP_ASSERT(this_thr->th.th_team == team)if (!(this_thr->th.th_team == team)) { __kmp_debug_assert(
"this_thr->th.th_team == team", "openmp/runtime/src/kmp_runtime.cpp"
, 7953); }
;
7954
7955#ifdef KMP_DEBUG1
7956 for (f = 0; f < team->t.t_nproc; f++) {
7957 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&if (!(team->t.t_threads[f] && team->t.t_threads
[f]->th.th_team_nproc == team->t.t_nproc)) { __kmp_debug_assert
("team->t.t_threads[f] && team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc"
, "openmp/runtime/src/kmp_runtime.cpp", 7958); }
7958 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc)if (!(team->t.t_threads[f] && team->t.t_threads
[f]->th.th_team_nproc == team->t.t_nproc)) { __kmp_debug_assert
("team->t.t_threads[f] && team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc"
, "openmp/runtime/src/kmp_runtime.cpp", 7958); }
;
7959 }
7960#endif /* KMP_DEBUG */
7961
7962 /* release the worker threads so they may begin working */
7963 __kmp_fork_barrier(gtid, 0);
7964}
7965
7966void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {
7967 kmp_info_t *this_thr = __kmp_threads[gtid];
7968
7969 KMP_DEBUG_ASSERT(team)if (!(team)) { __kmp_debug_assert("team", "openmp/runtime/src/kmp_runtime.cpp"
, 7969); }
;
7970 KMP_DEBUG_ASSERT(this_thr->th.th_team == team)if (!(this_thr->th.th_team == team)) { __kmp_debug_assert(
"this_thr->th.th_team == team", "openmp/runtime/src/kmp_runtime.cpp"
, 7970); }
;
7971 KMP_ASSERT(KMP_MASTER_GTID(gtid))if (!((0 == __kmp_tid_from_gtid((gtid))))) { __kmp_debug_assert
("KMP_MASTER_GTID(gtid)", "openmp/runtime/src/kmp_runtime.cpp"
, 7971); }
;
7972 KMP_MB(); /* Flush all pending memory write invalidates. */
7973
7974 /* Join barrier after fork */
7975
7976#ifdef KMP_DEBUG1
7977 if (__kmp_threads[gtid] &&
7978 __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc) {
7979 __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid,
7980 __kmp_threads[gtid]);
7981 __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, "
7982 "team->t.t_nproc=%d\n",
7983 gtid, __kmp_threads[gtid]->th.th_team_nproc, team,
7984 team->t.t_nproc);
7985 __kmp_print_structure();
7986 }
7987 KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&if (!(__kmp_threads[gtid] && __kmp_threads[gtid]->
th.th_team_nproc == team->t.t_nproc)) { __kmp_debug_assert
("__kmp_threads[gtid] && __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc"
, "openmp/runtime/src/kmp_runtime.cpp", 7988); }
7988 __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc)if (!(__kmp_threads[gtid] && __kmp_threads[gtid]->
th.th_team_nproc == team->t.t_nproc)) { __kmp_debug_assert
("__kmp_threads[gtid] && __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc"
, "openmp/runtime/src/kmp_runtime.cpp", 7988); }
;
7989#endif /* KMP_DEBUG */
7990
7991 __kmp_join_barrier(gtid); /* wait for everyone */
7992#if OMPT_SUPPORT1
7993 if (ompt_enabled.enabled &&
7994 this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) {
7995 int ds_tid = this_thr->th.th_info.ds.ds_tid;
7996 ompt_data_t *task_data = OMPT_CUR_TASK_DATA(this_thr)(&(this_thr->th.th_current_task->ompt_task_info.task_data
))
;
7997 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
7998#if OMPT_OPTIONAL1
7999 void *codeptr = NULL__null;
8000 if (KMP_MASTER_TID(ds_tid)(0 == (ds_tid)) &&
8001 (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)ompt_callback_sync_region_wait_callback ||
8002 ompt_callbacks.ompt_callback(ompt_callback_sync_region)ompt_callback_sync_region_callback))
8003 codeptr = OMPT_CUR_TEAM_INFO(this_thr)(&(this_thr->th.th_team->t.ompt_team_info))->master_return_address;
8004
8005 if (ompt_enabled.ompt_callback_sync_region_wait) {
8006 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)ompt_callback_sync_region_wait_callback(
8007 ompt_sync_region_barrier_implicit, ompt_scope_end, NULL__null, task_data,
8008 codeptr);
8009 }
8010 if (ompt_enabled.ompt_callback_sync_region) {
8011 ompt_callbacks.ompt_callback(ompt_callback_sync_region)ompt_callback_sync_region_callback(
8012 ompt_sync_region_barrier_implicit, ompt_scope_end, NULL__null, task_data,
8013 codeptr);
8014 }
8015#endif
8016 if (!KMP_MASTER_TID(ds_tid)(0 == (ds_tid)) && ompt_enabled.ompt_callback_implicit_task) {
8017 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)ompt_callback_implicit_task_callback(
8018 ompt_scope_end, NULL__null, task_data, 0, ds_tid,
8019 ompt_task_implicit); // TODO: Can this be ompt_task_initial?
8020 }
8021 }
8022#endif
8023
8024 KMP_MB(); /* Flush all pending memory write invalidates. */
8025 KMP_ASSERT(this_thr->th.th_team == team)if (!(this_thr->th.th_team == team)) { __kmp_debug_assert(
"this_thr->th.th_team == team", "openmp/runtime/src/kmp_runtime.cpp"
, 8025); }
;
8026}
8027
8028/* ------------------------------------------------------------------------ */
8029
8030#ifdef USE_LOAD_BALANCE1
8031
8032// Return the number of worker threads actively spinning in the hot team,
8033// if we are at the outermost level of parallelism. Otherwise, return 0.
8034static int __kmp_active_hot_team_nproc(kmp_root_t *root) {
8035 int i;
8036 int retval;
8037 kmp_team_t *hot_team;
8038
8039 if (root->r.r_active) {
8040 return 0;
8041 }
8042 hot_team = root->r.r_hot_team;
8043 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME(2147483647)) {
8044 return hot_team->t.t_nproc - 1; // Don't count primary thread
8045 }
8046
8047 // Skip the primary thread - it is accounted for elsewhere.
8048 retval = 0;
8049 for (i = 1; i < hot_team->t.t_nproc; i++) {
8050 if (hot_team->t.t_threads[i]->th.th_active) {
8051 retval++;
8052 }
8053 }
8054 return retval;
8055}
8056
8057// Perform an automatic adjustment to the number of
8058// threads used by the next parallel region.
8059static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc) {
8060 int retval;
8061 int pool_active;
8062 int hot_team_active;
8063 int team_curr_active;
8064 int system_active;
8065
8066 KB_TRACE(20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n", root,if (kmp_b_debug >= 20) { __kmp_debug_printf ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n"
, root, set_nproc); }
8067 set_nproc))if (kmp_b_debug >= 20) { __kmp_debug_printf ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n"
, root, set_nproc); }
;
8068 KMP_DEBUG_ASSERT(root)if (!(root)) { __kmp_debug_assert("root", "openmp/runtime/src/kmp_runtime.cpp"
, 8068); }
;
8069 KMP_DEBUG_ASSERT(root->r.r_root_team->t.t_threads[0]if (!(root->r.r_root_team->t.t_threads[0] ->th.th_current_task
->td_icvs.dynamic == (!0))) { __kmp_debug_assert("root->r.r_root_team->t.t_threads[0] ->th.th_current_task->td_icvs.dynamic == (!0)"
, "openmp/runtime/src/kmp_runtime.cpp", 8070); }
8070 ->th.th_current_task->td_icvs.dynamic == TRUE)if (!(root->r.r_root_team->t.t_threads[0] ->th.th_current_task
->td_icvs.dynamic == (!0))) { __kmp_debug_assert("root->r.r_root_team->t.t_threads[0] ->th.th_current_task->td_icvs.dynamic == (!0)"
, "openmp/runtime/src/kmp_runtime.cpp", 8070); }
;
8071 KMP_DEBUG_ASSERT(set_nproc > 1)if (!(set_nproc > 1)) { __kmp_debug_assert("set_nproc > 1"
, "openmp/runtime/src/kmp_runtime.cpp", 8071); }
;
8072
8073 if (set_nproc == 1) {
8074 KB_TRACE(20, ("__kmp_load_balance_nproc: serial execution.\n"))if (kmp_b_debug >= 20) { __kmp_debug_printf ("__kmp_load_balance_nproc: serial execution.\n"
); }
;
8075 return 1;
8076 }
8077
8078 // Threads that are active in the thread pool, active in the hot team for this
8079 // particular root (if we are at the outer par level), and the currently
8080 // executing thread (to become the primary thread) are available to add to the
8081 // new team, but are currently contributing to the system load, and must be
8082 // accounted for.
8083 pool_active = __kmp_thread_pool_active_nth;
8084 hot_team_active = __kmp_active_hot_team_nproc(root);
8085 team_curr_active = pool_active + hot_team_active + 1;
8086
8087 // Check the system load.
8088 system_active = __kmp_get_load_balance(__kmp_avail_proc + team_curr_active);
8089 KB_TRACE(30, ("__kmp_load_balance_nproc: system active = %d pool active = %d "if (kmp_b_debug >= 30) { __kmp_debug_printf ("__kmp_load_balance_nproc: system active = %d pool active = %d "
"hot team active = %d\n", system_active, pool_active, hot_team_active
); }
8090 "hot team active = %d\n",if (kmp_b_debug >= 30) { __kmp_debug_printf ("__kmp_load_balance_nproc: system active = %d pool active = %d "
"hot team active = %d\n", system_active, pool_active, hot_team_active
); }
8091 system_active, pool_active, hot_team_active))if (kmp_b_debug >= 30) { __kmp_debug_printf ("__kmp_load_balance_nproc: system active = %d pool active = %d "
"hot team active = %d\n", system_active, pool_active, hot_team_active
); }
;
8092
8093 if (system_active < 0) {
8094 // There was an error reading the necessary info from /proc, so use the
8095 // thread limit algorithm instead. Once we set __kmp_global.g.g_dynamic_mode
8096 // = dynamic_thread_limit, we shouldn't wind up getting back here.
8097 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
8098 KMP_WARNING(CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit")__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_CantLoadBalUsing
, "KMP_DYNAMIC_MODE=thread limit"), __kmp_msg_null)
;
8099
8100 // Make this call behave like the thread limit algorithm.
8101 retval = __kmp_avail_proc - __kmp_nth +
8102 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
8103 if (retval > set_nproc) {
8104 retval = set_nproc;
8105 }
8106 if (retval < KMP_MIN_NTH1) {
8107 retval = KMP_MIN_NTH1;
8108 }
8109
8110 KB_TRACE(20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n",if (kmp_b_debug >= 20) { __kmp_debug_printf ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n"
, retval); }
8111 retval))if (kmp_b_debug >= 20) { __kmp_debug_printf ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n"
, retval); }
;
8112 return retval;
8113 }
8114
8115 // There is a slight delay in the load balance algorithm in detecting new
8116 // running procs. The real system load at this instant should be at least as
8117 // large as the #active OMP threads that are available to add to the team.
8118 if (system_active < team_curr_active) {
8119 system_active = team_curr_active;
8120 }
8121 retval = __kmp_avail_proc - system_active + team_curr_active;
8122 if (retval > set_nproc) {
8123 retval = set_nproc;
8124 }
8125 if (retval < KMP_MIN_NTH1) {
8126 retval = KMP_MIN_NTH1;
8127 }
8128
8129 KB_TRACE(20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval))if (kmp_b_debug >= 20) { __kmp_debug_printf ("__kmp_load_balance_nproc: exit. retval:%d\n"
, retval); }
;
8130 return retval;
8131} // __kmp_load_balance_nproc()
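A compact sketch of the happy-path arithmetic in __kmp_load_balance_nproc (i.e., when the /proc load reading is valid), with stand-in parameters instead of the __kmp_* globals; it is only an illustration of the formula above, not runtime code.

#include <algorithm>

int load_balance_nproc(int avail_proc, int system_active, int pool_active,
                       int hot_team_active, int set_nproc, int min_nth) {
  // Threads this runtime already contributes: pool + hot team + primary.
  int team_curr_active = pool_active + hot_team_active + 1;
  // The load reading can lag; it is never taken below our own contribution.
  system_active = std::max(system_active, team_curr_active);
  // Hand the new team whatever capacity the rest of the system leaves free.
  int retval = avail_proc - system_active + team_curr_active;
  retval = std::min(retval, set_nproc); // never exceed the requested size
  retval = std::max(retval, min_nth);   // never drop below KMP_MIN_NTH
  return retval;
}

// Example: 16 procs, 20 runnable threads system-wide, 2 pooled + 3 hot-team
// workers of our own, request of 8, minimum of 1:
//   team_curr_active = 6, retval = 16 - 20 + 6 = 2, clamped to [1, 8] -> 2.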
8132
8133#endif /* USE_LOAD_BALANCE */
8134
8135/* ------------------------------------------------------------------------ */
8136
8137/* NOTE: this is called with the __kmp_init_lock held */
8138void __kmp_cleanup(void) {
8139 int f;
8140
8141 KA_TRACE(10, ("__kmp_cleanup: enter\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_cleanup: enter\n"
); }
;
8142
8143 if (TCR_4(__kmp_init_parallel)(__kmp_init_parallel)) {
8144#if KMP_HANDLE_SIGNALS(1 || 0)
8145 __kmp_remove_signals();
8146#endif
8147 TCW_4(__kmp_init_parallel, FALSE)(__kmp_init_parallel) = (0);
8148 }
8149
8150 if (TCR_4(__kmp_init_middle)(__kmp_init_middle)) {
8151#if KMP_AFFINITY_SUPPORTED1
8152 __kmp_affinity_uninitialize();
8153#endif /* KMP_AFFINITY_SUPPORTED */
8154 __kmp_cleanup_hierarchy();
8155 TCW_4(__kmp_init_middle, FALSE)(__kmp_init_middle) = (0);
8156 }
8157
8158 KA_TRACE(10, ("__kmp_cleanup: go serial cleanup\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_cleanup: go serial cleanup\n"
); }
;
8159
8160 if (__kmp_init_serial) {
8161 __kmp_runtime_destroy();
8162 __kmp_init_serial = FALSE0;
8163 }
8164
8165 __kmp_cleanup_threadprivate_caches();
8166
8167 for (f = 0; f < __kmp_threads_capacity; f++) {
8168 if (__kmp_root[f] != NULL__null) {
8169 __kmp_free(__kmp_root[f])___kmp_free((__kmp_root[f]), "openmp/runtime/src/kmp_runtime.cpp"
, 8169)
;
8170 __kmp_root[f] = NULL__null;
8171 }
8172 }
8173 __kmp_free(__kmp_threads)___kmp_free((__kmp_threads), "openmp/runtime/src/kmp_runtime.cpp"
, 8173)
;
8174 // __kmp_threads and __kmp_root were allocated at once, as a single block,
8175 // so there is no need to free __kmp_root separately.
8176 __kmp_threads = NULL__null;
8177 __kmp_root = NULL__null;
8178 __kmp_threads_capacity = 0;
8179
8180 // Free old __kmp_threads arrays if they exist.
8181 kmp_old_threads_list_t *ptr = __kmp_old_threads_list;
8182 while (ptr) {
8183 kmp_old_threads_list_t *next = ptr->next;
8184 __kmp_free(ptr->threads)___kmp_free((ptr->threads), "openmp/runtime/src/kmp_runtime.cpp"
, 8184)
;
8185 __kmp_free(ptr)___kmp_free((ptr), "openmp/runtime/src/kmp_runtime.cpp", 8185
)
;
8186 ptr = next;
8187 }
8188
8189#if KMP_USE_DYNAMIC_LOCK1
8190 __kmp_cleanup_indirect_user_locks();
8191#else
8192 __kmp_cleanup_user_locks();
8193#endif
8194#if OMPD_SUPPORT1
8195 if (ompd_state) {
8196 __kmp_free(ompd_env_block)___kmp_free((ompd_env_block), "openmp/runtime/src/kmp_runtime.cpp"
, 8196)
;
8197 ompd_env_block = NULL__null;
8198 ompd_env_block_size = 0;
8199 }
8200#endif
8201
8202#if KMP_AFFINITY_SUPPORTED1
8203 KMP_INTERNAL_FREE(CCAST(char *, __kmp_cpuinfo_file))free(const_cast<char *>(__kmp_cpuinfo_file));
8204 __kmp_cpuinfo_file = NULL__null;
8205#endif /* KMP_AFFINITY_SUPPORTED */
8206
8207#if KMP_USE_ADAPTIVE_LOCKS(0 || 1) && !0
8208#if KMP_DEBUG_ADAPTIVE_LOCKS0
8209 __kmp_print_speculative_stats();
8210#endif
8211#endif
8212 KMP_INTERNAL_FREE(__kmp_nested_nth.nth)free(__kmp_nested_nth.nth);
8213 __kmp_nested_nth.nth = NULL__null;
8214 __kmp_nested_nth.size = 0;
8215 __kmp_nested_nth.used = 0;
8216 KMP_INTERNAL_FREE(__kmp_nested_proc_bind.bind_types)free(__kmp_nested_proc_bind.bind_types);
8217 __kmp_nested_proc_bind.bind_types = NULL__null;
8218 __kmp_nested_proc_bind.size = 0;
8219 __kmp_nested_proc_bind.used = 0;
8220 if (__kmp_affinity_format) {
8221 KMP_INTERNAL_FREE(__kmp_affinity_format)free(__kmp_affinity_format);
8222 __kmp_affinity_format = NULL__null;
8223 }
8224
8225 __kmp_i18n_catclose();
8226
8227#if KMP_USE_HIER_SCHED0
8228 __kmp_hier_scheds.deallocate();
8229#endif
8230
8231#if KMP_STATS_ENABLED0
8232 __kmp_stats_fini();
8233#endif
8234
8235 KA_TRACE(10, ("__kmp_cleanup: exit\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_cleanup: exit\n"
); }
;
8236}
8237
8238/* ------------------------------------------------------------------------ */
8239
8240int __kmp_ignore_mppbeg(void) {
8241 char *env;
8242
8243 if ((env = getenv("KMP_IGNORE_MPPBEG")) != NULL__null) {
8244 if (__kmp_str_match_false(env))
8245 return FALSE0;
8246 }
8247 // By default, __kmpc_begin() is a no-op.
8248 return TRUE(!0);
8249}
8250
8251int __kmp_ignore_mppend(void) {
8252 char *env;
8253
8254 if ((env = getenv("KMP_IGNORE_MPPEND")) != NULL__null) {
8255 if (__kmp_str_match_false(env))
8256 return FALSE0;
8257 }
8258 // By default, __kmpc_end() is a no-op.
8259 return TRUE(!0);
8260}
8261
8262void __kmp_internal_begin(void) {
8263 int gtid;
8264 kmp_root_t *root;
8265
8266 /* this is a very important step as it will register new sibling threads
8267 and assign these new uber threads a new gtid */
8268 gtid = __kmp_entry_gtid()__kmp_get_global_thread_id_reg();
8269 root = __kmp_threads[gtid]->th.th_root;
8270 KMP_ASSERT(KMP_UBER_GTID(gtid))if (!(KMP_UBER_GTID(gtid))) { __kmp_debug_assert("KMP_UBER_GTID(gtid)"
, "openmp/runtime/src/kmp_runtime.cpp", 8270); }
;
8271
8272 if (root->r.r_begin)
8273 return;
8274 __kmp_acquire_lock(&root->r.r_begin_lock, gtid);
8275 if (root->r.r_begin) {
8276 __kmp_release_lock(&root->r.r_begin_lock, gtid);
8277 return;
8278 }
8279
8280 root->r.r_begin = TRUE(!0);
8281
8282 __kmp_release_lock(&root->r.r_begin_lock, gtid);
8283}
8284
8285/* ------------------------------------------------------------------------ */
8286
8287void __kmp_user_set_library(enum library_type arg) {
8288 int gtid;
8289 kmp_root_t *root;
8290 kmp_info_t *thread;
8291
8292 /* first, make sure we are initialized so we can get our gtid */
8293
8294 gtid = __kmp_entry_gtid()__kmp_get_global_thread_id_reg();
8295 thread = __kmp_threads[gtid];
8296
8297 root = thread->th.th_root;
8298
8299 KA_TRACE(20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n"
, gtid, arg, library_serial); }
8300 library_serial))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n"
, gtid, arg, library_serial); }
;
8301 if (root->r.r_in_parallel) { /* Must be called in serial section of top-level
8302 thread */
8303 KMP_WARNING(SetLibraryIncorrectCall)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_SetLibraryIncorrectCall
), __kmp_msg_null)
;
8304 return;
8305 }
8306
8307 switch (arg) {
8308 case library_serial:
8309 thread->th.th_set_nproc = 0;
8310 set__nproc(thread, 1)(((thread)->th.th_current_task->td_icvs.nproc) = (1));
8311 break;
8312 case library_turnaround:
8313 thread->th.th_set_nproc = 0;
8314 set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth(((thread)->th.th_current_task->td_icvs.nproc) = (__kmp_dflt_team_nth
? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub))
8315 : __kmp_dflt_team_nth_ub)(((thread)->th.th_current_task->td_icvs.nproc) = (__kmp_dflt_team_nth
? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub))
;
8316 break;
8317 case library_throughput:
8318 thread->th.th_set_nproc = 0;
8319 set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth(((thread)->th.th_current_task->td_icvs.nproc) = (__kmp_dflt_team_nth
? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub))
8320 : __kmp_dflt_team_nth_ub)(((thread)->th.th_current_task->td_icvs.nproc) = (__kmp_dflt_team_nth
? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub))
;
8321 break;
8322 default:
8323 KMP_FATAL(UnknownLibraryType, arg)__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_UnknownLibraryType,
arg), __kmp_msg_null)
;
8324 }
8325
8326 __kmp_aux_set_library(arg);
8327}
8328
8329void __kmp_aux_set_stacksize(size_t arg) {
8330 if (!__kmp_init_serial)
8331 __kmp_serial_initialize();
8332
8333#if KMP_OS_DARWIN0
8334 if (arg & (0x1000 - 1)) {
8335 arg &= ~(0x1000 - 1);
8336 if (arg + 0x1000) /* check for overflow if we round up */
8337 arg += 0x1000;
8338 }
8339#endif
8340 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
8341
8342 /* only change the default stacksize before the first parallel region */
8343 if (!TCR_4(__kmp_init_parallel)(__kmp_init_parallel)) {
8344 size_t value = arg; /* argument is in bytes */
8345
8346 if (value < __kmp_sys_min_stksize)
8347 value = __kmp_sys_min_stksize;
8348 else if (value > KMP_MAX_STKSIZE(~((size_t)1 << ((sizeof(size_t) * (1 << 3)) - 1)
))
)
8349 value = KMP_MAX_STKSIZE(~((size_t)1 << ((sizeof(size_t) * (1 << 3)) - 1)
))
;
8350
8351 __kmp_stksize = value;
8352
8353 __kmp_env_stksize = TRUE(!0); /* was KMP_STACKSIZE specified? */
8354 }
8355
8356 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
8357}
8358
8359/* set the behaviour of the runtime library */
8360/* TODO this can cause some odd behaviour with sibling parallelism... */
8361void __kmp_aux_set_library(enum library_type arg) {
8362 __kmp_library = arg;
8363
8364 switch (__kmp_library) {
8365 case library_serial: {
8366 KMP_INFORM(LibraryIsSerial)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_LibraryIsSerial
), __kmp_msg_null)
;
8367 } break;
8368 case library_turnaround:
8369 if (__kmp_use_yield == 1 && !__kmp_use_yield_exp_set)
8370 __kmp_use_yield = 2; // only yield when oversubscribed
8371 break;
8372 case library_throughput:
8373 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME(2147483647))
8374 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME(__kmp_is_hybrid_cpu() ? (0) : (200));
8375 break;
8376 default:
8377 KMP_FATAL(UnknownLibraryType, arg)__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_UnknownLibraryType,
arg), __kmp_msg_null)
;
8378 }
8379}
8380
8381/* Getting team information common to all of the teams API calls */
8382// Returns NULL if not in a teams construct
8383static kmp_team_t *__kmp_aux_get_team_info(int &teams_serialized) {
8384 kmp_info_t *thr = __kmp_entry_thread();
8385 teams_serialized = 0;
8386 if (thr->th.th_teams_microtask) {
8387 kmp_team_t *team = thr->th.th_team;
8388 int tlevel = thr->th.th_teams_level; // the level of the teams construct
8389 int ii = team->t.t_level;
8390 teams_serialized = team->t.t_serialized;
8391 int level = tlevel + 1;
8392 KMP_DEBUG_ASSERT(ii >= tlevel)if (!(ii >= tlevel)) { __kmp_debug_assert("ii >= tlevel"
, "openmp/runtime/src/kmp_runtime.cpp", 8392); }
;
8393 while (ii > level) {
8394 for (teams_serialized = team->t.t_serialized;
8395 (teams_serialized > 0) && (ii > level); teams_serialized--, ii--) {
8396 }
8397 if (team->t.t_serialized && (!teams_serialized)) {
8398 team = team->t.t_parent;
8399 continue;
8400 }
8401 if (ii > level) {
8402 team = team->t.t_parent;
8403 ii--;
8404 }
8405 }
8406 return team;
8407 }
8408 return NULL__null;
8409}
8410
8411int __kmp_aux_get_team_num() {
8412 int serialized;
8413 kmp_team_t *team = __kmp_aux_get_team_info(serialized);
8414 if (team) {
8415 if (serialized > 1) {
8416 return 0; // teams region is serialized ( 1 team of 1 thread ).
8417 } else {
8418 return team->t.t_master_tid;
8419 }
8420 }
8421 return 0;
8422}
8423
8424int __kmp_aux_get_num_teams() {
8425 int serialized;
8426 kmp_team_t *team = __kmp_aux_get_team_info(serialized);
8427 if (team) {
8428 if (serialized > 1) {
8429 return 1;
8430 } else {
8431 return team->t.t_parent->t.t_nproc;
8432 }
8433 }
8434 return 1;
8435}
8436
8437/* ------------------------------------------------------------------------ */
8438
8439/*
8440 * Affinity Format Parser
8441 *
8442 * Field is in form of: %[[[0].]size]type
8443 * % and type are required (%% means print a literal '%')
8444 * type is either single char or long name surrounded by {},
8445 * e.g., N or {num_threads}
8446 * 0 => leading zeros
8447 * . => right justified when size is specified
8448 * by default output is left justified
8449 * size is the *minimum* field length
8450 * All other characters are printed as is
8451 *
8452 * Available field types:
8453 * L {nesting_level} - omp_get_level()
8454 * n {thread_num} - omp_get_thread_num()
8455 * H {host} - name of host machine
8456 * P {process_id} - process id (integer)
8457 * i {native_thread_id} - native thread identifier (integer)
8458 * N {num_threads} - omp_get_num_threads()
8459 * a {ancestor_tnum} - omp_get_ancestor_thread_num(omp_get_level()-1)
8460 * A {thread_affinity} - comma separated list of integers or integer ranges
8461 * (values of affinity mask)
8462 *
8463 * Implementation-specific field types can be added
8464 * If a type is unknown, print "undefined"
8465 */
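As a concrete illustration of the field spec above (and of the keyword table that follows), the sketch below drives a format string through the OpenMP 5.0 affinity-format API declared in <omp.h>. It is a usage example only; the host name and process id shown in the sample output are placeholders.

#include <omp.h>
#include <stdio.h>

int main(void) {
#pragma omp parallel
  {
    // %0.4n -> thread number, zero-padded, right-justified, min width 4
    // %N    -> team size, %H -> host name, %P -> process id
    char buf[256]; // private per thread, so the capture does not race
    omp_capture_affinity(buf, sizeof(buf),
                         "host %H pid %P thread %0.4n of %N");
    printf("%s\n", buf); // e.g. "host mybox pid 1234 thread 0002 of 8"
  }
  return 0;
}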
8466
8467// Structure holding the short name, long name, and corresponding data type
8468// for snprintf. A table of these represents the entire set of valid keyword
8469// field types.
8470typedef struct kmp_affinity_format_field_t {
8471 char short_name; // from spec e.g., L -> thread level
8472 const char *long_name; // from spec thread_level -> thread level
8473 char field_format; // data type for snprintf (typically 'd' or 's'
8474 // for integer or string)
8475} kmp_affinity_format_field_t;
8476
8477static const kmp_affinity_format_field_t __kmp_affinity_format_table[] = {
8478#if KMP_AFFINITY_SUPPORTED
8479 {'A', "thread_affinity", 's'},
8480#endif
8481 {'t', "team_num", 'd'},
8482 {'T', "num_teams", 'd'},
8483 {'L', "nesting_level", 'd'},
8484 {'n', "thread_num", 'd'},
8485 {'N', "num_threads", 'd'},
8486 {'a', "ancestor_tnum", 'd'},
8487 {'H', "host", 's'},
8488 {'P', "process_id", 'd'},
8489 {'i', "native_thread_id", 'd'}};
8490
8491// Return the number of characters it takes to hold field
8492static int __kmp_aux_capture_affinity_field(int gtid, const kmp_info_t *th,
8493 const char **ptr,
8494 kmp_str_buf_t *field_buffer) {
8495 int rc, format_index, field_value;
8496 const char *width_left, *width_right;
8497 bool pad_zeros, right_justify, parse_long_name, found_valid_name;
8498 static const int FORMAT_SIZE = 20;
8499 char format[FORMAT_SIZE] = {0};
8500 char absolute_short_name = 0;
8501
8502 KMP_DEBUG_ASSERT(gtid >= 0);
8503 KMP_DEBUG_ASSERT(th);
8504 KMP_DEBUG_ASSERT(**ptr == '%');
8505 KMP_DEBUG_ASSERT(field_buffer);
8506
8507 __kmp_str_buf_clear(field_buffer);
8508
8509 // Skip the initial %
8510 (*ptr)++;
8511
8512 // Check for %% first
8513 if (**ptr == '%') {
8514 __kmp_str_buf_cat(field_buffer, "%", 1);
8515 (*ptr)++; // skip over the second %
8516 return 1;
8517 }
8518
8519 // Parse field modifiers if they are present
8520 pad_zeros = false;
8521 if (**ptr == '0') {
8522 pad_zeros = true;
8523 (*ptr)++; // skip over 0
8524 }
8525 right_justify = false;
8526 if (**ptr == '.') {
8527 right_justify = true;
8528 (*ptr)++; // skip over .
8529 }
8530 // Parse width of field: [width_left, width_right)
8531 width_left = width_right = NULL;
8532 if (**ptr >= '0' && **ptr <= '9') {
8533 width_left = *ptr;
8534 SKIP_DIGITS(*ptr);
8535 width_right = *ptr;
8536 }
8537
8538 // Create the format for KMP_SNPRINTF based on flags parsed above
8539 format_index = 0;
8540 format[format_index++] = '%';
8541 if (!right_justify)
8542 format[format_index++] = '-';
8543 if (pad_zeros)
8544 format[format_index++] = '0';
8545 if (width_left && width_right) {
8546 int i = 0;
8547 // Only allow 8 digit number widths.
8548 // This also prevents overflowing format variable
8549 while (i < 8 && width_left < width_right) {
8550 format[format_index++] = *width_left;
8551 width_left++;
8552 i++;
8553 }
8554 }
8555
8556 // Parse a name (long or short)
8557 // Canonicalize the name into absolute_short_name
8558 found_valid_name = false;
8559 parse_long_name = (**ptr == '{');
8560 if (parse_long_name)
8561 (*ptr)++; // skip initial left brace
8562 for (size_t i = 0; i < sizeof(__kmp_affinity_format_table) /
8563 sizeof(__kmp_affinity_format_table[0]);
8564 ++i) {
8565 char short_name = __kmp_affinity_format_table[i].short_name;
8566 const char *long_name = __kmp_affinity_format_table[i].long_name;
8567 char field_format = __kmp_affinity_format_table[i].field_format;
8568 if (parse_long_name) {
8569 size_t length = KMP_STRLEN(long_name);
8570 if (strncmp(*ptr, long_name, length) == 0) {
8571 found_valid_name = true;
8572 (*ptr) += length; // skip the long name
8573 }
8574 } else if (**ptr == short_name) {
8575 found_valid_name = true;
8576 (*ptr)++; // skip the short name
8577 }
8578 if (found_valid_name) {
8579 format[format_index++] = field_format;
8580 format[format_index++] = '\0';
8581 absolute_short_name = short_name;
8582 break;
8583 }
8584 }
8585 if (parse_long_name) {
8586 if (**ptr != '}') {
8587 absolute_short_name = 0;
8588 } else {
8589 (*ptr)++; // skip over the right brace
8590 }
8591 }
8592
8593 // Attempt to fill the buffer with the requested
8594 // value using snprintf within __kmp_str_buf_print()
8595 switch (absolute_short_name) {
8596 case 't':
8597 rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_team_num());
8598 break;
8599 case 'T':
8600 rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_num_teams());
8601 break;
8602 case 'L':
8603 rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_level);
8604 break;
8605 case 'n':
8606 rc = __kmp_str_buf_print(field_buffer, format, __kmp_tid_from_gtid(gtid));
8607 break;
8608 case 'H': {
8609 static const int BUFFER_SIZE = 256;
8610 char buf[BUFFER_SIZE];
8611 __kmp_expand_host_name(buf, BUFFER_SIZE);
8612 rc = __kmp_str_buf_print(field_buffer, format, buf);
8613 } break;
8614 case 'P':
8615 rc = __kmp_str_buf_print(field_buffer, format, getpid());
8616 break;
8617 case 'i':
8618 rc = __kmp_str_buf_print(field_buffer, format, __kmp_gettid());
8619 break;
8620 case 'N':
8621 rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_nproc);
8622 break;
8623 case 'a':
8624 field_value =
8625 __kmp_get_ancestor_thread_num(gtid, th->th.th_team->t.t_level - 1);
8626 rc = __kmp_str_buf_print(field_buffer, format, field_value);
8627 break;
8628#if KMP_AFFINITY_SUPPORTED
8629 case 'A': {
8630 kmp_str_buf_t buf;
8631 __kmp_str_buf_init(&buf);
8632 __kmp_affinity_str_buf_mask(&buf, th->th.th_affin_mask);
8633 rc = __kmp_str_buf_print(field_buffer, format, buf.str);
8634 __kmp_str_buf_free(&buf);
8635 } break;
8636#endif
8637 default:
8638 // According to the spec, if an implementation does not have info for a
8639 // field type, then "undefined" is printed
8640 rc = __kmp_str_buf_print(field_buffer, "%s", "undefined");
8641 // Skip the field
8642 if (parse_long_name) {
8643 SKIP_TOKEN(*ptr);
8644 if (**ptr == '}')
8645 (*ptr)++;
8646 } else {
8647 (*ptr)++;
8648 }
8649 }
8650
8651 KMP_ASSERT(format_index <= FORMAT_SIZE);
8652 return rc;
8653}
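// Minimal standalone sketch (illustrative, simplified; not the runtime's code):
// the same format-assembly idea as above -- parse the '0' and '.' modifiers and
// an optional width, then emit a printf-style conversion string. The helper
// name build_format is hypothetical.

#include <cstdio>
#include <string>

static std::string build_format(bool pad_zeros, bool right_justify,
                                const std::string &width, char conv) {
  std::string fmt = "%";
  if (!right_justify)
    fmt += '-'; // left-justified is the default, as in the parser above
  if (pad_zeros)
    fmt += '0';
  fmt += width.substr(0, 8); // the parser caps widths at 8 digits
  fmt += conv;
  return fmt;
}

int main() {
  char buf[32];
  // build_format(true, true, "4", 'd') yields "%04d"
  std::snprintf(buf, sizeof(buf), build_format(true, true, "4", 'd').c_str(), 7);
  std::printf("[%s]\n", buf); // prints [0007]
  return 0;
}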
8654
8655/*
8656 * Return number of characters needed to hold the affinity string
8657 * (not including null byte character)
8658 * The resultant string is printed to buffer, which the caller can then
8659 * handle afterwards
8660 */
8661size_t __kmp_aux_capture_affinity(int gtid, const char *format,
8662 kmp_str_buf_t *buffer) {
8663 const char *parse_ptr;
8664 size_t retval;
8665 const kmp_info_t *th;
8666 kmp_str_buf_t field;
8667
8668 KMP_DEBUG_ASSERT(buffer);
8669 KMP_DEBUG_ASSERT(gtid >= 0);
8670
8671 __kmp_str_buf_init(&field);
8672 __kmp_str_buf_clear(buffer);
8673
8674 th = __kmp_threads[gtid];
8675 retval = 0;
8676
8677 // If format is NULL or zero-length string, then we use
8678 // affinity-format-var ICV
8679 parse_ptr = format;
8680 if (parse_ptr == NULL || *parse_ptr == '\0') {
8681 parse_ptr = __kmp_affinity_format;
8682 }
8683 KMP_DEBUG_ASSERT(parse_ptr);
8684
8685 while (*parse_ptr != '\0') {
8686 // Parse a field
8687 if (*parse_ptr == '%') {
8688 // Put field in the buffer
8689 int rc = __kmp_aux_capture_affinity_field(gtid, th, &parse_ptr, &field);
8690 __kmp_str_buf_catbuf(buffer, &field);
8691 retval += rc;
8692 } else {
8693 // Put literal character in buffer
8694 __kmp_str_buf_cat(buffer, parse_ptr, 1);
8695 retval++;
8696 parse_ptr++;
8697 }
8698 }
8699 __kmp_str_buf_free(&field);
8700 return retval;
8701}
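// Illustrative example (not part of kmp_runtime.cpp): this routine is the
// engine behind the OpenMP 5.0 omp_capture_affinity() entry point, whose
// return value is likewise the number of characters needed (excluding the
// terminating NUL), so a caller can detect truncation against its buffer size:

#include <omp.h>
#include <cstdio>

int main() {
#pragma omp parallel
  {
    char buf[256];
    size_t needed = omp_capture_affinity(buf, sizeof(buf),
                                         "%{thread_num}: %{thread_affinity}");
    if (needed < sizeof(buf)) { // fits; otherwise the string was truncated
#pragma omp critical
      std::printf("%s\n", buf);
    }
  }
  return 0;
}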
8702
8703// Displays the affinity string to stdout
8704void __kmp_aux_display_affinity(int gtid, const char *format) {
8705 kmp_str_buf_t buf;
8706 __kmp_str_buf_init(&buf);
8707 __kmp_aux_capture_affinity(gtid, format, &buf);
8708 __kmp_fprintf(kmp_out, "%s" KMP_END_OF_LINE, buf.str);
8709 __kmp_str_buf_free(&buf);
8710}
8711
8712/* ------------------------------------------------------------------------ */
8713
8714void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid) {
8715 int blocktime = arg; /* argument is in milliseconds */
8716#if KMP_USE_MONITOR
8717 int bt_intervals;
8718#endif
8719 kmp_int8 bt_set;
8720
8721 __kmp_save_internal_controls(thread);
8722
8723 /* Normalize and set blocktime for the teams */
8724 if (blocktime < KMP_MIN_BLOCKTIME)
8725 blocktime = KMP_MIN_BLOCKTIME;
8726 else if (blocktime > KMP_MAX_BLOCKTIME)
8727 blocktime = KMP_MAX_BLOCKTIME;
8728
8729 set__blocktime_team(thread->th.th_team, tid, blocktime);
8730 set__blocktime_team(thread->th.th_serial_team, 0, blocktime);
8731
8732#if KMP_USE_MONITOR
8733 /* Calculate and set blocktime intervals for the teams */
8734 bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);
8735
8736 set__bt_intervals_team(thread->th.th_team, tid, bt_intervals);
8737 set__bt_intervals_team(thread->th.th_serial_team, 0, bt_intervals);
8738#endif
8739
8740 /* Set whether blocktime has been set to "TRUE" */
8741 bt_set = TRUE;
8742
8743 set__bt_set_team(thread->th.th_team, tid, bt_set);
8744 set__bt_set_team(thread->th.th_serial_team, 0, bt_set);
8745#if KMP_USE_MONITOR
8746 KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "
8747 "bt_intervals=%d, monitor_updates=%d\n",
8748 __kmp_gtid_from_tid(tid, thread->th.th_team),
8749 thread->th.th_team->t.t_id, tid, blocktime, bt_intervals,
8750 __kmp_monitor_wakeups));
8751#else
8752 KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n",
8753 __kmp_gtid_from_tid(tid, thread->th.th_team),
8754 thread->th.th_team->t.t_id, tid, blocktime));
8755#endif
8756}
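// Illustrative example (not part of kmp_runtime.cpp): this routine is reached
// from the kmp_set_blocktime()/kmp_get_blocktime() extensions declared in the
// omp.h shipped with this runtime (and from KMP_BLOCKTIME); the argument is in
// milliseconds and is clamped to [KMP_MIN_BLOCKTIME, KMP_MAX_BLOCKTIME] as
// shown above.

#include <omp.h>
#include <cstdio>

int main() {
  kmp_set_blocktime(0); // workers go to sleep right after a parallel region
#pragma omp parallel
  { /* ... */ }
  std::printf("blocktime is now %d ms\n", kmp_get_blocktime());
  return 0;
}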
8757
8758void __kmp_aux_set_defaults(char const *str, size_t len) {
8759 if (!__kmp_init_serial) {
8760 __kmp_serial_initialize();
8761 }
8762 __kmp_env_initialize(str);
8763
8764 if (__kmp_settings || __kmp_display_env || __kmp_display_env_verbose) {
8765 __kmp_env_print();
8766 }
8767} // __kmp_aux_set_defaults
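// Illustrative example (not part of kmp_runtime.cpp): __kmp_aux_set_defaults is
// where the kmp_set_defaults() extension from omp.h lands; it takes the same
// "NAME=value" strings as the environment. The sketch assumes it runs before
// the first parallel region so the setting can still take effect.

#include <omp.h>

int main() {
  kmp_set_defaults("KMP_BLOCKTIME=0");
#pragma omp parallel
  { /* first parallel region runs with the new default */ }
  return 0;
}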
8768
8769/* ------------------------------------------------------------------------ */
8770/* internal fast reduction routines */
8771
8772PACKED_REDUCTION_METHOD_T
8773__kmp_determine_reduction_method(
8774 ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
8775 void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
8776 kmp_critical_name *lck) {
8777
8778 // Default reduction method: critical construct ( lck != NULL, like in current
8779 // PAROPT )
8780 // If ( reduce_data!=NULL && reduce_func!=NULL ): the tree-reduction method
8781 // can be selected by RTL
8782 // If loc->flags contains KMP_IDENT_ATOMIC_REDUCE, the atomic reduce method
8783 // can be selected by RTL
8784 // Finally, it's up to OpenMP RTL to make a decision on which method to select
8785 // among generated by PAROPT.
8786
8787 PACKED_REDUCTION_METHOD_T retval;
8788
8789 int team_size;
8790
8791 KMP_DEBUG_ASSERT(loc); // it would be nice to test ( loc != 0 )
8792 KMP_DEBUG_ASSERT(lck); // it would be nice to test ( lck != 0 )
8793
8794#define FAST_REDUCTION_ATOMIC_METHOD_GENERATED \
8795 (loc && \
8796 ((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE)))
8797#define FAST_REDUCTION_TREE_METHOD_GENERATED ((reduce_data) && (reduce_func))
8798
8799 retval = critical_reduce_block;
8800
8801 // another choice of getting a team size (with 1 dynamic dereference) is slower
8802 team_size = __kmp_get_team_num_threads(global_tid);
8803 if (team_size == 1) {
8804
8805 retval = empty_reduce_block;
8806
8807 } else {
8808
8809 int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
8810
8811#if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || \
8812 KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64
8813
8814#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \
8815 KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD
8816
8817 int teamsize_cutoff = 4;
8818
8819#if KMP_MIC_SUPPORTED
8820 if (__kmp_mic_type != non_mic) {
8821 teamsize_cutoff = 8;
8822 }
8823#endif
8824 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
8825 if (tree_available) {
8826 if (team_size <= teamsize_cutoff) {
8827 if (atomic_available) {
8828 retval = atomic_reduce_block;
8829 }
8830 } else {
8831 retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
8832 }
8833 } else if (atomic_available) {
8834 retval = atomic_reduce_block;
8835 }
8836#else
8837#error "Unknown or unsupported OS"
8838#endif // KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||
8839 // KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD
8840
8841#elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS
8842
8843#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS || KMP_OS_HURD
8844
8845 // basic tuning
8846
8847 if (atomic_available) {
8848 if (num_vars <= 2) { // && ( team_size <= 8 ) due to false-sharing ???
8849 retval = atomic_reduce_block;
8850 }
8851 } // otherwise: use critical section
8852
8853#elif KMP_OS_DARWIN
8854
8855 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
8856 if (atomic_available && (num_vars <= 3)) {
8857 retval = atomic_reduce_block;
8858 } else if (tree_available) {
8859 if ((reduce_size > (9 * sizeof(kmp_real64))) &&
8860 (reduce_size < (2000 * sizeof(kmp_real64)))) {
8861 retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
8862 }
8863 } // otherwise: use critical section
8864
8865#else
8866#error "Unknown or unsupported OS"
8867#endif
8868
8869#else
8870#error "Unknown or unsupported architecture"
8871#endif
8872 }
8873
8874 // KMP_FORCE_REDUCTION
8875
8876 // If the team is serialized (team_size == 1), ignore the forced reduction
8877 // method and stay with the unsynchronized method (empty_reduce_block)
8878 if (__kmp_force_reduction_method != reduction_method_not_defined &&
8879 team_size != 1) {
8880
8881 PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block;
8882
8883 int atomic_available, tree_available;
8884
8885 switch ((forced_retval = __kmp_force_reduction_method)) {
8886 case critical_reduce_block:
8887 KMP_ASSERT(lck); // lck should be != 0
8888 break;
8889
8890 case atomic_reduce_block:
8891 atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
8892 if (!atomic_available) {
8893 KMP_WARNING(RedMethodNotSupported, "atomic");
8894 forced_retval = critical_reduce_block;
8895 }
8896 break;
8897
8898 case tree_reduce_block:
8899 tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
8900 if (!tree_available) {
8901 KMP_WARNING(RedMethodNotSupported, "tree");
8902 forced_retval = critical_reduce_block;
8903 } else {
8904#if KMP_FAST_REDUCTION_BARRIER
8905 forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
8906#endif
8907 }
8908 break;
8909
8910 default:
8911 KMP_ASSERT(0); // "unsupported method specified"
8912 }
8913
8914 retval = forced_retval;
8915 }
8916
8917 KA_TRACE(10, ("reduction method selected=%08x\n", retval));
8918
8919#undef FAST_REDUCTION_TREE_METHOD_GENERATED
8920#undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED
8921
8922 return (retval);
8923}
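// Illustrative example (not part of kmp_runtime.cpp): the compiler lowers a
// reduction clause to __kmpc_reduce*/__kmpc_end_reduce*, which consult this
// routine to pick the critical, atomic, or tree method from team size,
// architecture, and the flags the compiler generated; KMP_FORCE_REDUCTION (see
// above) can override the choice. A source-level reduction that exercises it:

#include <cstdio>

int main() {
  long sum = 0;
#pragma omp parallel for reduction(+ : sum)
  for (int i = 0; i < 1000; ++i)
    sum += i;
  std::printf("sum = %ld\n", sum); // 499500
  return 0;
}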
8924// this function is for testing set/get/determine reduce method
8925kmp_int32 __kmp_get_reduce_method(void) {
8926 return ((__kmp_entry_thread()->th.th_local.packed_reduction_method) >> 8);
8927}
8928
8929// Soft pause sets up threads to ignore blocktime and just go to sleep.
8930// Spin-wait code checks __kmp_pause_status and reacts accordingly.
8931void __kmp_soft_pause() { __kmp_pause_status = kmp_soft_paused; }
8932
8933// Hard pause shuts down the runtime completely. Resume happens naturally when
8934// OpenMP is used subsequently.
8935void __kmp_hard_pause() {
8936 __kmp_pause_status = kmp_hard_paused;
8937 __kmp_internal_end_thread(-1);
8938}
8939
8940// Soft resume sets __kmp_pause_status, and wakes up all threads.
8941void __kmp_resume_if_soft_paused() {
8942 if (__kmp_pause_status == kmp_soft_paused) {
8943 __kmp_pause_status = kmp_not_paused;
8944
8945 for (int gtid = 1; gtid < __kmp_threads_capacity; ++gtid) {
8946 kmp_info_t *thread = __kmp_threads[gtid];
8947 if (thread) { // Wake it if sleeping
8948 kmp_flag_64<> fl(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
8949 thread);
8950 if (fl.is_sleeping())
8951 fl.resume(gtid);
8952 else if (__kmp_try_suspend_mx(thread)) { // got suspend lock
8953 __kmp_unlock_suspend_mx(thread); // unlock it; it won't sleep
8954 } else { // thread holds the lock and may sleep soon
8955 do { // until either the thread sleeps, or we can get the lock
8956 if (fl.is_sleeping()) {
8957 fl.resume(gtid);
8958 break;
8959 } else if (__kmp_try_suspend_mx(thread)) {
8960 __kmp_unlock_suspend_mx(thread);
8961 break;
8962 }
8963 } while (1);
8964 }
8965 }
8966 }
8967 }
8968}
8969
8970// This function is called via __kmpc_pause_resource. Returns 0 if successful.
8971// TODO: add warning messages
8972int __kmp_pause_resource(kmp_pause_status_t level) {
8973 if (level == kmp_not_paused) { // requesting resume
8974 if (__kmp_pause_status == kmp_not_paused) {
8975 // error message about runtime not being paused, so can't resume
8976 return 1;
8977 } else {
8978 KMP_DEBUG_ASSERT(__kmp_pause_status == kmp_soft_paused ||
8979 __kmp_pause_status == kmp_hard_paused);
8980 __kmp_pause_status = kmp_not_paused;
8981 return 0;
8982 }
8983 } else if (level == kmp_soft_paused) { // requesting soft pause
8984 if (__kmp_pause_status != kmp_not_paused) {
8985 // error message about already being paused
8986 return 1;
8987 } else {
8988 __kmp_soft_pause();
8989 return 0;
8990 }
8991 } else if (level == kmp_hard_paused) { // requesting hard pause
8992 if (__kmp_pause_status != kmp_not_paused) {
8993 // error message about already being paused
8994 return 1;
8995 } else {
8996 __kmp_hard_pause();
8997 return 0;
8998 }
8999 } else {
9000 // error message about invalid level
9001 return 1;
9002 }
9003}
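// Illustrative example (not part of kmp_runtime.cpp): __kmp_pause_resource is
// reached from the OpenMP 5.0 omp_pause_resource()/omp_pause_resource_all()
// routines; 0 means success and a nonzero value flags an invalid transition,
// e.g. requesting a resume while the runtime is not paused.

#include <omp.h>
#include <cstdio>

int main() {
#pragma omp parallel
  { /* warm up the thread pool */ }
  if (omp_pause_resource_all(omp_pause_soft) == 0)
    std::printf("runtime soft-paused; worker threads released to sleep\n");
#pragma omp parallel // using OpenMP again resumes the runtime
  { /* ... */ }
  return 0;
}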
9004
9005void __kmp_omp_display_env(int verbose) {
9006 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
9007 if (__kmp_init_serial == 0)
9008 __kmp_do_serial_initialize();
9009 __kmp_display_env_impl(!verbose, verbose);
9010 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
9011}
9012
9013// The team size is changing, so distributed barrier must be modified
9014void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
9015 int new_nthreads) {
9016 KMP_DEBUG_ASSERT(__kmp_barrier_release_pattern[bs_forkjoin_barrier] ==
9017 bp_dist_bar);
9018 kmp_info_t **other_threads = team->t.t_threads;
9019
9020 // We want all the workers to stop waiting on the barrier while we adjust the
9021 // size of the team.
9022 for (int f = 1; f < old_nthreads; ++f) {
9023 KMP_DEBUG_ASSERT(other_threads[f] != NULL);
9024 // Ignore threads that are already inactive or not present in the team
9025 if (team->t.t_threads[f]->th.th_used_in_team.load() == 0) {
9026 // teams construct causes thread_limit to get passed in, and some of
9027 // those could be inactive; just ignore them
9028 continue;
9029 }
9030 // If thread is transitioning still to in_use state, wait for it
9031 if (team->t.t_threads[f]->th.th_used_in_team.load() == 3) {
9032 while (team->t.t_threads[f]->th.th_used_in_team.load() == 3)
9033 KMP_CPU_PAUSE();
9034 }
9035 // The thread should be in_use now
9036 KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 1);
9037 // Transition to unused state
9038 team->t.t_threads[f]->th.th_used_in_team.store(2);
9039 KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 2);
9040 }
9041 // Release all the workers
9042 team->t.b->go_release();
9043
9044 KMP_MFENCE();
9045
9046 // Workers should see transition status 2 and move to 0; but may need to be
9047 // woken up first
9048 int count = old_nthreads - 1;
9049 while (count > 0) {
9050 count = old_nthreads - 1;
9051 for (int f = 1; f < old_nthreads; ++f) {
9052 if (other_threads[f]->th.th_used_in_team.load() != 0) {
9053 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { // Wake up the workers
9054 kmp_atomic_flag_64<> *flag = (kmp_atomic_flag_64<> *)CCAST(
9055 void *, other_threads[f]->th.th_sleep_loc);
9056 __kmp_atomic_resume_64(other_threads[f]->th.th_info.ds.ds_gtid, flag);
9057 }
9058 } else {
9059 KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 0);
9060 count--;
9061 }
9062 }
9063 }
9064 // Now update the barrier size
9065 team->t.b->update_num_threads(new_nthreads);
9066 team->t.b->go_reset();
9067}
9068
9069void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads) {
9070 // Add the threads back to the team
9071 KMP_DEBUG_ASSERT(team);
9072 // Threads were paused and pointed at th_used_in_team temporarily during a
9073 // resize of the team. We're going to set th_used_in_team to 3 to indicate to
9074 // the thread that it should transition itself back into the team. Then, if
9075 // blocktime isn't infinite, the thread could be sleeping, so we send a resume
9076 // to wake it up.
9077 for (int f = 1; f < new_nthreads; ++f) {
9078 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
9079 KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team), 0,
9080 3);
9081 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { // Wake up sleeping threads
9082 __kmp_resume_32(team->t.t_threads[f]->th.th_info.ds.ds_gtid,
9083 (kmp_flag_32<false, false> *)NULL);
9084 }
9085 }
9086 // The threads should be transitioning to the team; when they are done, they
9087 // should have set th_used_in_team to 1. This loop forces master to wait until
9088 // all threads have moved into the team and are waiting in the barrier.
9089 int count = new_nthreads - 1;
9090 while (count > 0) {
9091 count = new_nthreads - 1;
9092 for (int f = 1; f < new_nthreads; ++f) {
9093 if (team->t.t_threads[f]->th.th_used_in_team.load() == 1) {
9094 count--;
9095 }
9096 }
9097 }
9098}
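// Minimal sketch of the hand-shake above (illustrative, using std::atomic in
// place of th_used_in_team): the primary thread publishes state 3, each worker
// moves itself 3 -> 1, and the primary spins until every slot reads 1. All
// names here are hypothetical; only the state machine mirrors the code above.

#include <atomic>
#include <thread>
#include <vector>

int main() {
  constexpr int kWorkers = 4;
  std::vector<std::atomic<int>> used(kWorkers);
  for (auto &u : used)
    u.store(0);

  std::vector<std::thread> workers;
  for (int f = 0; f < kWorkers; ++f)
    workers.emplace_back([&used, f] {
      while (used[f].load() != 3) // wait for the "join the team" signal
        std::this_thread::yield();
      used[f].store(1); // worker has transitioned into the team
    });

  for (int f = 0; f < kWorkers; ++f)
    used[f].store(3); // signal: transition yourself back into the team

  int count = kWorkers;
  while (count > 0) { // same counting-loop shape as __kmp_add_threads_to_team
    count = kWorkers;
    for (int f = 0; f < kWorkers; ++f)
      if (used[f].load() == 1)
        count--;
  }
  for (auto &w : workers)
    w.join();
  return 0;
}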
9099
9100// Globals and functions for hidden helper task
9101kmp_info_t **__kmp_hidden_helper_threads;
9102kmp_info_t *__kmp_hidden_helper_main_thread;
9103std::atomic<kmp_int32> __kmp_unexecuted_hidden_helper_tasks;
9104#if KMP_OS_LINUX
9105 kmp_int32 __kmp_hidden_helper_threads_num = 8;
9106 kmp_int32 __kmp_enable_hidden_helper = TRUE;
9107#else
9108 kmp_int32 __kmp_hidden_helper_threads_num = 0;
9109 kmp_int32 __kmp_enable_hidden_helper = FALSE;
9110#endif
9111
9112namespace {
9113std::atomic<kmp_int32> __kmp_hit_hidden_helper_threads_num;
9114
9115void __kmp_hidden_helper_wrapper_fn(int *gtid, int *, ...) {
9116 // This is an explicit synchronization on all hidden helper threads, in case
9117 // a regular thread pushes a hidden helper task to a hidden helper thread
9118 // that has not been awakened since the main thread released it after
9119 // creating the team.
9120 KMP_ATOMIC_INC(&__kmp_hit_hidden_helper_threads_num);
9121 while (KMP_ATOMIC_LD_ACQ(&__kmp_hit_hidden_helper_threads_num) !=
9122 __kmp_hidden_helper_threads_num)
9123 ;
9124
9125 // If main thread, then wait for signal
9126 if (__kmpc_master(nullptr, *gtid)) {
9127 // First, unset the initial state and release the initial thread
9128 TCW_4(__kmp_init_hidden_helper_threads, FALSE);
9129 __kmp_hidden_helper_initz_release();
9130 __kmp_hidden_helper_main_thread_wait();
9131 // Now wake up all worker threads
9132 for (int i = 1; i < __kmp_hit_hidden_helper_threads_num; ++i) {
9133 __kmp_hidden_helper_worker_thread_signal();
9134 }
9135 }
9136}
9137} // namespace
9138
9139void __kmp_hidden_helper_threads_initz_routine() {
9140 // Create a new root for hidden helper team/threads
9141 const int gtid = __kmp_register_root(TRUE);
9142 __kmp_hidden_helper_main_thread = __kmp_threads[gtid];
9143 __kmp_hidden_helper_threads = &__kmp_threads[gtid];
9144 __kmp_hidden_helper_main_thread->th.th_set_nproc =
9145 __kmp_hidden_helper_threads_num;
9146
9147 KMP_ATOMIC_ST_REL(&__kmp_hit_hidden_helper_threads_num, 0);
9148
9149 __kmpc_fork_call(nullptr, 0, __kmp_hidden_helper_wrapper_fn);
9150
9151 // Set the initialization flag to FALSE
9152 TCW_SYNC_4(__kmp_init_hidden_helper, FALSE);
9153
9154 __kmp_hidden_helper_threads_deinitz_release();
9155}
9156
9157/* Nesting Mode:
9158 Set via KMP_NESTING_MODE, which takes an integer.
9159 Note: we skip duplicate topology levels, and skip levels with only
9160 one entity.
9161 KMP_NESTING_MODE=0 is the default, and doesn't use nesting mode.
9162 KMP_NESTING_MODE=1 sets as many nesting levels as there are distinct levels
9163 in the topology, and initializes the number of threads at each of those
9164 levels to the number of entities at each level, respectively, below the
9165 entity at the parent level.
9166 KMP_NESTING_MODE=N, where N>1, attempts to create up to N nesting levels,
9167 but starts with nesting OFF -- max-active-levels-var is 1 -- and requires
9168 the user to turn nesting on explicitly. This is an even more experimental
9169 option to this experimental feature, and may change or go away in the
9170 future.
9171*/
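// Illustrative example (not part of kmp_runtime.cpp; assumes the process is
// started with KMP_NESTING_MODE=1 in the environment): with nesting mode on,
// the runtime enables nesting and sizes each level from the topology, so the
// nested region below gets per-level thread counts without num_threads
// clauses. Each inner team reports its own size.

#include <omp.h>
#include <cstdio>

int main() {
#pragma omp parallel
  {
#pragma omp parallel
    {
#pragma omp single
      std::printf("level %d: %d threads\n", omp_get_level(),
                  omp_get_num_threads());
    }
  }
  return 0;
}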
9172
9173// Allocate space to store nesting levels
9174void __kmp_init_nesting_mode() {
9175 int levels = KMP_HW_LAST;
9176 __kmp_nesting_mode_nlevels = levels;
9177 __kmp_nesting_nth_level = (int *)KMP_INTERNAL_MALLOC(levels * sizeof(int));
9178 for (int i = 0; i < levels; ++i)
9179 __kmp_nesting_nth_level[i] = 0;
9180 if (__kmp_nested_nth.size < levels) {
9181 __kmp_nested_nth.nth =
9182 (int *)KMP_INTERNAL_REALLOC(__kmp_nested_nth.nth, levels * sizeof(int));
9183 __kmp_nested_nth.size = levels;
9184 }
9185}
9186
9187// Set # threads for top levels of nesting; must be called after topology set
9188void __kmp_set_nesting_mode_threads() {
9189 kmp_info_t *thread = __kmp_threads[__kmp_entry_gtid()];
9190
9191 if (__kmp_nesting_mode == 1)
9192 __kmp_nesting_mode_nlevels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
9193 else if (__kmp_nesting_mode > 1)
9194 __kmp_nesting_mode_nlevels = __kmp_nesting_mode;
9195
9196 if (__kmp_topology) { // use topology info
9197 int loc, hw_level;
9198 for (loc = 0, hw_level = 0; hw_level < __kmp_topology->get_depth() &&
9199 loc < __kmp_nesting_mode_nlevels;
9200 loc++, hw_level++) {
9201 __kmp_nesting_nth_level[loc] = __kmp_topology->get_ratio(hw_level);
9202 if (__kmp_nesting_nth_level[loc] == 1)
9203 loc--;
9204 }
9205 // Make sure all cores are used
9206 if (__kmp_nesting_mode > 1 && loc > 1) {
9207 int core_level = __kmp_topology->get_level(KMP_HW_CORE);
9208 int num_cores = __kmp_topology->get_count(core_level);
9209 int upper_levels = 1;
9210 for (int level = 0; level < loc - 1; ++level)
9211 upper_levels *= __kmp_nesting_nth_level[level];
9212 if (upper_levels * __kmp_nesting_nth_level[loc - 1] < num_cores)
9213 __kmp_nesting_nth_level[loc - 1] =
9214 num_cores / __kmp_nesting_nth_level[loc - 2];
9215 }
9216 __kmp_nesting_mode_nlevels = loc;
9217 __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
9218 } else { // no topology info available; provide a reasonable guesstimation
9219 if (__kmp_avail_proc >= 4) {
9220 __kmp_nesting_nth_level[0] = __kmp_avail_proc / 2;
9221 __kmp_nesting_nth_level[1] = 2;
9222 __kmp_nesting_mode_nlevels = 2;
9223 } else {
9224 __kmp_nesting_nth_level[0] = __kmp_avail_proc;
9225 __kmp_nesting_mode_nlevels = 1;
9226 }
9227 __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
9228 }
9229 for (int i = 0; i < __kmp_nesting_mode_nlevels; ++i) {
9230 __kmp_nested_nth.nth[i] = __kmp_nesting_nth_level[i];
9231 }
9232 set__nproc(thread, __kmp_nesting_nth_level[0]);
9233 if (__kmp_nesting_mode > 1 && __kmp_nesting_mode_nlevels > __kmp_nesting_mode)
9234 __kmp_nesting_mode_nlevels = __kmp_nesting_mode;
9235 if (get__max_active_levels(thread) > 1) {
9236 // if max levels was set, set nesting mode levels to same
9237 __kmp_nesting_mode_nlevels = get__max_active_levels(thread);
9238 }
9239 if (__kmp_nesting_mode == 1) // turn on nesting for this case only
9240 set__max_active_levels(thread, __kmp_nesting_mode_nlevels);
9241}
9242
9243// Empty symbols to export (see exports_so.txt) when feature is disabled
9244extern "C" {
9245#if !KMP_STATS_ENABLED
9246 void __kmp_reset_stats() {}
9247#endif
9248#if !USE_DEBUGGER
9249 int __kmp_omp_debug_struct_info = FALSE;
9250 int __kmp_debugging = FALSE;
9251#endif
9252#if !USE_ITT_BUILD || !USE_ITT_NOTIFY
9253void __kmp_itt_fini_ittlib() {}
9254void __kmp_itt_init_ittlib() {}
9255#endif
9256}
9257
9258// end of file