Bug Summary

File: build/source/openmp/runtime/src/kmp_runtime.cpp
Warning: line 8108, column 3
Dereference of null pointer
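
Editor's note on the reported path: in the annotated source below, the analyzer's events 1 through 10 (inside __kmp_reserve_threads) assume 'root' is null at KMP_DEBUG_ASSERT(root && parent_team), continue past the check because the assert expands to a bare if around a reporting call (__kmp_debug_assert) that the analyzer evidently does not treat as terminating, and then pass the null 'root' to __kmp_load_balance_nproc, where the dereference reported at line 8108 (beyond this excerpt) is presumably flagged. The reduction below is a hypothetical, self-contained C++ sketch of that pattern only; every name in it is invented and none of it is the runtime's real code.

#include <cstdio>

// Invented stand-in for kmp_root_t; only the field that gets read matters.
struct root_like { int r_active; };

// Invented stand-in for __kmp_debug_assert(): it only reports, so the
// analyzer keeps exploring the path on which the asserted condition failed.
static void report_assert_failure(const char *expr) {
  std::fprintf(stderr, "assertion failed: %s\n", expr);
}

// Mirrors the shape KMP_DEBUG_ASSERT takes in this build: a bare if around a
// non-terminating reporting call.
#define DEBUG_ASSERT_LIKE(cond)                                                \
  if (!(cond)) {                                                               \
    report_assert_failure(#cond);                                              \
  }

static int load_balance_like(root_like *root, int nproc) {
  // On the path where 'root' is null, this field read is the kind of
  // dereference the analyzer reports.
  return root->r_active ? nproc : 1;
}

static int reserve_threads_like(root_like *root, int nproc) {
  DEBUG_ASSERT_LIKE(root != nullptr); // analysis continues past a failure here
  return load_balance_like(root, nproc);
}

int main(int argc, char **) {
  root_like real_root{1};
  // With a command-line argument the pointer becomes null; the analyzer
  // follows that path through the non-terminating assert and flags the
  // field read in load_balance_like.
  root_like *root = (argc > 1) ? nullptr : &real_root;
  return reserve_threads_like(root, 4) > 0 ? 0 : 1;
}

Running the sketch with no arguments never takes the null path, so it is well defined at run time; under the stated assumptions, clang --analyze over this file should produce the same kind of "Dereference of null pointer" trail as the report below.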

Annotated Source Code


clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name kmp_runtime.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -resource-dir /usr/lib/llvm-17/lib/clang/17 -D _DEBUG -D _GLIBCXX_ASSERTIONS -D _GNU_SOURCE -D _LIBCPP_ENABLE_ASSERTIONS -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -D omp_EXPORTS -I projects/openmp/runtime/src -I /build/source/openmp/runtime/src -I include -I /build/source/llvm/include -I /build/source/openmp/runtime/src/i18n -I /build/source/openmp/runtime/src/include -I /build/source/openmp/runtime/src/thirdparty/ittnotify -D _FORTIFY_SOURCE=2 -D NDEBUG -D _GNU_SOURCE -D _REENTRANT -D _FORTIFY_SOURCE=2 -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-17/lib/clang/17/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fmacro-prefix-map=/build/source/= -fcoverage-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fcoverage-prefix-map=/build/source/= -source-date-epoch 1683717183 -O2 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -Wno-misleading-indentation -Wno-extra -Wno-pedantic -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-frame-address -Wno-strict-aliasing -Wno-stringop-truncation -Wno-switch -Wno-uninitialized -Wno-cast-qual -std=c++17 -fdeprecated-macro -fdebug-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/= -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fno-rtti -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2023-05-10-133810-16478-1 -x c++ 
/build/source/openmp/runtime/src/kmp_runtime.cpp
1/*
2 * kmp_runtime.cpp -- KPTS runtime support library
3 */
4
5//===----------------------------------------------------------------------===//
6//
7// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8// See https://llvm.org/LICENSE.txt for license information.
9// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10//
11//===----------------------------------------------------------------------===//
12
13#include "kmp.h"
14#include "kmp_affinity.h"
15#include "kmp_atomic.h"
16#include "kmp_environment.h"
17#include "kmp_error.h"
18#include "kmp_i18n.h"
19#include "kmp_io.h"
20#include "kmp_itt.h"
21#include "kmp_settings.h"
22#include "kmp_stats.h"
23#include "kmp_str.h"
24#include "kmp_wait_release.h"
25#include "kmp_wrapper_getpid.h"
26#include "kmp_dispatch.h"
27#if KMP_USE_HIER_SCHED0
28#include "kmp_dispatch_hier.h"
29#endif
30
31#if OMPT_SUPPORT1
32#include "ompt-specific.h"
33#endif
34#if OMPD_SUPPORT1
35#include "ompd-specific.h"
36#endif
37
38#if OMP_PROFILING_SUPPORT0
39#include "llvm/Support/TimeProfiler.h"
40static char *ProfileTraceFile = nullptr;
41#endif
42
43/* these are temporary issues to be dealt with */
44#define KMP_USE_PRCTL0 0
45
46#if KMP_OS_WINDOWS0
47#include <process.h>
48#endif
49
50#if KMP_OS_WINDOWS0
51// windows does not need include files as it doesn't use shared memory
52#else
53#include <sys/mman.h>
54#include <sys/stat.h>
55#include <fcntl.h>
56#define SHM_SIZE1024 1024
57#endif
58
59#if defined(KMP_GOMP_COMPAT)
60char const __kmp_version_alt_comp[] =
61 KMP_VERSION_PREFIX"\x00@(#) " "LLVM OMP " "alternative compiler support: yes";
62#endif /* defined(KMP_GOMP_COMPAT) */
63
64char const __kmp_version_omp_api[] =
65 KMP_VERSION_PREFIX"\x00@(#) " "LLVM OMP " "API version: 5.0 (201611)";
66
67#ifdef KMP_DEBUG1
68char const __kmp_version_lock[] =
69 KMP_VERSION_PREFIX"\x00@(#) " "LLVM OMP " "lock type: run time selectable";
70#endif /* KMP_DEBUG */
71
72#define KMP_MIN(x, y)((x) < (y) ? (x) : (y)) ((x) < (y) ? (x) : (y))
73
74/* ------------------------------------------------------------------------ */
75
76#if KMP_USE_MONITOR
77kmp_info_t __kmp_monitor;
78#endif
79
80/* Forward declarations */
81
82void __kmp_cleanup(void);
83
84static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *, int tid,
85 int gtid);
86static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
87 kmp_internal_control_t *new_icvs,
88 ident_t *loc);
89#if KMP_AFFINITY_SUPPORTED1
90static void __kmp_partition_places(kmp_team_t *team,
91 int update_master_only = 0);
92#endif
93static void __kmp_do_serial_initialize(void);
94void __kmp_fork_barrier(int gtid, int tid);
95void __kmp_join_barrier(int gtid);
96void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
97 kmp_internal_control_t *new_icvs, ident_t *loc);
98
99#ifdef USE_LOAD_BALANCE1
100static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc);
101#endif
102
103static int __kmp_expand_threads(int nNeed);
104#if KMP_OS_WINDOWS0
105static int __kmp_unregister_root_other_thread(int gtid);
106#endif
107static void __kmp_reap_thread(kmp_info_t *thread, int is_root);
108kmp_info_t *__kmp_thread_pool_insert_pt = NULL__null;
109
110void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
111 int new_nthreads);
112void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads);
113
114/* Calculate the identifier of the current thread */
115/* fast (and somewhat portable) way to get unique identifier of executing
116 thread. Returns KMP_GTID_DNE if we haven't been assigned a gtid. */
117int __kmp_get_global_thread_id() {
118 int i;
119 kmp_info_t **other_threads;
120 size_t stack_data;
121 char *stack_addr;
122 size_t stack_size;
123 char *stack_base;
124
125 KA_TRACE(if (kmp_a_debug >= 1000) { __kmp_debug_printf ("*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n"
, __kmp_nth, __kmp_all_nth); }
126 1000,if (kmp_a_debug >= 1000) { __kmp_debug_printf ("*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n"
, __kmp_nth, __kmp_all_nth); }
127 ("*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",if (kmp_a_debug >= 1000) { __kmp_debug_printf ("*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n"
, __kmp_nth, __kmp_all_nth); }
128 __kmp_nth, __kmp_all_nth))if (kmp_a_debug >= 1000) { __kmp_debug_printf ("*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n"
, __kmp_nth, __kmp_all_nth); }
;
129
130 /* JPH - to handle the case where __kmpc_end(0) is called immediately prior to
131 a parallel region, made it return KMP_GTID_DNE to force serial_initialize
132 by caller. Had to handle KMP_GTID_DNE at all call-sites, or else guarantee
133 __kmp_init_gtid for this to work. */
134
135 if (!TCR_4(__kmp_init_gtid)(__kmp_init_gtid))
136 return KMP_GTID_DNE(-2);
137
138#ifdef KMP_TDATA_GTID1
139 if (TCR_4(__kmp_gtid_mode)(__kmp_gtid_mode) >= 3) {
140 KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using TDATA\n"))if (kmp_a_debug >= 1000) { __kmp_debug_printf ("*** __kmp_get_global_thread_id: using TDATA\n"
); }
;
141 return __kmp_gtid;
142 }
143#endif
144 if (TCR_4(__kmp_gtid_mode)(__kmp_gtid_mode) >= 2) {
145 KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using keyed TLS\n"))if (kmp_a_debug >= 1000) { __kmp_debug_printf ("*** __kmp_get_global_thread_id: using keyed TLS\n"
); }
;
146 return __kmp_gtid_get_specific();
147 }
148 KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using internal alg.\n"))if (kmp_a_debug >= 1000) { __kmp_debug_printf ("*** __kmp_get_global_thread_id: using internal alg.\n"
); }
;
149
150 stack_addr = (char *)&stack_data;
151 other_threads = __kmp_threads;
152
153 /* ATT: The code below is a source of potential bugs due to unsynchronized
154 access to __kmp_threads array. For example:
155 1. Current thread loads other_threads[i] to thr and checks it, it is
156 non-NULL.
157 2. Current thread is suspended by OS.
158 3. Another thread unregisters and finishes (debug versions of free()
159 may fill memory with something like 0xEF).
160 4. Current thread is resumed.
161 5. Current thread reads junk from *thr.
162 TODO: Fix it. --ln */
163
164 for (i = 0; i < __kmp_threads_capacity; i++) {
165
166 kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i])((void *)(other_threads[i]));
167 if (!thr)
168 continue;
169
170 stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize)((void *)(thr->th.th_info.ds.ds_stacksize));
171 stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase)((void *)(thr->th.th_info.ds.ds_stackbase));
172
173 /* stack grows down -- search through all of the active threads */
174
175 if (stack_addr <= stack_base) {
176 size_t stack_diff = stack_base - stack_addr;
177
178 if (stack_diff <= stack_size) {
179 /* The only way we can be closer than the allocated */
180 /* stack size is if we are running on this thread. */
181 KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == i)if (!(__kmp_gtid_get_specific() == i)) { __kmp_debug_assert("__kmp_gtid_get_specific() == i"
, "openmp/runtime/src/kmp_runtime.cpp", 181); }
;
182 return i;
183 }
184 }
185 }
186
187 /* get specific to try and determine our gtid */
188 KA_TRACE(1000,if (kmp_a_debug >= 1000) { __kmp_debug_printf ("*** __kmp_get_global_thread_id: internal alg. failed to find "
"thread, using TLS\n"); }
189 ("*** __kmp_get_global_thread_id: internal alg. failed to find "if (kmp_a_debug >= 1000) { __kmp_debug_printf ("*** __kmp_get_global_thread_id: internal alg. failed to find "
"thread, using TLS\n"); }
190 "thread, using TLS\n"))if (kmp_a_debug >= 1000) { __kmp_debug_printf ("*** __kmp_get_global_thread_id: internal alg. failed to find "
"thread, using TLS\n"); }
;
191 i = __kmp_gtid_get_specific();
192
193 /*fprintf( stderr, "=== %d\n", i ); */ /* GROO */
194
195 /* if we haven't been assigned a gtid, then return code */
196 if (i < 0)
197 return i;
198
199 /* dynamically updated stack window for uber threads to avoid get_specific
200 call */
201 if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)(other_threads[i]->th.th_info.ds.ds_stackgrow)) {
202 KMP_FATAL(StackOverflow, i)__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_StackOverflow, i), __kmp_msg_null
)
;
203 }
204
205 stack_base = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
206 if (stack_addr > stack_base) {
207 TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr)((other_threads[i]->th.th_info.ds.ds_stackbase)) = ((stack_addr
))
;
208 TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,((other_threads[i]->th.th_info.ds.ds_stacksize)) = ((other_threads
[i]->th.th_info.ds.ds_stacksize + stack_addr - stack_base)
)
209 other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr -((other_threads[i]->th.th_info.ds.ds_stacksize)) = ((other_threads
[i]->th.th_info.ds.ds_stacksize + stack_addr - stack_base)
)
210 stack_base)((other_threads[i]->th.th_info.ds.ds_stacksize)) = ((other_threads
[i]->th.th_info.ds.ds_stacksize + stack_addr - stack_base)
)
;
211 } else {
212 TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,((other_threads[i]->th.th_info.ds.ds_stacksize)) = ((stack_base
- stack_addr))
213 stack_base - stack_addr)((other_threads[i]->th.th_info.ds.ds_stacksize)) = ((stack_base
- stack_addr))
;
214 }
215
216 /* Reprint stack bounds for ubermaster since they have been refined */
217 if (__kmp_storage_map) {
218 char *stack_end = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
219 char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
220 __kmp_print_storage_map_gtid(i, stack_beg, stack_end,
221 other_threads[i]->th.th_info.ds.ds_stacksize,
222 "th_%d stack (refinement)", i);
223 }
224 return i;
225}
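
Editor's sketch of the stack-window lookup above (roughly listing lines 150 through 185): the routine identifies the calling thread by testing whether the address of a local variable falls inside some registered thread's stack range, relying on stacks growing downward and on the observation that only the running thread can have a live local inside its own range. The snippet below is a hypothetical, self-contained illustration; the type and function names are invented and do not reflect the runtime's kmp_info_t layout.

#include <cstddef>
#include <vector>

// Invented descriptor that keeps only the ds_stackbase/ds_stacksize idea.
struct stack_window {
  char *base;        // highest address of the stack (stacks grow down)
  std::size_t size;  // number of bytes below 'base'
};

// Return the index whose window contains 'addr', or -1 if no window matches.
static int find_by_stack_addr(const std::vector<stack_window> &threads,
                              const void *addr) {
  const char *p = static_cast<const char *>(addr);
  for (std::size_t i = 0; i < threads.size(); ++i) {
    const char *base = threads[i].base;
    if (base == nullptr)
      continue;
    if (p <= base && static_cast<std::size_t>(base - p) <= threads[i].size)
      return static_cast<int>(i);
  }
  return -1;
}

int main() {
  std::vector<char> fake_stack(4096);
  std::vector<stack_window> threads = {
      {fake_stack.data() + fake_stack.size(), fake_stack.size()}};
  const void *probe = fake_stack.data() + 100; // an address inside the window
  return find_by_stack_addr(threads, probe) == 0 ? 0 : 1;
}
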
226
227int __kmp_get_global_thread_id_reg() {
228 int gtid;
229
230 if (!__kmp_init_serial) {
231 gtid = KMP_GTID_DNE(-2);
232 } else
233#ifdef KMP_TDATA_GTID1
234 if (TCR_4(__kmp_gtid_mode)(__kmp_gtid_mode) >= 3) {
235 KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using TDATA\n"))if (kmp_a_debug >= 1000) { __kmp_debug_printf ("*** __kmp_get_global_thread_id_reg: using TDATA\n"
); }
;
236 gtid = __kmp_gtid;
237 } else
238#endif
239 if (TCR_4(__kmp_gtid_mode)(__kmp_gtid_mode) >= 2) {
240 KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using keyed TLS\n"))if (kmp_a_debug >= 1000) { __kmp_debug_printf ("*** __kmp_get_global_thread_id_reg: using keyed TLS\n"
); }
;
241 gtid = __kmp_gtid_get_specific();
242 } else {
243 KA_TRACE(1000,if (kmp_a_debug >= 1000) { __kmp_debug_printf ("*** __kmp_get_global_thread_id_reg: using internal alg.\n"
); }
244 ("*** __kmp_get_global_thread_id_reg: using internal alg.\n"))if (kmp_a_debug >= 1000) { __kmp_debug_printf ("*** __kmp_get_global_thread_id_reg: using internal alg.\n"
); }
;
245 gtid = __kmp_get_global_thread_id();
246 }
247
248 /* we must be a new uber master sibling thread */
249 if (gtid == KMP_GTID_DNE(-2)) {
250 KA_TRACE(10,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_get_global_thread_id_reg: Encountered new root thread. "
"Registering a new gtid.\n"); }
251 ("__kmp_get_global_thread_id_reg: Encountered new root thread. "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_get_global_thread_id_reg: Encountered new root thread. "
"Registering a new gtid.\n"); }
252 "Registering a new gtid.\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_get_global_thread_id_reg: Encountered new root thread. "
"Registering a new gtid.\n"); }
;
253 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
254 if (!__kmp_init_serial) {
255 __kmp_do_serial_initialize();
256 gtid = __kmp_gtid_get_specific();
257 } else {
258 gtid = __kmp_register_root(FALSE0);
259 }
260 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
261 /*__kmp_printf( "+++ %d\n", gtid ); */ /* GROO */
262 }
263
264 KMP_DEBUG_ASSERT(gtid >= 0)if (!(gtid >= 0)) { __kmp_debug_assert("gtid >= 0", "openmp/runtime/src/kmp_runtime.cpp"
, 264); }
;
265
266 return gtid;
267}
268
269/* caller must hold forkjoin_lock */
270void __kmp_check_stack_overlap(kmp_info_t *th) {
271 int f;
272 char *stack_beg = NULL__null;
273 char *stack_end = NULL__null;
274 int gtid;
275
276 KA_TRACE(10, ("__kmp_check_stack_overlap: called\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_check_stack_overlap: called\n"
); }
;
277 if (__kmp_storage_map) {
278 stack_end = (char *)th->th.th_info.ds.ds_stackbase;
279 stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
280
281 gtid = __kmp_gtid_from_thread(th);
282
283 if (gtid == KMP_GTID_MONITOR(-4)) {
284 __kmp_print_storage_map_gtid(
285 gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
286 "th_%s stack (%s)", "mon",
287 (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
288 } else {
289 __kmp_print_storage_map_gtid(
290 gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
291 "th_%d stack (%s)", gtid,
292 (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
293 }
294 }
295
296 /* No point in checking ubermaster threads since they use refinement and
297 * cannot overlap */
298 gtid = __kmp_gtid_from_thread(th);
299 if (__kmp_env_checks == TRUE(!0) && !KMP_UBER_GTID(gtid)) {
300 KA_TRACE(10,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_check_stack_overlap: performing extensive checking\n"
); }
301 ("__kmp_check_stack_overlap: performing extensive checking\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_check_stack_overlap: performing extensive checking\n"
); }
;
302 if (stack_beg == NULL__null) {
303 stack_end = (char *)th->th.th_info.ds.ds_stackbase;
304 stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
305 }
306
307 for (f = 0; f < __kmp_threads_capacity; f++) {
308 kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f])((void *)(__kmp_threads[f]));
309
310 if (f_th && f_th != th) {
311 char *other_stack_end =
312 (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase)((void *)(f_th->th.th_info.ds.ds_stackbase));
313 char *other_stack_beg =
314 other_stack_end - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize)((void *)(f_th->th.th_info.ds.ds_stacksize));
315 if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
316 (stack_end > other_stack_beg && stack_end < other_stack_end)) {
317
318 /* Print the other stack values before the abort */
319 if (__kmp_storage_map)
320 __kmp_print_storage_map_gtid(
321 -1, other_stack_beg, other_stack_end,
322 (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize)((void *)(f_th->th.th_info.ds.ds_stacksize)),
323 "th_%d stack (overlapped)", __kmp_gtid_from_thread(f_th));
324
325 __kmp_fatal(KMP_MSG(StackOverlap)__kmp_msg_format(kmp_i18n_msg_StackOverlap), KMP_HNT(ChangeStackLimit)__kmp_msg_format(kmp_i18n_hnt_ChangeStackLimit),
326 __kmp_msg_null);
327 }
328 }
329 }
330 }
331 KA_TRACE(10, ("__kmp_check_stack_overlap: returning\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_check_stack_overlap: returning\n"
); }
;
332}
333
334/* ------------------------------------------------------------------------ */
335
336void __kmp_infinite_loop(void) {
337 static int done = FALSE0;
338
339 while (!done) {
340 KMP_YIELD(TRUE){ __kmp_x86_pause(); if (((!0)) && (((__kmp_use_yield
== 1) || (__kmp_use_yield == 2 && (((__kmp_nth) >
(__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc))))))) __kmp_yield
(); }
;
341 }
342}
343
344#define MAX_MESSAGE512 512
345
346void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, size_t size,
347 char const *format, ...) {
348 char buffer[MAX_MESSAGE512];
349 va_list ap;
350
351 va_start(ap, format)__builtin_va_start(ap, format);
352 KMP_SNPRINTFsnprintf(buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1,
353 p2, (unsigned long)size, format);
354 __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
355 __kmp_vprintf(kmp_err, buffer, ap);
356#if KMP_PRINT_DATA_PLACEMENT
357 int node;
358 if (gtid >= 0) {
359 if (p1 <= p2 && (char *)p2 - (char *)p1 == size) {
360 if (__kmp_storage_map_verbose) {
361 node = __kmp_get_host_node(p1);
362 if (node < 0) /* doesn't work, so don't try this next time */
363 __kmp_storage_map_verbose = FALSE0;
364 else {
365 char *last;
366 int lastNode;
367 int localProc = __kmp_get_cpu_from_gtid(gtid);
368
369 const int page_size = KMP_GET_PAGE_SIZE()getpagesize();
370
371 p1 = (void *)((size_t)p1 & ~((size_t)page_size - 1));
372 p2 = (void *)(((size_t)p2 - 1) & ~((size_t)page_size - 1));
373 if (localProc >= 0)
374 __kmp_printf_no_lock(" GTID %d localNode %d\n", gtid,
375 localProc >> 1);
376 else
377 __kmp_printf_no_lock(" GTID %d\n", gtid);
378#if KMP_USE_PRCTL0
379 /* The more elaborate format is disabled for now because of the prctl
380 * hanging bug. */
381 do {
382 last = p1;
383 lastNode = node;
384 /* This loop collates adjacent pages with the same host node. */
385 do {
386 (char *)p1 += page_size;
387 } while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
388 __kmp_printf_no_lock(" %p-%p memNode %d\n", last, (char *)p1 - 1,
389 lastNode);
390 } while (p1 <= p2);
391#else
392 __kmp_printf_no_lock(" %p-%p memNode %d\n", p1,
393 (char *)p1 + (page_size - 1),
394 __kmp_get_host_node(p1));
395 if (p1 < p2) {
396 __kmp_printf_no_lock(" %p-%p memNode %d\n", p2,
397 (char *)p2 + (page_size - 1),
398 __kmp_get_host_node(p2));
399 }
400#endif
401 }
402 }
403 } else
404 __kmp_printf_no_lock(" %s\n", KMP_I18N_STR(StorageMapWarning)__kmp_i18n_catgets(kmp_i18n_str_StorageMapWarning));
405 }
406#endif /* KMP_PRINT_DATA_PLACEMENT */
407 __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
408}
409
410void __kmp_warn(char const *format, ...) {
411 char buffer[MAX_MESSAGE512];
412 va_list ap;
413
414 if (__kmp_generate_warnings == kmp_warnings_off) {
415 return;
416 }
417
418 va_start(ap, format)__builtin_va_start(ap, format);
419
420 KMP_SNPRINTFsnprintf(buffer, sizeof(buffer), "OMP warning: %s\n", format);
421 __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
422 __kmp_vprintf(kmp_err, buffer, ap);
423 __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
424
425 va_end(ap)__builtin_va_end(ap);
426}
427
428void __kmp_abort_process() {
429 // Later threads may stall here, but that's ok because abort() will kill them.
430 __kmp_acquire_bootstrap_lock(&__kmp_exit_lock);
431
432 if (__kmp_debug_buf) {
433 __kmp_dump_debug_buffer();
434 }
435
436 if (KMP_OS_WINDOWS0) {
437 // Let other threads know of abnormal termination and prevent deadlock
438 // if abort happened during library initialization or shutdown
439 __kmp_global.g.g_abort = SIGABRT6;
440
441 /* On Windows* OS by default abort() causes pop-up error box, which stalls
442 nightly testing. Unfortunately, we cannot reliably suppress pop-up error
443 boxes. _set_abort_behavior() works well, but this function is not
444 available in VS7 (this is not problem for DLL, but it is a problem for
445 static OpenMP RTL). SetErrorMode (and so, timelimit utility) does not
446 help, at least in some versions of MS C RTL.
447
448 It seems following sequence is the only way to simulate abort() and
449 avoid pop-up error box. */
450 raise(SIGABRT6);
451 _exit(3); // Just in case, if signal ignored, exit anyway.
452 } else {
453 __kmp_unregister_library();
454 abort();
455 }
456
457 __kmp_infinite_loop();
458 __kmp_release_bootstrap_lock(&__kmp_exit_lock);
459
460} // __kmp_abort_process
461
462void __kmp_abort_thread(void) {
463 // TODO: Eliminate g_abort global variable and this function.
464 // In case of abort just call abort(), it will kill all the threads.
465 __kmp_infinite_loop();
466} // __kmp_abort_thread
467
468/* Print out the storage map for the major kmp_info_t thread data structures
469 that are allocated together. */
470
471static void __kmp_print_thread_storage_map(kmp_info_t *thr, int gtid) {
472 __kmp_print_storage_map_gtid(gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d",
473 gtid);
474
475 __kmp_print_storage_map_gtid(gtid, &thr->th.th_info, &thr->th.th_team,
476 sizeof(kmp_desc_t), "th_%d.th_info", gtid);
477
478 __kmp_print_storage_map_gtid(gtid, &thr->th.th_local, &thr->th.th_pri_head,
479 sizeof(kmp_local_t), "th_%d.th_local", gtid);
480
481 __kmp_print_storage_map_gtid(
482 gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
483 sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid);
484
485 __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_plain_barrier],
486 &thr->th.th_bar[bs_plain_barrier + 1],
487 sizeof(kmp_balign_t), "th_%d.th_bar[plain]",
488 gtid);
489
490 __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_forkjoin_barrier],
491 &thr->th.th_bar[bs_forkjoin_barrier + 1],
492 sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]",
493 gtid);
494
495#if KMP_FAST_REDUCTION_BARRIER1
496 __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_reduction_barrier],
497 &thr->th.th_bar[bs_reduction_barrier + 1],
498 sizeof(kmp_balign_t), "th_%d.th_bar[reduction]",
499 gtid);
500#endif // KMP_FAST_REDUCTION_BARRIER
501}
502
503/* Print out the storage map for the major kmp_team_t team data structures
504 that are allocated together. */
505
506static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team,
507 int team_id, int num_thr) {
508 int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
509 __kmp_print_storage_map_gtid(-1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
510 header, team_id);
511
512 __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0],
513 &team->t.t_bar[bs_last_barrier],
514 sizeof(kmp_balign_team_t) * bs_last_barrier,
515 "%s_%d.t_bar", header, team_id);
516
517 __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_plain_barrier],
518 &team->t.t_bar[bs_plain_barrier + 1],
519 sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]",
520 header, team_id);
521
522 __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_forkjoin_barrier],
523 &team->t.t_bar[bs_forkjoin_barrier + 1],
524 sizeof(kmp_balign_team_t),
525 "%s_%d.t_bar[forkjoin]", header, team_id);
526
527#if KMP_FAST_REDUCTION_BARRIER1
528 __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_reduction_barrier],
529 &team->t.t_bar[bs_reduction_barrier + 1],
530 sizeof(kmp_balign_team_t),
531 "%s_%d.t_bar[reduction]", header, team_id);
532#endif // KMP_FAST_REDUCTION_BARRIER
533
534 __kmp_print_storage_map_gtid(
535 -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
536 sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id);
537
538 __kmp_print_storage_map_gtid(
539 -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
540 sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id);
541
542 __kmp_print_storage_map_gtid(-1, &team->t.t_disp_buffer[0],
543 &team->t.t_disp_buffer[num_disp_buff],
544 sizeof(dispatch_shared_info_t) * num_disp_buff,
545 "%s_%d.t_disp_buffer", header, team_id);
546}
547
548static void __kmp_init_allocator() {
549 __kmp_init_memkind();
550 __kmp_init_target_mem();
551}
552static void __kmp_fini_allocator() { __kmp_fini_memkind(); }
553
554/* ------------------------------------------------------------------------ */
555
556#if ENABLE_LIBOMPTARGET1
557static void __kmp_init_omptarget() {
558 __kmp_init_target_task();
559}
560#endif
561
562/* ------------------------------------------------------------------------ */
563
564#if KMP_DYNAMIC_LIB1
565#if KMP_OS_WINDOWS0
566
567BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) {
568 //__kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
569
570 switch (fdwReason) {
571
572 case DLL_PROCESS_ATTACH:
573 KA_TRACE(10, ("DllMain: PROCESS_ATTACH\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("DllMain: PROCESS_ATTACH\n"
); }
;
574
575 return TRUE(!0);
576
577 case DLL_PROCESS_DETACH:
578 KA_TRACE(10, ("DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific()))if (kmp_a_debug >= 10) { __kmp_debug_printf ("DllMain: PROCESS_DETACH T#%d\n"
, __kmp_gtid_get_specific()); }
;
579
580 // According to Windows* documentation for DllMain entry point:
581 // for DLL_PROCESS_DETACH, lpReserved is used for telling the difference:
582 // lpReserved == NULL when FreeLibrary() is called,
583 // lpReserved != NULL when the process is terminated.
584 // When FreeLibrary() is called, worker threads remain alive. So the
585 // runtime's state is consistent and executing proper shutdown is OK.
586 // When the process is terminated, worker threads have exited or been
587 // forcefully terminated by the OS and only the shutdown thread remains.
588 // This can leave the runtime in an inconsistent state.
589 // Hence, only attempt proper cleanup when FreeLibrary() is called.
590 // Otherwise, rely on OS to reclaim resources.
591 if (lpReserved == NULL__null)
592 __kmp_internal_end_library(__kmp_gtid_get_specific());
593
594 return TRUE(!0);
595
596 case DLL_THREAD_ATTACH:
597 KA_TRACE(10, ("DllMain: THREAD_ATTACH\n"))if (kmp_a_debug >= 10) { __kmp_debug_printf ("DllMain: THREAD_ATTACH\n"
); }
;
598
599 /* if we want to register new siblings all the time here call
600 * __kmp_get_gtid(); */
601 return TRUE(!0);
602
603 case DLL_THREAD_DETACH:
604 KA_TRACE(10, ("DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific()))if (kmp_a_debug >= 10) { __kmp_debug_printf ("DllMain: THREAD_DETACH T#%d\n"
, __kmp_gtid_get_specific()); }
;
605
606 __kmp_internal_end_thread(__kmp_gtid_get_specific());
607 return TRUE(!0);
608 }
609
610 return TRUE(!0);
611}
612
613#endif /* KMP_OS_WINDOWS */
614#endif /* KMP_DYNAMIC_LIB */
615
616/* __kmp_parallel_deo -- Wait until it's our turn. */
617void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
618 int gtid = *gtid_ref;
619#ifdef BUILD_PARALLEL_ORDERED1
620 kmp_team_t *team = __kmp_team_from_gtid(gtid);
621#endif /* BUILD_PARALLEL_ORDERED */
622
623 if (__kmp_env_consistency_check) {
624 if (__kmp_threads[gtid]->th.th_root->r.r_active)
625#if KMP_USE_DYNAMIC_LOCK1
626 __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL__null, 0);
627#else
628 __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL__null);
629#endif
630 }
631#ifdef BUILD_PARALLEL_ORDERED1
632 if (!team->t.t_serialized) {
633 KMP_MB();
634 KMP_WAIT__kmp_wait_4(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid), KMP_EQ__kmp_eq_4,
635 NULL__null);
636 KMP_MB();
637 }
638#endif /* BUILD_PARALLEL_ORDERED */
639}
640
641/* __kmp_parallel_dxo -- Signal the next task. */
642void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
643 int gtid = *gtid_ref;
644#ifdef BUILD_PARALLEL_ORDERED1
645 int tid = __kmp_tid_from_gtid(gtid);
646 kmp_team_t *team = __kmp_team_from_gtid(gtid);
647#endif /* BUILD_PARALLEL_ORDERED */
648
649 if (__kmp_env_consistency_check) {
650 if (__kmp_threads[gtid]->th.th_root->r.r_active)
651 __kmp_pop_sync(gtid, ct_ordered_in_parallel, loc_ref);
652 }
653#ifdef BUILD_PARALLEL_ORDERED1
654 if (!team->t.t_serialized) {
655 KMP_MB(); /* Flush all pending memory write invalidates. */
656
657 /* use the tid of the next thread in this team */
658 /* TODO replace with general release procedure */
659 team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc);
660
661 KMP_MB(); /* Flush all pending memory write invalidates. */
662 }
663#endif /* BUILD_PARALLEL_ORDERED */
664}
665
666/* ------------------------------------------------------------------------ */
667/* The BARRIER for a SINGLE process section is always explicit */
668
669int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws) {
670 int status;
671 kmp_info_t *th;
672 kmp_team_t *team;
673
674 if (!TCR_4(__kmp_init_parallel)(__kmp_init_parallel))
675 __kmp_parallel_initialize();
676 __kmp_resume_if_soft_paused();
677
678 th = __kmp_threads[gtid];
679 team = th->th.th_team;
680 status = 0;
681
682 th->th.th_ident = id_ref;
683
684 if (team->t.t_serialized) {
685 status = 1;
686 } else {
687 kmp_int32 old_this = th->th.th_local.this_construct;
688
689 ++th->th.th_local.this_construct;
690 /* try to set team count to thread count--success means thread got the
691 single block */
692 /* TODO: Should this be acquire or release? */
693 if (team->t.t_construct == old_this) {
694 status = __kmp_atomic_compare_store_acq(&team->t.t_construct, old_this,
695 th->th.th_local.this_construct);
696 }
697#if USE_ITT_BUILD1
698 if (__itt_metadata_add_ptr__kmp_itt_metadata_add_ptr__3_0 && __kmp_forkjoin_frames_mode == 3 &&
699 KMP_MASTER_GTID(gtid)(0 == __kmp_tid_from_gtid((gtid))) && th->th.th_teams_microtask == NULL__null &&
700 team->t.t_active_level == 1) {
701 // Only report metadata by primary thread of active team at level 1
702 __kmp_itt_metadata_single(id_ref);
703 }
704#endif /* USE_ITT_BUILD */
705 }
706
707 if (__kmp_env_consistency_check) {
708 if (status && push_ws) {
709 __kmp_push_workshare(gtid, ct_psingle, id_ref);
710 } else {
711 __kmp_check_workshare(gtid, ct_psingle, id_ref);
712 }
713 }
714#if USE_ITT_BUILD1
715 if (status) {
716 __kmp_itt_single_start(gtid);
717 }
718#endif /* USE_ITT_BUILD */
719 return status;
720}
721
722void __kmp_exit_single(int gtid) {
723#if USE_ITT_BUILD1
724 __kmp_itt_single_end(gtid);
725#endif /* USE_ITT_BUILD */
726 if (__kmp_env_consistency_check)
727 __kmp_pop_workshare(gtid, ct_psingle, NULL__null);
728}
729
730/* determine if we can go parallel or must use a serialized parallel region and
731 * how many threads we can use
732 * set_nproc is the number of threads requested for the team
733 * returns 0 if we should serialize or only use one thread,
734 * otherwise the number of threads to use
735 * The forkjoin lock is held by the caller. */
736static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
737 int master_tid, int set_nthreads,
738 int enter_teams) {
739 int capacity;
740 int new_nthreads;
741 KMP_DEBUG_ASSERT(__kmp_init_serial)if (!(__kmp_init_serial)) { __kmp_debug_assert("__kmp_init_serial"
, "openmp/runtime/src/kmp_runtime.cpp", 741); }
;
1. Assuming '__kmp_init_serial' is not equal to 0
742 KMP_DEBUG_ASSERT(root && parent_team)if (!(root && parent_team)) { __kmp_debug_assert("root && parent_team"
, "openmp/runtime/src/kmp_runtime.cpp", 742); }
;
2. Taking false branch
3. Assuming 'root' is null
4. Taking true branch
743 kmp_info_t *this_thr = parent_team->t.t_threads[master_tid];
744
745 // If dyn-var is set, dynamically adjust the number of desired threads,
746 // according to the method specified by dynamic_mode.
747 new_nthreads = set_nthreads;
748 if (!get__dynamic_2(parent_team, master_tid)((parent_team)->t.t_threads[(master_tid)]->th.th_current_task
->td_icvs.dynamic)
) {
5. Assuming field 'dynamic' is not equal to 0
6. Taking false branch
749 ;
750 }
751#ifdef USE_LOAD_BALANCE1
752 else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) {
7. Assuming field 'g_dynamic_mode' is equal to dynamic_load_balance
8. Taking true branch
753 new_nthreads = __kmp_load_balance_nproc(root, set_nthreads);
9. Passing null pointer value via 1st parameter 'root'
10. Calling '__kmp_load_balance_nproc'
754 if (new_nthreads == 1) {
755 KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d load balance reduced "
"reservation to 1 thread\n", master_tid); }
756 "reservation to 1 thread\n",if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d load balance reduced "
"reservation to 1 thread\n", master_tid); }
757 master_tid))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d load balance reduced "
"reservation to 1 thread\n", master_tid); }
;
758 return 1;
759 }
760 if (new_nthreads < set_nthreads) {
761 KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d load balance reduced "
"reservation to %d threads\n", master_tid, new_nthreads); }
762 "reservation to %d threads\n",if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d load balance reduced "
"reservation to %d threads\n", master_tid, new_nthreads); }
763 master_tid, new_nthreads))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d load balance reduced "
"reservation to %d threads\n", master_tid, new_nthreads); }
;
764 }
765 }
766#endif /* USE_LOAD_BALANCE */
767 else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) {
768 new_nthreads = __kmp_avail_proc - __kmp_nth +
769 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
770 if (new_nthreads <= 1) {
771 KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d thread limit reduced "
"reservation to 1 thread\n", master_tid); }
772 "reservation to 1 thread\n",if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d thread limit reduced "
"reservation to 1 thread\n", master_tid); }
773 master_tid))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d thread limit reduced "
"reservation to 1 thread\n", master_tid); }
;
774 return 1;
775 }
776 if (new_nthreads < set_nthreads) {
777 KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d thread limit reduced "
"reservation to %d threads\n", master_tid, new_nthreads); }
778 "reservation to %d threads\n",if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d thread limit reduced "
"reservation to %d threads\n", master_tid, new_nthreads); }
779 master_tid, new_nthreads))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d thread limit reduced "
"reservation to %d threads\n", master_tid, new_nthreads); }
;
780 } else {
781 new_nthreads = set_nthreads;
782 }
783 } else if (__kmp_global.g.g_dynamic_mode == dynamic_random) {
784 if (set_nthreads > 2) {
785 new_nthreads = __kmp_get_random(parent_team->t.t_threads[master_tid]);
786 new_nthreads = (new_nthreads % set_nthreads) + 1;
787 if (new_nthreads == 1) {
788 KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d dynamic random reduced "
"reservation to 1 thread\n", master_tid); }
789 "reservation to 1 thread\n",if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d dynamic random reduced "
"reservation to 1 thread\n", master_tid); }
790 master_tid))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d dynamic random reduced "
"reservation to 1 thread\n", master_tid); }
;
791 return 1;
792 }
793 if (new_nthreads < set_nthreads) {
794 KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d dynamic random reduced "
"reservation to %d threads\n", master_tid, new_nthreads); }
795 "reservation to %d threads\n",if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d dynamic random reduced "
"reservation to %d threads\n", master_tid, new_nthreads); }
796 master_tid, new_nthreads))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d dynamic random reduced "
"reservation to %d threads\n", master_tid, new_nthreads); }
;
797 }
798 }
799 } else {
800 KMP_ASSERT(0)if (!(0)) { __kmp_debug_assert("0", "openmp/runtime/src/kmp_runtime.cpp"
, 800); }
;
801 }
802
803 // Respect KMP_ALL_THREADS/KMP_DEVICE_THREAD_LIMIT.
804 if (__kmp_nth + new_nthreads -
805 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
806 __kmp_max_nth) {
807 int tl_nthreads = __kmp_max_nth - __kmp_nth +
808 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
809 if (tl_nthreads <= 0) {
810 tl_nthreads = 1;
811 }
812
813 // If dyn-var is false, emit a 1-time warning.
814 if (!get__dynamic_2(parent_team, master_tid)((parent_team)->t.t_threads[(master_tid)]->th.th_current_task
->td_icvs.dynamic)
&& (!__kmp_reserve_warn)) {
815 __kmp_reserve_warn = 1;
816 __kmp_msg(kmp_ms_warning,
817 KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads)__kmp_msg_format(kmp_i18n_msg_CantFormThrTeam, set_nthreads, tl_nthreads
)
,
818 KMP_HNT(Unset_ALL_THREADS)__kmp_msg_format(kmp_i18n_hnt_Unset_ALL_THREADS), __kmp_msg_null);
819 }
820 if (tl_nthreads == 1) {
821 KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "
"reduced reservation to 1 thread\n", master_tid); }
822 "reduced reservation to 1 thread\n",if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "
"reduced reservation to 1 thread\n", master_tid); }
823 master_tid))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "
"reduced reservation to 1 thread\n", master_tid); }
;
824 return 1;
825 }
826 KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "
"reservation to %d threads\n", master_tid, tl_nthreads); }
827 "reservation to %d threads\n",if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "
"reservation to %d threads\n", master_tid, tl_nthreads); }
828 master_tid, tl_nthreads))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "
"reservation to %d threads\n", master_tid, tl_nthreads); }
;
829 new_nthreads = tl_nthreads;
830 }
831
832 // Respect OMP_THREAD_LIMIT
833 int cg_nthreads = this_thr->th.th_cg_roots->cg_nthreads;
834 int max_cg_threads = this_thr->th.th_cg_roots->cg_thread_limit;
835 if (cg_nthreads + new_nthreads -
836 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
837 max_cg_threads) {
838 int tl_nthreads = max_cg_threads - cg_nthreads +
839 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
840 if (tl_nthreads <= 0) {
841 tl_nthreads = 1;
842 }
843
844 // If dyn-var is false, emit a 1-time warning.
845 if (!get__dynamic_2(parent_team, master_tid)((parent_team)->t.t_threads[(master_tid)]->th.th_current_task
->td_icvs.dynamic)
&& (!__kmp_reserve_warn)) {
846 __kmp_reserve_warn = 1;
847 __kmp_msg(kmp_ms_warning,
848 KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads)__kmp_msg_format(kmp_i18n_msg_CantFormThrTeam, set_nthreads, tl_nthreads
)
,
849 KMP_HNT(Unset_ALL_THREADS)__kmp_msg_format(kmp_i18n_hnt_Unset_ALL_THREADS), __kmp_msg_null);
850 }
851 if (tl_nthreads == 1) {
852 KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "
"reduced reservation to 1 thread\n", master_tid); }
853 "reduced reservation to 1 thread\n",if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "
"reduced reservation to 1 thread\n", master_tid); }
854 master_tid))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "
"reduced reservation to 1 thread\n", master_tid); }
;
855 return 1;
856 }
857 KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "
"reservation to %d threads\n", master_tid, tl_nthreads); }
858 "reservation to %d threads\n",if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "
"reservation to %d threads\n", master_tid, tl_nthreads); }
859 master_tid, tl_nthreads))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "
"reservation to %d threads\n", master_tid, tl_nthreads); }
;
860 new_nthreads = tl_nthreads;
861 }
862
863 // Check if the threads array is large enough, or needs expanding.
864 // See comment in __kmp_register_root() about the adjustment if
865 // __kmp_threads[0] == NULL.
866 capacity = __kmp_threads_capacity;
867 if (TCR_PTR(__kmp_threads[0])((void *)(__kmp_threads[0])) == NULL__null) {
868 --capacity;
869 }
870 // If it is not for initializing the hidden helper team, we need to take
871 // __kmp_hidden_helper_threads_num out of the capacity because it is included
872 // in __kmp_threads_capacity.
873 if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)(__kmp_init_hidden_helper_threads)) {
874 capacity -= __kmp_hidden_helper_threads_num;
875 }
876 if (__kmp_nth + new_nthreads -
877 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
878 capacity) {
879 // Expand the threads array.
880 int slotsRequired = __kmp_nth + new_nthreads -
881 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) -
882 capacity;
883 int slotsAdded = __kmp_expand_threads(slotsRequired);
884 if (slotsAdded < slotsRequired) {
885 // The threads array was not expanded enough.
886 new_nthreads -= (slotsRequired - slotsAdded);
887 KMP_ASSERT(new_nthreads >= 1)if (!(new_nthreads >= 1)) { __kmp_debug_assert("new_nthreads >= 1"
, "openmp/runtime/src/kmp_runtime.cpp", 887); }
;
888
889 // If dyn-var is false, emit a 1-time warning.
890 if (!get__dynamic_2(parent_team, master_tid)((parent_team)->t.t_threads[(master_tid)]->th.th_current_task
->td_icvs.dynamic)
&& (!__kmp_reserve_warn)) {
891 __kmp_reserve_warn = 1;
892 if (__kmp_tp_cached) {
893 __kmp_msg(kmp_ms_warning,
894 KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads)__kmp_msg_format(kmp_i18n_msg_CantFormThrTeam, set_nthreads, new_nthreads
)
,
895 KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity)__kmp_msg_format(kmp_i18n_hnt_Set_ALL_THREADPRIVATE, __kmp_tp_capacity
)
,
896 KMP_HNT(PossibleSystemLimitOnThreads)__kmp_msg_format(kmp_i18n_hnt_PossibleSystemLimitOnThreads), __kmp_msg_null);
897 } else {
898 __kmp_msg(kmp_ms_warning,
899 KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads)__kmp_msg_format(kmp_i18n_msg_CantFormThrTeam, set_nthreads, new_nthreads
)
,
900 KMP_HNT(SystemLimitOnThreads)__kmp_msg_format(kmp_i18n_hnt_SystemLimitOnThreads), __kmp_msg_null);
901 }
902 }
903 }
904 }
905
906#ifdef KMP_DEBUG1
907 if (new_nthreads == 1) {
908 KC_TRACE(10,if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d serializing team after reclaiming "
"dead roots and rechecking; requested %d threads\n", __kmp_get_global_thread_id
(), set_nthreads); }
909 ("__kmp_reserve_threads: T#%d serializing team after reclaiming "if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d serializing team after reclaiming "
"dead roots and rechecking; requested %d threads\n", __kmp_get_global_thread_id
(), set_nthreads); }
910 "dead roots and rechecking; requested %d threads\n",if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d serializing team after reclaiming "
"dead roots and rechecking; requested %d threads\n", __kmp_get_global_thread_id
(), set_nthreads); }
911 __kmp_get_gtid(), set_nthreads))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d serializing team after reclaiming "
"dead roots and rechecking; requested %d threads\n", __kmp_get_global_thread_id
(), set_nthreads); }
;
912 } else {
913 KC_TRACE(10, ("__kmp_reserve_threads: T#%d allocating %d threads; requested"if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d allocating %d threads; requested"
" %d threads\n", __kmp_get_global_thread_id(), new_nthreads,
set_nthreads); }
914 " %d threads\n",if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d allocating %d threads; requested"
" %d threads\n", __kmp_get_global_thread_id(), new_nthreads,
set_nthreads); }
915 __kmp_get_gtid(), new_nthreads, set_nthreads))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_reserve_threads: T#%d allocating %d threads; requested"
" %d threads\n", __kmp_get_global_thread_id(), new_nthreads,
set_nthreads); }
;
916 }
917#endif // KMP_DEBUG
918 return new_nthreads;
919}
920
921/* Allocate threads from the thread pool and assign them to the new team. We are
922 assured that there are enough threads available, because we checked on that
923 earlier within critical section forkjoin */
924static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
925 kmp_info_t *master_th, int master_gtid,
926 int fork_teams_workers) {
927 int i;
928 int use_hot_team;
929
930 KA_TRACE(10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_fork_team_threads: new_nprocs = %d\n"
, team->t.t_nproc); }
;
931 KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid())if (!(master_gtid == __kmp_get_global_thread_id())) { __kmp_debug_assert
("master_gtid == __kmp_get_global_thread_id()", "openmp/runtime/src/kmp_runtime.cpp"
, 931); }
;
932 KMP_MB();
933
934 /* first, let's setup the primary thread */
935 master_th->th.th_info.ds.ds_tid = 0;
936 master_th->th.th_team = team;
937 master_th->th.th_team_nproc = team->t.t_nproc;
938 master_th->th.th_team_master = master_th;
939 master_th->th.th_team_serialized = FALSE0;
940 master_th->th.th_dispatch = &team->t.t_dispatch[0];
941
942/* make sure we are not the optimized hot team */
943#if KMP_NESTED_HOT_TEAMS1
944 use_hot_team = 0;
945 kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
946 if (hot_teams) { // hot teams array is not allocated if
947 // KMP_HOT_TEAMS_MAX_LEVEL=0
948 int level = team->t.t_active_level - 1; // index in array of hot teams
949 if (master_th->th.th_teams_microtask) { // are we inside the teams?
950 if (master_th->th.th_teams_size.nteams > 1) {
951 ++level; // level was not increased in teams construct for
952 // team_of_masters
953 }
954 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
955 master_th->th.th_teams_level == team->t.t_level) {
956 ++level; // level was not increased in teams construct for
957 // team_of_workers before the parallel
958 } // team->t.t_level will be increased inside parallel
959 }
960 if (level < __kmp_hot_teams_max_level) {
961 if (hot_teams[level].hot_team) {
962 // hot team has already been allocated for given level
963 KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team)if (!(hot_teams[level].hot_team == team)) { __kmp_debug_assert
("hot_teams[level].hot_team == team", "openmp/runtime/src/kmp_runtime.cpp"
, 963); }
;
964 use_hot_team = 1; // the team is ready to use
965 } else {
966 use_hot_team = 0; // AC: threads are not allocated yet
967 hot_teams[level].hot_team = team; // remember new hot team
968 hot_teams[level].hot_team_nth = team->t.t_nproc;
969 }
970 } else {
971 use_hot_team = 0;
972 }
973 }
974#else
975 use_hot_team = team == root->r.r_hot_team;
976#endif
977 if (!use_hot_team) {
978
979 /* install the primary thread */
980 team->t.t_threads[0] = master_th;
981 __kmp_initialize_info(master_th, team, 0, master_gtid);
982
983 /* now, install the worker threads */
984 for (i = 1; i < team->t.t_nproc; i++) {
985
986 /* fork or reallocate a new thread and install it in team */
987 kmp_info_t *thr = __kmp_allocate_thread(root, team, i);
988 team->t.t_threads[i] = thr;
989 KMP_DEBUG_ASSERT(thr)if (!(thr)) { __kmp_debug_assert("thr", "openmp/runtime/src/kmp_runtime.cpp"
, 989); }
;
990 KMP_DEBUG_ASSERT(thr->th.th_team == team)if (!(thr->th.th_team == team)) { __kmp_debug_assert("thr->th.th_team == team"
, "openmp/runtime/src/kmp_runtime.cpp", 990); }
;
991 /* align team and thread arrived states */
992 KA_TRACE(20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
"T#%d(%d:%d) join =%llu, plain=%llu\n", __kmp_gtid_from_tid(
0, team), team->t.t_id, 0, __kmp_gtid_from_tid(i, team), team
->t.t_id, i, team->t.t_bar[bs_forkjoin_barrier].b_arrived
, team->t.t_bar[bs_plain_barrier].b_arrived); }
993 "T#%d(%d:%d) join =%llu, plain=%llu\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
"T#%d(%d:%d) join =%llu, plain=%llu\n", __kmp_gtid_from_tid(
0, team), team->t.t_id, 0, __kmp_gtid_from_tid(i, team), team
->t.t_id, i, team->t.t_bar[bs_forkjoin_barrier].b_arrived
, team->t.t_bar[bs_plain_barrier].b_arrived); }
994 __kmp_gtid_from_tid(0, team), team->t.t_id, 0,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
"T#%d(%d:%d) join =%llu, plain=%llu\n", __kmp_gtid_from_tid(
0, team), team->t.t_id, 0, __kmp_gtid_from_tid(i, team), team
->t.t_id, i, team->t.t_bar[bs_forkjoin_barrier].b_arrived
, team->t.t_bar[bs_plain_barrier].b_arrived); }
995 __kmp_gtid_from_tid(i, team), team->t.t_id, i,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
"T#%d(%d:%d) join =%llu, plain=%llu\n", __kmp_gtid_from_tid(
0, team), team->t.t_id, 0, __kmp_gtid_from_tid(i, team), team
->t.t_id, i, team->t.t_bar[bs_forkjoin_barrier].b_arrived
, team->t.t_bar[bs_plain_barrier].b_arrived); }
996 team->t.t_bar[bs_forkjoin_barrier].b_arrived,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
"T#%d(%d:%d) join =%llu, plain=%llu\n", __kmp_gtid_from_tid(
0, team), team->t.t_id, 0, __kmp_gtid_from_tid(i, team), team
->t.t_id, i, team->t.t_bar[bs_forkjoin_barrier].b_arrived
, team->t.t_bar[bs_plain_barrier].b_arrived); }
997 team->t.t_bar[bs_plain_barrier].b_arrived))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
"T#%d(%d:%d) join =%llu, plain=%llu\n", __kmp_gtid_from_tid(
0, team), team->t.t_id, 0, __kmp_gtid_from_tid(i, team), team
->t.t_id, i, team->t.t_bar[bs_forkjoin_barrier].b_arrived
, team->t.t_bar[bs_plain_barrier].b_arrived); }
;
998 thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
999 thr->th.th_teams_level = master_th->th.th_teams_level;
1000 thr->th.th_teams_size = master_th->th.th_teams_size;
1001 { // Initialize threads' barrier data.
1002 int b;
1003 kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar;
1004 for (b = 0; b < bs_last_barrier; ++b) {
1005 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
1006 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG)if (!(balign[b].bb.wait_flag != 2)) { __kmp_debug_assert("balign[b].bb.wait_flag != 2"
, "openmp/runtime/src/kmp_runtime.cpp", 1006); }
;
1007#if USE_DEBUGGER0
1008 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
1009#endif
1010 }
1011 }
1012 }
1013
1014#if KMP_AFFINITY_SUPPORTED1
1015 // Do not partition the places list for teams construct workers who
1016 // haven't actually been forked to do real work yet. This partitioning
1017 // will take place in the parallel region nested within the teams construct.
1018 if (!fork_teams_workers) {
1019 __kmp_partition_places(team);
1020 }
1021#endif
1022
1023 if (team->t.t_nproc > 1 &&
1024 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
1025 team->t.b->update_num_threads(team->t.t_nproc);
1026 __kmp_add_threads_to_team(team, team->t.t_nproc);
1027 }
1028 }
1029
1030 if (__kmp_display_affinity && team->t.t_display_affinity != 1) {
1031 for (i = 0; i < team->t.t_nproc; i++) {
1032 kmp_info_t *thr = team->t.t_threads[i];
1033 if (thr->th.th_prev_num_threads != team->t.t_nproc ||
1034 thr->th.th_prev_level != team->t.t_level) {
1035 team->t.t_display_affinity = 1;
1036 break;
1037 }
1038 }
1039 }
1040
1041 KMP_MB();
1042}
1043
1044#if KMP_ARCH_X860 || KMP_ARCH_X86_641
1045// Propagate any changes to the floating point control registers out to the team
1046// We try to avoid unnecessary writes to the relevant cache line in the team
1047// structure, so we don't make changes unless they are needed.
1048inline static void propagateFPControl(kmp_team_t *team) {
1049 if (__kmp_inherit_fp_control) {
1050 kmp_int16 x87_fpu_control_word;
1051 kmp_uint32 mxcsr;
1052
1053 // Get primary thread's values of FPU control flags (both X87 and vector)
1054 __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
1055 __kmp_store_mxcsr(&mxcsr);
1056 mxcsr &= KMP_X86_MXCSR_MASK0xffffffc0;
1057
1058 // There is no point looking at t_fp_control_saved here.
1059 // If it is TRUE, we still have to update the values if they are different
1060 // from those we now have. If it is FALSE we didn't save anything yet, but
1061 // our objective is the same. We have to ensure that the values in the team
1062 // are the same as those we have.
1063 // So, this code achieves what we need whether or not t_fp_control_saved is
1064 // true. By checking whether the value needs updating we avoid unnecessary
1065 // writes that would put the cache-line into a written state, causing all
1066 // threads in the team to have to read it again.
1067 KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
1068 KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);
1069 // Although we don't use this value, other code in the runtime wants to know
1070 // whether it should restore them. So we must ensure it is correct.
1071 KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
1072 } else {
1073 // Similarly here. Don't write to this cache-line in the team structure
1074 // unless we have to.
1075 KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);
1076 }
1077}
1078
1079// Do the opposite, setting the hardware registers to the updated values from
1080// the team.
1081inline static void updateHWFPControl(kmp_team_t *team) {
1082 if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) {
1083 // Only reset the fp control regs if they have been changed in the team
1084 // during the parallel region that we are exiting.
1085 kmp_int16 x87_fpu_control_word;
1086 kmp_uint32 mxcsr;
1087 __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
1088 __kmp_store_mxcsr(&mxcsr);
1089 mxcsr &= KMP_X86_MXCSR_MASK;
1090
1091 if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) {
1092 __kmp_clear_x87_fpu_status_word();
1093 __kmp_load_x87_fpu_control_word(&team->t.t_x87_fpu_control_word);
1094 }
1095
1096 if (team->t.t_mxcsr != mxcsr) {
1097 __kmp_load_mxcsr(&team->t.t_mxcsr);
1098 }
1099 }
1100}
1101#else
1102#define propagateFPControl(x) ((void)0)
1103#define updateHWFPControl(x) ((void)0)
1104#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
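
The comments in propagateFPControl describe the idiom behind KMP_CHECK_UPDATE: compare before storing, so the team's cache line is dirtied only when a value really changed and worker threads are not forced to re-read an unchanged line. A minimal sketch of that idiom, assuming nothing beyond standard C++ (set_if_changed is a hypothetical name, not a runtime API):

// Illustrative sketch only; not part of kmp_runtime.cpp.
template <typename T>
static inline void set_if_changed(T &dst, const T &src) {
  // An unconditional store would put the cache line into modified state even
  // when the value is unchanged; comparing first avoids that coherence traffic.
  if (dst != src)
    dst = src;
}
// e.g. set_if_changed(team_mxcsr, current_mxcsr); mirrors
// KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr) above.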
1105
1106static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team,
1107 int realloc); // forward declaration
1108
1109/* Run a parallel region that has been serialized, so it runs only in a team
1110 of the single primary thread. */
1111void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
1112 kmp_info_t *this_thr;
1113 kmp_team_t *serial_team;
1114
1115 KC_TRACE(10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmpc_serialized_parallel: called by T#%d\n"
, global_tid); }
;
1116
1117 /* Skip all this code for autopar serialized loops since it results in
1118 unacceptable overhead */
1119 if (loc != NULL__null && (loc->flags & KMP_IDENT_AUTOPAR))
1120 return;
1121
1122 if (!TCR_4(__kmp_init_parallel)(__kmp_init_parallel))
1123 __kmp_parallel_initialize();
1124 __kmp_resume_if_soft_paused();
1125
1126 this_thr = __kmp_threads[global_tid];
1127 serial_team = this_thr->th.th_serial_team;
1128
1129 /* utilize the serialized team held by this thread */
1130 KMP_DEBUG_ASSERT(serial_team);
1131 KMP_MB();
1132
1133 if (__kmp_tasking_mode != tskm_immediate_exec) {
1134 KMP_DEBUG_ASSERT(
1135 this_thr->th.th_task_team ==
1136 this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
1137 KMP_DEBUG_ASSERT(serial_team->t.t_task_team[this_thr->th.th_task_state] ==
1138 NULL);
1139 KA_TRACE(20, ("__kmpc_serialized_parallel: T#%d pushing task_team %p / "
1140 "team %p, new task_team = NULL\n",
1141 global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
1142 this_thr->th.th_task_team = NULL;
1143 }
1144
1145 kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
1146 if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
1147 proc_bind = proc_bind_false;
1148 } else if (proc_bind == proc_bind_default) {
1149 // No proc_bind clause was specified, so use the current value
1150 // of proc-bind-var for this parallel region.
1151 proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
1152 }
1153 // Reset for next parallel region
1154 this_thr->th.th_set_proc_bind = proc_bind_default;
1155
1156#if OMPT_SUPPORT
1157 ompt_data_t ompt_parallel_data = ompt_data_none;
1158 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1159 if (ompt_enabled.enabled &&
1160 this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
1161
1162 ompt_task_info_t *parent_task_info;
1163 parent_task_info = OMPT_CUR_TASK_INFO(this_thr);
1164
1165 parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1166 if (ompt_enabled.ompt_callback_parallel_begin) {
1167 int team_size = 1;
1168
1169 ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
1170 &(parent_task_info->task_data), &(parent_task_info->frame),
1171 &ompt_parallel_data, team_size,
1172 ompt_parallel_invoker_program | ompt_parallel_team, codeptr);
1173 }
1174 }
1175#endif // OMPT_SUPPORT
1176
1177 if (this_thr->th.th_team != serial_team) {
1178 // Nested level will be an index in the nested nthreads array
1179 int level = this_thr->th.th_team->t.t_level;
1180
1181 if (serial_team->t.t_serialized) {
1182 /* this serial team was already used
1183 TODO increase performance by making this locks more specific */
1184 kmp_team_t *new_team;
1185
1186 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
1187
1188 new_team =
1189 __kmp_allocate_team(this_thr->th.th_root, 1, 1,
1190#if OMPT_SUPPORT
1191 ompt_parallel_data,
1192#endif
1193 proc_bind, &this_thr->th.th_current_task->td_icvs,
1194 0 USE_NESTED_HOT_ARG(NULL));
1195 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
1196 KMP_ASSERT(new_team);
1197
1198 /* setup new serialized team and install it */
1199 new_team->t.t_threads[0] = this_thr;
1200 new_team->t.t_parent = this_thr->th.th_team;
1201 serial_team = new_team;
1202 this_thr->th.th_serial_team = serial_team;
1203
1204 KF_TRACE(
1205 10,
1206 ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
1207 global_tid, serial_team));
1208
1209 /* TODO the above breaks the requirement that if we run out of resources,
1210 then we can still guarantee that serialized teams are ok, since we may
1211 need to allocate a new one */
1212 } else {
1213 KF_TRACE(
1214 10,
1215 ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
1216 global_tid, serial_team));
1217 }
1218
1219 /* we have to initialize this serial team */
1220 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
1221 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
1222 KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team);
1223 serial_team->t.t_ident = loc;
1224 serial_team->t.t_serialized = 1;
1225 serial_team->t.t_nproc = 1;
1226 serial_team->t.t_parent = this_thr->th.th_team;
1227 serial_team->t.t_sched.sched = this_thr->th.th_team->t.t_sched.sched;
1228 this_thr->th.th_team = serial_team;
1229 serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;
1230
1231 KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d curtask=%p\n", global_tid,
1232 this_thr->th.th_current_task));
1233 KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1);
1234 this_thr->th.th_current_task->td_flags.executing = 0;
1235
1236 __kmp_push_current_task_to_thread(this_thr, serial_team, 0);
1237
1238 /* TODO: GEH: do ICVs work for nested serialized teams? Don't we need an
1239 implicit task for each serialized task represented by
1240 team->t.t_serialized? */
1241 copy_icvs(&this_thr->th.th_current_task->td_icvs,
1242 &this_thr->th.th_current_task->td_parent->td_icvs);
1243
1244 // Thread value exists in the nested nthreads array for the next nested
1245 // level
1246 if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
1247 this_thr->th.th_current_task->td_icvs.nproc =
1248 __kmp_nested_nth.nth[level + 1];
1249 }
1250
1251 if (__kmp_nested_proc_bind.used &&
1252 (level + 1 < __kmp_nested_proc_bind.used)) {
1253 this_thr->th.th_current_task->td_icvs.proc_bind =
1254 __kmp_nested_proc_bind.bind_types[level + 1];
1255 }
1256
1257#if USE_DEBUGGER
1258 serial_team->t.t_pkfn = (microtask_t)(~0); // For the debugger.
1259#endif
1260 this_thr->th.th_info.ds.ds_tid = 0;
1261
1262 /* set thread cache values */
1263 this_thr->th.th_team_nproc = 1;
1264 this_thr->th.th_team_master = this_thr;
1265 this_thr->th.th_team_serialized = 1;
1266
1267 serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
1268 serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;
1269 serial_team->t.t_def_allocator = this_thr->th.th_def_allocator; // save
1270
1271 propagateFPControl(serial_team);
1272
1273 /* check if we need to allocate dispatch buffers stack */
1274 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
1275 if (!serial_team->t.t_dispatch->th_disp_buffer) {
1276 serial_team->t.t_dispatch->th_disp_buffer =
1277 (dispatch_private_info_t *)__kmp_allocate(
1278 sizeof(dispatch_private_info_t));
1279 }
1280 this_thr->th.th_dispatch = serial_team->t.t_dispatch;
1281
1282 KMP_MB();
1283
1284 } else {
1285 /* this serialized team is already being used,
1286 * that's fine, just add another nested level */
1287 KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
1288 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
1289 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
1290 ++serial_team->t.t_serialized;
1291 this_thr->th.th_team_serialized = serial_team->t.t_serialized;
1292
1293 // Nested level will be an index in the nested nthreads array
1294 int level = this_thr->th.th_team->t.t_level;
1295 // Thread value exists in the nested nthreads array for the next nested
1296 // level
1297 if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
1298 this_thr->th.th_current_task->td_icvs.nproc =
1299 __kmp_nested_nth.nth[level + 1];
1300 }
1301 serial_team->t.t_level++;
1302 KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d increasing nesting level "if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmpc_serialized_parallel: T#%d increasing nesting level "
"of serial team %p to %d\n", global_tid, serial_team, serial_team
->t.t_level); }
1303 "of serial team %p to %d\n",if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmpc_serialized_parallel: T#%d increasing nesting level "
"of serial team %p to %d\n", global_tid, serial_team, serial_team
->t.t_level); }
1304 global_tid, serial_team, serial_team->t.t_level))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmpc_serialized_parallel: T#%d increasing nesting level "
"of serial team %p to %d\n", global_tid, serial_team, serial_team
->t.t_level); }
;
1305
1306 /* allocate/push dispatch buffers stack */
1307 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch)if (!(serial_team->t.t_dispatch)) { __kmp_debug_assert("serial_team->t.t_dispatch"
, "openmp/runtime/src/kmp_runtime.cpp", 1307); }
;
1308 {
1309 dispatch_private_info_t *disp_buffer =
1310 (dispatch_private_info_t *)__kmp_allocate(___kmp_allocate((sizeof(dispatch_private_info_t)), "openmp/runtime/src/kmp_runtime.cpp"
, 1311)
1311 sizeof(dispatch_private_info_t))___kmp_allocate((sizeof(dispatch_private_info_t)), "openmp/runtime/src/kmp_runtime.cpp"
, 1311)
;
1312 disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
1313 serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
1314 }
1315 this_thr->th.th_dispatch = serial_team->t.t_dispatch;
1316
1317 KMP_MB();
1318 }
1319 KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq);
1320
1321 // Perform the display affinity functionality for
1322 // serialized parallel regions
1323 if (__kmp_display_affinity) {
1324 if (this_thr->th.th_prev_level != serial_team->t.t_level ||
1325 this_thr->th.th_prev_num_threads != 1) {
1326 // NULL means use the affinity-format-var ICV
1327 __kmp_aux_display_affinity(global_tid, NULL);
1328 this_thr->th.th_prev_level = serial_team->t.t_level;
1329 this_thr->th.th_prev_num_threads = 1;
1330 }
1331 }
1332
1333 if (__kmp_env_consistency_check)
1334 __kmp_push_parallel(global_tid, NULL);
1335#if OMPT_SUPPORT
1336 serial_team->t.ompt_team_info.master_return_address = codeptr;
1337 if (ompt_enabled.enabled &&
1338 this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
1339 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
1340 OMPT_GET_FRAME_ADDRESS(0);
1341
1342 ompt_lw_taskteam_t lw_taskteam;
1343 __ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid,
1344 &ompt_parallel_data, codeptr);
1345
1346 __ompt_lw_taskteam_link(&lw_taskteam, this_thr, 1);
1347 // don't use lw_taskteam after linking. content was swapped
1348
1349 /* OMPT implicit task begin */
1350 if (ompt_enabled.ompt_callback_implicit_task) {
1351 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1352 ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr),
1353 OMPT_CUR_TASK_DATA(this_thr), 1, __kmp_tid_from_gtid(global_tid),
1354 ompt_task_implicit); // TODO: Can this be ompt_task_initial?
1355 OMPT_CUR_TASK_INFO(this_thr)->thread_num =
1356 __kmp_tid_from_gtid(global_tid);
1357 }
1358
1359 /* OMPT state */
1360 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
1361 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
1362 OMPT_GET_FRAME_ADDRESS(0);
1363 }
1364#endif
1365}
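
The nested branch of __kmp_serialized_parallel pushes a fresh dispatch buffer onto the serial team's th_disp_buffer list, so each nesting level of a serialized region gets its own loop-scheduling state. A minimal sketch of that push/pop pattern, with disp_node, disp_push and disp_pop as hypothetical names used only for illustration:

// Illustrative sketch only; not part of kmp_runtime.cpp.
#include <cstdlib>
struct disp_node {
  disp_node *next; // buffer of the enclosing (outer) serialized level
  int state;       // stand-in for the per-level dispatch bookkeeping
};
static void disp_push(disp_node **head) {
  disp_node *n = (disp_node *)std::calloc(1, sizeof(disp_node));
  n->next = *head; // keep the enclosing level's buffer reachable
  *head = n;       // the new innermost level becomes the stack top
}
static void disp_pop(disp_node **head) {
  disp_node *n = *head;
  *head = n->next; // restore the enclosing level when the region ends
  std::free(n);
}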
1366
1367// Test if this fork is for a team closely nested in a teams construct
1368static inline bool __kmp_is_fork_in_teams(kmp_info_t *master_th,
1369 microtask_t microtask, int level,
1370 int teams_level, kmp_va_list ap) {
1371 return (master_th->th.th_teams_microtask && ap &&
1372 microtask != (microtask_t)__kmp_teams_master && level == teams_level);
1373}
1374
1375// Test if this fork is for the teams construct, i.e. to form the outer league
1376// of teams
1377static inline bool __kmp_is_entering_teams(int active_level, int level,
1378 int teams_level, kmp_va_list ap) {
1379 return ((ap == NULL && active_level == 0) ||
1380 (ap && teams_level > 0 && teams_level == level));
1381}
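
These predicates feed the serialization decision a little further down in __kmp_fork_call: a region is forced onto the serial path when the active nesting depth has reached max-active-levels (unless the fork is entering a teams construct) or when the library runs in serial mode. A compact restatement of that check, with must_serialize as a hypothetical helper name:

// Illustrative sketch only; not part of kmp_runtime.cpp.
static bool must_serialize(bool enter_teams, int active_level,
                           int max_active_levels, bool library_serial) {
  // Entering a teams construct is exempt from the max-active-levels cap.
  return (!enter_teams && active_level >= max_active_levels) || library_serial;
}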
1382
1383// AC: This is start of parallel that is nested inside teams construct.
1384// The team is actual (hot), all workers are ready at the fork barrier.
1385// No lock needed to initialize the team a bit, then free workers.
1386static inline int
1387__kmp_fork_in_teams(ident_t *loc, int gtid, kmp_team_t *parent_team,
1388 kmp_int32 argc, kmp_info_t *master_th, kmp_root_t *root,
1389 enum fork_context_e call_context, microtask_t microtask,
1390 launch_t invoker, int master_set_numthreads, int level,
1391#if OMPT_SUPPORT
1392 ompt_data_t ompt_parallel_data, void *return_address,
1393#endif
1394 kmp_va_list ap) {
1395 void **argv;
1396 int i;
1397
1398 parent_team->t.t_ident = loc;
1399 __kmp_alloc_argv_entries(argc, parent_team, TRUE);
1400 parent_team->t.t_argc = argc;
1401 argv = (void **)parent_team->t.t_argv;
1402 for (i = argc - 1; i >= 0; --i) {
1403 *argv++ = va_arg(kmp_va_deref(ap), void *);
1404 }
1405 // Increment our nested depth levels, but not increase the serialization
1406 if (parent_team == master_th->th.th_serial_team) {
1407 // AC: we are in serialized parallel
1408 __kmpc_serialized_parallel(loc, gtid);
1409 KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1);
1410
1411 if (call_context == fork_context_gnu) {
1412 // AC: need to decrement t_serialized for enquiry functions to work
1413 // correctly, will restore at join time
1414 parent_team->t.t_serialized--;
1415 return TRUE;
1416 }
1417
1418#if OMPD_SUPPORT
1419 parent_team->t.t_pkfn = microtask;
1420#endif
1421
1422#if OMPT_SUPPORT
1423 void *dummy;
1424 void **exit_frame_p;
1425 ompt_data_t *implicit_task_data;
1426 ompt_lw_taskteam_t lw_taskteam;
1427
1428 if (ompt_enabled.enabled) {
1429 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1430 &ompt_parallel_data, return_address);
1431 exit_frame_p = &(lw_taskteam.ompt_task_info.frame.exit_frame.ptr);
1432
1433 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
1434 // Don't use lw_taskteam after linking. Content was swapped.
1435
1436 /* OMPT implicit task begin */
1437 implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
1438 if (ompt_enabled.ompt_callback_implicit_task) {
1439 OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
1440 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1441 ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), implicit_task_data,
1442 1, OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
1443 }
1444
1445 /* OMPT state */
1446 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1447 } else {
1448 exit_frame_p = &dummy;
1449 }
1450#endif
1451
1452 // AC: need to decrement t_serialized for enquiry functions to work
1453 // correctly, will restore at join time
1454 parent_team->t.t_serialized--;
1455
1456 {
1457 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1458 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1459 __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
1460#if OMPT_SUPPORT
1461 ,
1462 exit_frame_p
1463#endif
1464 );
1465 }
1466
1467#if OMPT_SUPPORT
1468 if (ompt_enabled.enabled) {
1469 *exit_frame_p = NULL;
1470 OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = ompt_data_none;
1471 if (ompt_enabled.ompt_callback_implicit_task) {
1472 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1473 ompt_scope_end, NULL, implicit_task_data, 1,
1474 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
1475 }
1476 ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
1477 __ompt_lw_taskteam_unlink(master_th);
1478 if (ompt_enabled.ompt_callback_parallel_end) {
1479 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
1480 &ompt_parallel_data, OMPT_CUR_TASK_DATA(master_th),
1481 OMPT_INVOKER(call_context) | ompt_parallel_team, return_address);
1482 }
1483 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1484 }
1485#endif
1486 return TRUE;
1487 }
1488
1489 parent_team->t.t_pkfn = microtask;
1490 parent_team->t.t_invoke = invoker;
1491 KMP_ATOMIC_INC(&root->r.r_in_parallel);
1492 parent_team->t.t_active_level++;
1493 parent_team->t.t_level++;
1494 parent_team->t.t_def_allocator = master_th->th.th_def_allocator; // save
1495
1496 // If the threads allocated to the team are less than the thread limit, update
1497 // the thread limit here. th_teams_size.nth is specific to this team nested
1498 // in a teams construct, the team is fully created, and we're about to do
1499 // the actual fork. Best to do this here so that the subsequent uses below
1500 // and in the join have the correct value.
1501 master_th->th.th_teams_size.nth = parent_team->t.t_nproc;
1502
1503#if OMPT_SUPPORT
1504 if (ompt_enabled.enabled) {
1505 ompt_lw_taskteam_t lw_taskteam;
1506 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, &ompt_parallel_data,
1507 return_address);
1508 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 1, true);
1509 }
1510#endif
1511
1512 /* Change number of threads in the team if requested */
1513 if (master_set_numthreads) { // The parallel has num_threads clause
1514 if (master_set_numthreads <= master_th->th.th_teams_size.nth) {
1515 // AC: only can reduce number of threads dynamically, can't increase
1516 kmp_info_t **other_threads = parent_team->t.t_threads;
1517 // NOTE: if using distributed barrier, we need to run this code block
1518 // even when the team size appears not to have changed from the max.
1519 int old_proc = master_th->th.th_teams_size.nth;
1520 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
1521 __kmp_resize_dist_barrier(parent_team, old_proc, master_set_numthreads);
1522 __kmp_add_threads_to_team(parent_team, master_set_numthreads);
1523 }
1524 parent_team->t.t_nproc = master_set_numthreads;
1525 for (i = 0; i < master_set_numthreads; ++i) {
1526 other_threads[i]->th.th_team_nproc = master_set_numthreads;
1527 }
1528 }
1529 // Keep extra threads hot in the team for possible next parallels
1530 master_th->th.th_set_nproc = 0;
1531 }
1532
1533#if USE_DEBUGGER
1534 if (__kmp_debugging) { // Let debugger override number of threads.
1535 int nth = __kmp_omp_num_threads(loc);
1536 if (nth > 0) { // 0 means debugger doesn't want to change num threads
1537 master_set_numthreads = nth;
1538 }
1539 }
1540#endif
1541
1542 // Figure out the proc_bind policy for the nested parallel within teams
1543 kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
1544 // proc_bind_default means don't update
1545 kmp_proc_bind_t proc_bind_icv = proc_bind_default;
1546 if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
1547 proc_bind = proc_bind_false;
1548 } else {
1549 // No proc_bind clause specified; use current proc-bind-var
1550 if (proc_bind == proc_bind_default) {
1551 proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
1552 }
1553 /* else: The proc_bind policy was specified explicitly on parallel clause.
1554 This overrides proc-bind-var for this parallel region, but does not
1555 change proc-bind-var. */
1556 // Figure the value of proc-bind-var for the child threads.
1557 if ((level + 1 < __kmp_nested_proc_bind.used) &&
1558 (__kmp_nested_proc_bind.bind_types[level + 1] !=
1559 master_th->th.th_current_task->td_icvs.proc_bind)) {
1560 proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
1561 }
1562 }
1563 KMP_CHECK_UPDATE(parent_team->t.t_proc_bind, proc_bind);
1564 // Need to change the bind-var ICV to correct value for each implicit task
1565 if (proc_bind_icv != proc_bind_default &&
1566 master_th->th.th_current_task->td_icvs.proc_bind != proc_bind_icv) {
1567 kmp_info_t **other_threads = parent_team->t.t_threads;
1568 for (i = 0; i < master_th->th.th_team_nproc; ++i) {
1569 other_threads[i]->th.th_current_task->td_icvs.proc_bind = proc_bind_icv;
1570 }
1571 }
1572 // Reset for next parallel region
1573 master_th->th.th_set_proc_bind = proc_bind_default;
1574
1575#if USE_ITT_BUILD && USE_ITT_NOTIFY
1576 if (((__itt_frame_submit_v3_ptr && __itt_get_timestamp_ptr) ||
1577 KMP_ITT_DEBUG) &&
1578 __kmp_forkjoin_frames_mode == 3 &&
1579 parent_team->t.t_active_level == 1 // only report frames at level 1
1580 && master_th->th.th_teams_size.nteams == 1) {
1581 kmp_uint64 tmp_time = __itt_get_timestamp();
1582 master_th->th.th_frame_time = tmp_time;
1583 parent_team->t.t_region_time = tmp_time;
1584 }
1585 if (__itt_stack_caller_create_ptr) {
1586 KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);
1587 // create new stack stitching id before entering fork barrier
1588 parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();
1589 }
1590#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1591#if KMP_AFFINITY_SUPPORTED
1592 __kmp_partition_places(parent_team);
1593#endif
1594
1595 KF_TRACE(10, ("__kmp_fork_in_teams: before internal fork: root=%p, team=%p, "if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_fork_in_teams: before internal fork: root=%p, team=%p, "
"master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid
); }
1596 "master_th=%p, gtid=%d\n",if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_fork_in_teams: before internal fork: root=%p, team=%p, "
"master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid
); }
1597 root, parent_team, master_th, gtid))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_fork_in_teams: before internal fork: root=%p, team=%p, "
"master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid
); }
;
1598 __kmp_internal_fork(loc, gtid, parent_team);
1599 KF_TRACE(10, ("__kmp_fork_in_teams: after internal fork: root=%p, team=%p, "if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_fork_in_teams: after internal fork: root=%p, team=%p, "
"master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid
); }
1600 "master_th=%p, gtid=%d\n",if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_fork_in_teams: after internal fork: root=%p, team=%p, "
"master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid
); }
1601 root, parent_team, master_th, gtid))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_fork_in_teams: after internal fork: root=%p, team=%p, "
"master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid
); }
;
1602
1603 if (call_context == fork_context_gnu)
1604 return TRUE(!0);
1605
1606 /* Invoke microtask for PRIMARY thread */
1607 KA_TRACE(20, ("__kmp_fork_in_teams: T#%d(%d:0) invoke microtask = %p\n", gtid,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_in_teams: T#%d(%d:0) invoke microtask = %p\n"
, gtid, parent_team->t.t_id, parent_team->t.t_pkfn); }
1608 parent_team->t.t_id, parent_team->t.t_pkfn))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_in_teams: T#%d(%d:0) invoke microtask = %p\n"
, gtid, parent_team->t.t_id, parent_team->t.t_pkfn); }
;
1609
1610 if (!parent_team->t.t_invoke(gtid)) {
1611 KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread")if (!(0)) { __kmp_debug_assert(("cannot invoke microtask for PRIMARY thread"
), "openmp/runtime/src/kmp_runtime.cpp", 1611); }
;
1612 }
1613 KA_TRACE(20, ("__kmp_fork_in_teams: T#%d(%d:0) done microtask = %p\n", gtid,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_in_teams: T#%d(%d:0) done microtask = %p\n"
, gtid, parent_team->t.t_id, parent_team->t.t_pkfn); }
1614 parent_team->t.t_id, parent_team->t.t_pkfn))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_in_teams: T#%d(%d:0) done microtask = %p\n"
, gtid, parent_team->t.t_id, parent_team->t.t_pkfn); }
;
1615 KMP_MB(); /* Flush all pending memory write invalidates. */
1616
1617 KA_TRACE(20, ("__kmp_fork_in_teams: parallel exit T#%d\n", gtid))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_fork_in_teams: parallel exit T#%d\n"
, gtid); }
;
1618
1619 return TRUE(!0);
1620}
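
The proc_bind handling in __kmp_fork_in_teams follows a fixed precedence: a proc-bind-var of false disables binding outright, an explicit proc_bind clause overrides the inherited value for this region only, and otherwise the inherited proc-bind-var is used. A compact restatement of that precedence, with resolve_proc_bind and the *_v enumerators as hypothetical names (the runtime uses its own kmp_proc_bind_t values):

// Illustrative sketch only; not part of kmp_runtime.cpp.
enum bind_kind { bind_false_v, bind_default_v, bind_true_v, bind_spread_v };
static bind_kind resolve_proc_bind(bind_kind clause, bind_kind bind_var) {
  if (bind_var == bind_false_v)
    return bind_false_v;  // binding disabled: the clause is ignored
  if (clause == bind_default_v)
    return bind_var;      // no clause on the directive: inherit proc-bind-var
  return clause;          // clause overrides bind-var for this region only
}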
1621
1622// Create a serialized parallel region
1623static inline int
1624__kmp_serial_fork_call(ident_t *loc, int gtid, enum fork_context_e call_context,
1625 kmp_int32 argc, microtask_t microtask, launch_t invoker,
1626 kmp_info_t *master_th, kmp_team_t *parent_team,
1627#if OMPT_SUPPORT
1628 ompt_data_t *ompt_parallel_data, void **return_address,
1629 ompt_data_t **parent_task_data,
1630#endif
1631 kmp_va_list ap) {
1632 kmp_team_t *team;
1633 int i;
1634 void **argv;
1635
1636/* josh todo: hypothetical question: what do we do for OS X*? */
1637#if KMP_OS_LINUX && \
1638 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
1639 void *args[argc];
1640#else
1641 void **args = (void **)KMP_ALLOCA(argc * sizeof(void *));
1642#endif /* KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || \
1643 KMP_ARCH_AARCH64) */
1644
1645 KA_TRACE(
1646 20, ("__kmp_serial_fork_call: T#%d serializing parallel region\n", gtid));
1647
1648 __kmpc_serialized_parallel(loc, gtid);
1649
1650#if OMPD_SUPPORT
1651 master_th->th.th_serial_team->t.t_pkfn = microtask;
1652#endif
1653
1654 if (call_context == fork_context_intel) {
1655 /* TODO this sucks, use the compiler itself to pass args! :) */
1656 master_th->th.th_serial_team->t.t_ident = loc;
1657 if (!ap) {
1658 // revert change made in __kmpc_serialized_parallel()
1659 master_th->th.th_serial_team->t.t_level--;
1660// Get args from parent team for teams construct
1661
1662#if OMPT_SUPPORT
1663 void *dummy;
1664 void **exit_frame_p;
1665 ompt_task_info_t *task_info;
1666 ompt_lw_taskteam_t lw_taskteam;
1667
1668 if (ompt_enabled.enabled) {
1669 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1670 ompt_parallel_data, *return_address);
1671
1672 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
1673 // don't use lw_taskteam after linking. content was swapped
1674 task_info = OMPT_CUR_TASK_INFO(master_th);
1675 exit_frame_p = &(task_info->frame.exit_frame.ptr);
1676 if (ompt_enabled.ompt_callback_implicit_task) {
1677 OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
1678 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1679 ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
1680 &(task_info->task_data), 1,
1681 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
1682 }
1683
1684 /* OMPT state */
1685 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1686 } else {
1687 exit_frame_p = &dummy;
1688 }
1689#endif
1690
1691 {
1692 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1693 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1694 __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
1695#if OMPT_SUPPORT
1696 ,
1697 exit_frame_p
1698#endif
1699 );
1700 }
1701
1702#if OMPT_SUPPORT
1703 if (ompt_enabled.enabled) {
1704 *exit_frame_p = NULL;
1705 if (ompt_enabled.ompt_callback_implicit_task) {
1706 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1707 ompt_scope_end, NULL, &(task_info->task_data), 1,
1708 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
1709 }
1710 *ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
1711 __ompt_lw_taskteam_unlink(master_th);
1712 if (ompt_enabled.ompt_callback_parallel_end) {
1713 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
1714 ompt_parallel_data, *parent_task_data,
1715 OMPT_INVOKER(call_context) | ompt_parallel_team, *return_address);
1716 }
1717 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1718 }
1719#endif
1720 } else if (microtask == (microtask_t)__kmp_teams_master) {
1721 KMP_DEBUG_ASSERT(master_th->th.th_team == master_th->th.th_serial_team);
1722 team = master_th->th.th_team;
1723 // team->t.t_pkfn = microtask;
1724 team->t.t_invoke = invoker;
1725 __kmp_alloc_argv_entries(argc, team, TRUE);
1726 team->t.t_argc = argc;
1727 argv = (void **)team->t.t_argv;
1728 if (ap) {
1729 for (i = argc - 1; i >= 0; --i)
1730 *argv++ = va_arg(kmp_va_deref(ap), void *);
1731 } else {
1732 for (i = 0; i < argc; ++i)
1733 // Get args from parent team for teams construct
1734 argv[i] = parent_team->t.t_argv[i];
1735 }
1736 // AC: revert change made in __kmpc_serialized_parallel()
1737 // because initial code in teams should have level=0
1738 team->t.t_level--;
1739 // AC: call special invoker for outer "parallel" of teams construct
1740 invoker(gtid);
1741#if OMPT_SUPPORT
1742 if (ompt_enabled.enabled) {
1743 ompt_task_info_t *task_info = OMPT_CUR_TASK_INFO(master_th);
1744 if (ompt_enabled.ompt_callback_implicit_task) {
1745 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1746 ompt_scope_end, NULL, &(task_info->task_data), 0,
1747 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_initial);
1748 }
1749 if (ompt_enabled.ompt_callback_parallel_end) {
1750 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
1751 ompt_parallel_data, *parent_task_data,
1752 OMPT_INVOKER(call_context) | ompt_parallel_league,
1753 *return_address);
1754 }
1755 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1756 }
1757#endif
1758 } else {
1759 argv = args;
1760 for (i = argc - 1; i >= 0; --i)
1761 *argv++ = va_arg(kmp_va_deref(ap), void *);
1762 KMP_MB();
1763
1764#if OMPT_SUPPORT
1765 void *dummy;
1766 void **exit_frame_p;
1767 ompt_task_info_t *task_info;
1768 ompt_lw_taskteam_t lw_taskteam;
1769 ompt_data_t *implicit_task_data;
1770
1771 if (ompt_enabled.enabled) {
1772 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1773 ompt_parallel_data, *return_address);
1774 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
1775 // don't use lw_taskteam after linking. content was swapped
1776 task_info = OMPT_CUR_TASK_INFO(master_th);
1777 exit_frame_p = &(task_info->frame.exit_frame.ptr);
1778
1779 /* OMPT implicit task begin */
1780 implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
1781 if (ompt_enabled.ompt_callback_implicit_task) {
1782 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1783 ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
1784 implicit_task_data, 1, __kmp_tid_from_gtid(gtid),
1785 ompt_task_implicit);
1786 OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
1787 }
1788
1789 /* OMPT state */
1790 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1791 } else {
1792 exit_frame_p = &dummy;
1793 }
1794#endif
1795
1796 {
1797 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1798 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1799 __kmp_invoke_microtask(microtask, gtid, 0, argc, args
1800#if OMPT_SUPPORT
1801 ,
1802 exit_frame_p
1803#endif
1804 );
1805 }
1806
1807#if OMPT_SUPPORT
1808 if (ompt_enabled.enabled) {
1809 *exit_frame_p = NULL;
1810 if (ompt_enabled.ompt_callback_implicit_task) {
1811 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1812 ompt_scope_end, NULL, &(task_info->task_data), 1,
1813 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
1814 }
1815
1816 *ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
1817 __ompt_lw_taskteam_unlink(master_th);
1818 if (ompt_enabled.ompt_callback_parallel_end) {
1819 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
1820 ompt_parallel_data, *parent_task_data,
1821 OMPT_INVOKER(call_context) | ompt_parallel_team, *return_address);
1822 }
1823 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1824 }
1825#endif
1826 }
1827 } else if (call_context == fork_context_gnu) {
1828#if OMPT_SUPPORT
1829 if (ompt_enabled.enabled) {
1830 ompt_lw_taskteam_t lwt;
1831 __ompt_lw_taskteam_init(&lwt, master_th, gtid, ompt_parallel_data,
1832 *return_address);
1833
1834 lwt.ompt_task_info.frame.exit_frame = ompt_data_none;
1835 __ompt_lw_taskteam_link(&lwt, master_th, 1);
1836 }
1837// don't use lw_taskteam after linking. content was swapped
1838#endif
1839
1840 // we were called from GNU native code
1841 KA_TRACE(20, ("__kmp_serial_fork_call: T#%d serial exit\n", gtid))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_serial_fork_call: T#%d serial exit\n"
, gtid); }
;
1842 return FALSE0;
1843 } else {
1844 KMP_ASSERT2(call_context < fork_context_last,if (!(call_context < fork_context_last)) { __kmp_debug_assert
(("__kmp_serial_fork_call: unknown fork_context parameter"), "openmp/runtime/src/kmp_runtime.cpp"
, 1845); }
1845 "__kmp_serial_fork_call: unknown fork_context parameter")if (!(call_context < fork_context_last)) { __kmp_debug_assert
(("__kmp_serial_fork_call: unknown fork_context parameter"), "openmp/runtime/src/kmp_runtime.cpp"
, 1845); }
;
1846 }
1847
1848 KA_TRACE(20, ("__kmp_serial_fork_call: T#%d serial exit\n", gtid))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_serial_fork_call: T#%d serial exit\n"
, gtid); }
;
1849 KMP_MB();
1850 return FALSE0;
1851}
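
Each serialized path above first flattens the variadic microtask arguments into a plain argv array (a VLA or alloca buffer, depending on platform) before calling __kmp_invoke_microtask. A standalone sketch of that unpacking, with collect_args as a hypothetical helper; the runtime reads its kmp_va_list the same way via va_arg:

// Illustrative sketch only; not part of kmp_runtime.cpp.
#include <cstdarg>
#include <vector>
static std::vector<void *> collect_args(int argc, ...) {
  std::vector<void *> argv(argc);
  va_list ap;
  va_start(ap, argc);
  for (int i = 0; i < argc; ++i)
    argv[i] = va_arg(ap, void *); // each outlined-region argument is a void *
  va_end(ap);
  return argv;
}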
1852
1853/* most of the work for a fork */
1854/* return true if we really went parallel, false if serialized */
1855int __kmp_fork_call(ident_t *loc, int gtid,
1856 enum fork_context_e call_context, // Intel, GNU, ...
1857 kmp_int32 argc, microtask_t microtask, launch_t invoker,
1858 kmp_va_list ap) {
1859 void **argv;
1860 int i;
1861 int master_tid;
1862 int master_this_cons;
1863 kmp_team_t *team;
1864 kmp_team_t *parent_team;
1865 kmp_info_t *master_th;
1866 kmp_root_t *root;
1867 int nthreads;
1868 int master_active;
1869 int master_set_numthreads;
1870 int level;
1871 int active_level;
1872 int teams_level;
1873#if KMP_NESTED_HOT_TEAMS
1874 kmp_hot_team_ptr_t **p_hot_teams;
1875#endif
1876 { // KMP_TIME_BLOCK
1877 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call);
1878 KMP_COUNT_VALUE(OMP_PARALLEL_args, argc);
1879
1880 KA_TRACE(20, ("__kmp_fork_call: enter T#%d\n", gtid));
1881 if (__kmp_stkpadding > 0 && __kmp_root[gtid] != NULL) {
1882 /* Some systems prefer the stack for the root thread(s) to start with */
1883 /* some gap from the parent stack to prevent false sharing. */
1884 void *dummy = KMP_ALLOCA(__kmp_stkpadding);
1885 /* These 2 lines below are so this does not get optimized out */
1886 if (__kmp_stkpadding > KMP_MAX_STKPADDING)
1887 __kmp_stkpadding += (short)((kmp_int64)dummy);
1888 }
1889
1890 /* initialize if needed */
1891 KMP_DEBUG_ASSERT(
1892 __kmp_init_serial); // AC: potentially unsafe, not in sync with shutdown
1893 if (!TCR_4(__kmp_init_parallel))
1894 __kmp_parallel_initialize();
1895 __kmp_resume_if_soft_paused();
1896
1897 /* setup current data */
1898 // AC: potentially unsafe, not in sync with library shutdown,
1899 // __kmp_threads can be freed
1900 master_th = __kmp_threads[gtid];
1901
1902 parent_team = master_th->th.th_team;
1903 master_tid = master_th->th.th_info.ds.ds_tid;
1904 master_this_cons = master_th->th.th_local.this_construct;
1905 root = master_th->th.th_root;
1906 master_active = root->r.r_active;
1907 master_set_numthreads = master_th->th.th_set_nproc;
1908
1909#if OMPT_SUPPORT
1910 ompt_data_t ompt_parallel_data = ompt_data_none;
1911 ompt_data_t *parent_task_data;
1912 ompt_frame_t *ompt_frame;
1913 void *return_address = NULL;
1914
1915 if (ompt_enabled.enabled) {
1916 __ompt_get_task_info_internal(0, NULL, &parent_task_data, &ompt_frame,
1917 NULL, NULL);
1918 return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);
1919 }
1920#endif
1921
1922 // Assign affinity to root thread if it hasn't happened yet
1923 __kmp_assign_root_init_mask();
1924
1925 // Nested level will be an index in the nested nthreads array
1926 level = parent_team->t.t_level;
1927 // used to launch non-serial teams even if nested is not allowed
1928 active_level = parent_team->t.t_active_level;
1929 // needed to check nesting inside the teams
1930 teams_level = master_th->th.th_teams_level;
1931#if KMP_NESTED_HOT_TEAMS
1932 p_hot_teams = &master_th->th.th_hot_teams;
1933 if (*p_hot_teams == NULL && __kmp_hot_teams_max_level > 0) {
1934 *p_hot_teams = (kmp_hot_team_ptr_t *)__kmp_allocate(
1935 sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
1936 (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
1937 // it is either actual or not needed (when active_level > 0)
1938 (*p_hot_teams)[0].hot_team_nth = 1;
1939 }
1940#endif
1941
1942#if OMPT_SUPPORT
1943 if (ompt_enabled.enabled) {
1944 if (ompt_enabled.ompt_callback_parallel_begin) {
1945 int team_size = master_set_numthreads
1946 ? master_set_numthreads
1947 : get__nproc_2(parent_team, master_tid);
1948 int flags = OMPT_INVOKER(call_context) |
1949 ((microtask == (microtask_t)__kmp_teams_master)
1950 ? ompt_parallel_league
1951 : ompt_parallel_team);
1952 ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
1953 parent_task_data, ompt_frame, &ompt_parallel_data, team_size, flags,
1954 return_address);
1955 }
1956 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1957 }
1958#endif
1959
1960 master_th->th.th_ident = loc;
1961
1962 // Parallel closely nested in teams construct:
1963 if (__kmp_is_fork_in_teams(master_th, microtask, level, teams_level, ap)) {
1964 return __kmp_fork_in_teams(loc, gtid, parent_team, argc, master_th, root,
1965 call_context, microtask, invoker,
1966 master_set_numthreads, level,
1967#if OMPT_SUPPORT
1968 ompt_parallel_data, return_address,
1969#endif
1970 ap);
1971 } // End parallel closely nested in teams construct
1972
1973#if KMP_DEBUG
1974 if (__kmp_tasking_mode != tskm_immediate_exec) {
1975 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
1976 parent_team->t.t_task_team[master_th->th.th_task_state]);
1977 }
1978#endif
1979
1980 // Need this to happen before we determine the number of threads, not while
1981 // we are allocating the team
1982 //__kmp_push_current_task_to_thread(master_th, parent_team, 0);
1983
1984 // Determine the number of threads
1985 int enter_teams =
1986 __kmp_is_entering_teams(active_level, level, teams_level, ap);
1987 if ((!enter_teams &&
1988 (parent_team->t.t_active_level >=
1989 master_th->th.th_current_task->td_icvs.max_active_levels)) ||
1990 (__kmp_library == library_serial)) {
1991 KC_TRACE(10, ("__kmp_fork_call: T#%d serializing team\n", gtid))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_fork_call: T#%d serializing team\n"
, gtid); }
;
1992 nthreads = 1;
1993 } else {
1994 nthreads = master_set_numthreads
1995 ? master_set_numthreads
1996 // TODO: get nproc directly from current task
1997 : get__nproc_2(parent_team, master_tid)((parent_team)->t.t_threads[(master_tid)]->th.th_current_task
->td_icvs.nproc)
;
1998 // Check if we need to take forkjoin lock? (no need for serialized
1999 // parallel out of teams construct).
2000 if (nthreads > 1) {
2001 /* determine how many new threads we can use */
2002 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2003 /* AC: If we execute teams from parallel region (on host), then teams
2004 should be created but each can only have 1 thread if nesting is
2005 disabled. If teams called from serial region, then teams and their
2006 threads should be created regardless of the nesting setting. */
2007 nthreads = __kmp_reserve_threads(root, parent_team, master_tid,
2008 nthreads, enter_teams);
2009 if (nthreads == 1) {
2010 // Free lock for single thread execution here; for multi-thread
2011 // execution it will be freed later after team of threads created
2012 // and initialized
2013 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2014 }
2015 }
2016 }
2017 KMP_DEBUG_ASSERT(nthreads > 0);
2018
2019 // If we temporarily changed the set number of threads then restore it now
2020 master_th->th.th_set_nproc = 0;
2021
2022 if (nthreads == 1) {
2023 return __kmp_serial_fork_call(loc, gtid, call_context, argc, microtask,
2024 invoker, master_th, parent_team,
2025#if OMPT_SUPPORT
2026 &ompt_parallel_data, &return_address,
2027 &parent_task_data,
2028#endif
2029 ap);
2030 } // if (nthreads == 1)
2031
2032 // GEH: only modify the executing flag in the case when not serialized
2033 // serialized case is handled in kmpc_serialized_parallel
2034 KF_TRACE(10, ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
"curtask=%p, curtask_max_aclevel=%d\n", parent_team->t.t_active_level
, master_th, master_th->th.th_current_task, master_th->
th.th_current_task->td_icvs.max_active_levels); }
2035 "curtask=%p, curtask_max_aclevel=%d\n",if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
"curtask=%p, curtask_max_aclevel=%d\n", parent_team->t.t_active_level
, master_th, master_th->th.th_current_task, master_th->
th.th_current_task->td_icvs.max_active_levels); }
2036 parent_team->t.t_active_level, master_th,if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
"curtask=%p, curtask_max_aclevel=%d\n", parent_team->t.t_active_level
, master_th, master_th->th.th_current_task, master_th->
th.th_current_task->td_icvs.max_active_levels); }
2037 master_th->th.th_current_task,if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
"curtask=%p, curtask_max_aclevel=%d\n", parent_team->t.t_active_level
, master_th, master_th->th.th_current_task, master_th->
th.th_current_task->td_icvs.max_active_levels); }
2038 master_th->th.th_current_task->td_icvs.max_active_levels))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
"curtask=%p, curtask_max_aclevel=%d\n", parent_team->t.t_active_level
, master_th, master_th->th.th_current_task, master_th->
th.th_current_task->td_icvs.max_active_levels); }
;
2039 // TODO: GEH - cannot do this assertion because root thread not set up as
2040 // executing
2041 // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 1 );
2042 master_th->th.th_current_task->td_flags.executing = 0;
2043
2044 if (!master_th->th.th_teams_microtask || level > teams_level) {
2045 /* Increment our nested depth level */
2046 KMP_ATOMIC_INC(&root->r.r_in_parallel);
2047 }
2048
2049 // See if we need to make a copy of the ICVs.
2050 int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
2051 if ((level + 1 < __kmp_nested_nth.used) &&
2052 (__kmp_nested_nth.nth[level + 1] != nthreads_icv)) {
2053 nthreads_icv = __kmp_nested_nth.nth[level + 1];
2054 } else {
2055 nthreads_icv = 0; // don't update
2056 }
2057
2058 // Figure out the proc_bind_policy for the new team.
2059 kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
2060 // proc_bind_default means don't update
2061 kmp_proc_bind_t proc_bind_icv = proc_bind_default;
2062 if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
2063 proc_bind = proc_bind_false;
2064 } else {
2065 // No proc_bind clause specified; use current proc-bind-var for this
2066 // parallel region
2067 if (proc_bind == proc_bind_default) {
2068 proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
2069 }
2070 // Have teams construct take proc_bind value from KMP_TEAMS_PROC_BIND
2071 if (master_th->th.th_teams_microtask &&
2072 microtask == (microtask_t)__kmp_teams_master) {
2073 proc_bind = __kmp_teams_proc_bind;
2074 }
2075 /* else: The proc_bind policy was specified explicitly on parallel clause.
2076 This overrides proc-bind-var for this parallel region, but does not
2077 change proc-bind-var. */
2078 // Figure the value of proc-bind-var for the child threads.
2079 if ((level + 1 < __kmp_nested_proc_bind.used) &&
2080 (__kmp_nested_proc_bind.bind_types[level + 1] !=
2081 master_th->th.th_current_task->td_icvs.proc_bind)) {
2082 // Do not modify the proc bind icv for the two teams construct forks
2083 // They just let the proc bind icv pass through
2084 if (!master_th->th.th_teams_microtask ||
2085 !(microtask == (microtask_t)__kmp_teams_master || ap == NULL))
2086 proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
2087 }
2088 }
2089
2090 // Reset for next parallel region
2091 master_th->th.th_set_proc_bind = proc_bind_default;
2092
2093 if ((nthreads_icv > 0) || (proc_bind_icv != proc_bind_default)) {
2094 kmp_internal_control_t new_icvs;
2095 copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
2096 new_icvs.next = NULL;
2097 if (nthreads_icv > 0) {
2098 new_icvs.nproc = nthreads_icv;
2099 }
2100 if (proc_bind_icv != proc_bind_default) {
2101 new_icvs.proc_bind = proc_bind_icv;
2102 }
2103
2104 /* allocate a new parallel team */
2105 KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_fork_call: before __kmp_allocate_team\n"
); }
;
2106 team = __kmp_allocate_team(root, nthreads, nthreads,
2107#if OMPT_SUPPORT1
2108 ompt_parallel_data,
2109#endif
2110 proc_bind, &new_icvs,
2111 argc USE_NESTED_HOT_ARG(master_th), master_th);
2112 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
2113 copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs, &new_icvs);
2114 } else {
2115 /* allocate a new parallel team */
2116 KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_fork_call: before __kmp_allocate_team\n"
); }
;
2117 team = __kmp_allocate_team(root, nthreads, nthreads,
2118#if OMPT_SUPPORT1
2119 ompt_parallel_data,
2120#endif
2121 proc_bind,
2122 &master_th->th.th_current_task->td_icvs,
2123 argc USE_NESTED_HOT_ARG(master_th), master_th);
2124 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
2125 copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs,
2126 &master_th->th.th_current_task->td_icvs);
2127 }
2128 KF_TRACE(if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n"
, team); }
2129 10, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n"
, team); }
;
2130
2131 /* setup the new team */
2132 KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
2133 KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
2134 KMP_CHECK_UPDATE(team->t.t_ident, loc);
2135 KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
2136 KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
2137#if OMPT_SUPPORT
2138 KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.master_return_address,
2139 return_address);
2140#endif
2141 KMP_CHECK_UPDATE(team->t.t_invoke, invoker); // TODO move to root, maybe
2142 // TODO: parent_team->t.t_level == INT_MAX ???
2143 if (!master_th->th.th_teams_microtask || level > teams_level) {
2144 int new_level = parent_team->t.t_level + 1;
2145 KMP_CHECK_UPDATE(team->t.t_level, new_level);
2146 new_level = parent_team->t.t_active_level + 1;
2147 KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
2148 } else {
2149 // AC: Do not increase parallel level at start of the teams construct
2150 int new_level = parent_team->t.t_level;
2151 KMP_CHECK_UPDATE(team->t.t_level, new_level);
2152 new_level = parent_team->t.t_active_level;
2153 KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
2154 }
2155 kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);
2156 // set primary thread's schedule as new run-time schedule
2157 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);
2158
2159 KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
2160 KMP_CHECK_UPDATE(team->t.t_def_allocator, master_th->th.th_def_allocator);
2161
2162 // Update the floating point rounding in the team if required.
2163 propagateFPControl(team);
2164#if OMPD_SUPPORT
2165 if (ompd_state & OMPD_ENABLE_BP)
2166 ompd_bp_parallel_begin();
2167#endif
2168
2169 if (__kmp_tasking_mode != tskm_immediate_exec) {
2170 // Set primary thread's task team to team's task team. Unless this is hot
2171 // team, it should be NULL.
2172 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
2173 parent_team->t.t_task_team[master_th->th.th_task_state]);
2174 KA_TRACE(20, ("__kmp_fork_call: Primary T#%d pushing task_team %p / team "
2175 "%p, new task_team %p / team %p\n",
2176 __kmp_gtid_from_thread(master_th),
2177 master_th->th.th_task_team, parent_team,
2178 team->t.t_task_team[master_th->th.th_task_state], team));
2179
2180 if (active_level || master_th->th.th_task_team) {
2181 // Take a memo of primary thread's task_state
2182 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2183 if (master_th->th.th_task_state_top >=
2184 master_th->th.th_task_state_stack_sz) { // increase size
2185 kmp_uint32 new_size = 2 * master_th->th.th_task_state_stack_sz;
2186 kmp_uint8 *old_stack, *new_stack;
2187 kmp_uint32 i;
2188 new_stack = (kmp_uint8 *)__kmp_allocate(new_size);
2189 for (i = 0; i < master_th->th.th_task_state_stack_sz; ++i) {
2190 new_stack[i] = master_th->th.th_task_state_memo_stack[i];
2191 }
2192 for (i = master_th->th.th_task_state_stack_sz; i < new_size;
2193 ++i) { // zero-init rest of stack
2194 new_stack[i] = 0;
2195 }
2196 old_stack = master_th->th.th_task_state_memo_stack;
2197 master_th->th.th_task_state_memo_stack = new_stack;
2198 master_th->th.th_task_state_stack_sz = new_size;
2199 __kmp_free(old_stack);
2200 }
2201 // Store primary thread's task_state on stack
2202 master_th->th
2203 .th_task_state_memo_stack[master_th->th.th_task_state_top] =
2204 master_th->th.th_task_state;
2205 master_th->th.th_task_state_top++;
2206#if KMP_NESTED_HOT_TEAMS
2207 if (master_th->th.th_hot_teams &&
2208 active_level < __kmp_hot_teams_max_level &&
2209 team == master_th->th.th_hot_teams[active_level].hot_team) {
2210 // Restore primary thread's nested state if nested hot team
2211 master_th->th.th_task_state =
2212 master_th->th
2213 .th_task_state_memo_stack[master_th->th.th_task_state_top];
2214 } else {
2215#endif
2216 master_th->th.th_task_state = 0;
2217#if KMP_NESTED_HOT_TEAMS
2218 }
2219#endif
2220 }
2221#if !KMP_NESTED_HOT_TEAMS
2222 KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) ||
2223 (team == root->r.r_hot_team));
2224#endif
2225 }
2226
2227 KA_TRACE(
2228 20,
2229 ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
2230 gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id,
2231 team->t.t_nproc));
2232 KMP_DEBUG_ASSERT(team != root->r.r_hot_team ||
2233 (team->t.t_master_tid == 0 &&
2234 (team->t.t_parent == root->r.r_root_team ||
2235 team->t.t_parent->t.t_serialized)));
2236 KMP_MB();
2237
2238 /* now, setup the arguments */
2239 argv = (void **)team->t.t_argv;
2240 if (ap) {
2241 for (i = argc - 1; i >= 0; --i) {
2242 void *new_argv = va_arg(kmp_va_deref(ap), void *);
2243 KMP_CHECK_UPDATE(*argv, new_argv);
2244 argv++;
2245 }
2246 } else {
2247 for (i = 0; i < argc; ++i) {
2248 // Get args from parent team for teams construct
2249 KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);
2250 }
2251 }
2252
2253 /* now actually fork the threads */
2254 KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
2255 if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong
2256 root->r.r_active = TRUE;
2257
2258 __kmp_fork_team_threads(root, team, master_th, gtid, !ap);
2259 __kmp_setup_icv_copy(team, nthreads,
2260 &master_th->th.th_current_task->td_icvs, loc);
2261
2262#if OMPT_SUPPORT
2263 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
2264#endif
2265
2266 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2267
2268#if USE_ITT_BUILD
2269 if (team->t.t_active_level == 1 // only report frames at level 1
2270 && !master_th->th.th_teams_microtask) { // not in teams construct
2271#if USE_ITT_NOTIFY
2272 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
2273 (__kmp_forkjoin_frames_mode == 3 ||
2274 __kmp_forkjoin_frames_mode == 1)) {
2275 kmp_uint64 tmp_time = 0;
2276 if (__itt_get_timestamp_ptr)
2277 tmp_time = __itt_get_timestamp();
2278 // Internal fork - report frame begin
2279 master_th->th.th_frame_time = tmp_time;
2280 if (__kmp_forkjoin_frames_mode == 3)
2281 team->t.t_region_time = tmp_time;
2282 } else
2283// only one notification scheme (either "submit" or "forking/joined", not both)
2284#endif /* USE_ITT_NOTIFY */
2285 if ((__itt_frame_begin_v3_ptr || KMP_ITT_DEBUG) &&
2286 __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) {
2287 // Mark start of "parallel" region for Intel(R) VTune(TM) analyzer.
2288 __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
2289 }
2290 }
2291#endif /* USE_ITT_BUILD */
2292
2293 /* now go on and do the work */
2294 KMP_DEBUG_ASSERT(team == __kmp_threads[gtid]->th.th_team);
2295 KMP_MB();
2296 KF_TRACE(10,
2297 ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
2298 root, team, master_th, gtid));
2299
2300#if USE_ITT_BUILD
2301 if (__itt_stack_caller_create_ptr) {
2302 // create new stack stitching id before entering fork barrier
2303 if (!enter_teams) {
2304 KMP_DEBUG_ASSERT(team->t.t_stack_id == NULL);
2305 team->t.t_stack_id = __kmp_itt_stack_caller_create();
2306 } else if (parent_team->t.t_serialized) {
2307 // keep stack stitching id in the serialized parent_team;
2308 // current team will be used for parallel inside the teams;
2309 // if parent_team is active, then it already keeps stack stitching id
2310 // for the league of teams
2311 KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);
2312 parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();
2313 }
2314 }
2315#endif /* USE_ITT_BUILD */
2316
2317 // AC: skip __kmp_internal_fork at teams construct, let only primary
2318 // threads execute
2319 if (ap) {
2320 __kmp_internal_fork(loc, gtid, team);
2321 KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, "
2322 "master_th=%p, gtid=%d\n",
2323 root, team, master_th, gtid));
2324 }
2325
2326 if (call_context == fork_context_gnu) {
2327 KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
2328 return TRUE;
2329 }
2330
2331 /* Invoke microtask for PRIMARY thread */
2332 KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
2333 team->t.t_id, team->t.t_pkfn));
2334 } // END of timer KMP_fork_call block
2335
2336#if KMP_STATS_ENABLED
2337 // If beginning a teams construct, then change thread state
2338 stats_state_e previous_state = KMP_GET_THREAD_STATE();
2339 if (!ap) {
2340 KMP_SET_THREAD_STATE(stats_state_e::TEAMS_REGION);
2341 }
2342#endif
2343
2344 if (!team->t.t_invoke(gtid)) {
2345 KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread");
2346 }
2347
2348#if KMP_STATS_ENABLED
2349 // If was beginning of a teams construct, then reset thread state
2350 if (!ap) {
2351 KMP_SET_THREAD_STATE(previous_state);
2352 }
2353#endif
2354
2355 KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
2356 team->t.t_id, team->t.t_pkfn));
2357 KMP_MB(); /* Flush all pending memory write invalidates. */
2358
2359 KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
2360#if OMPT_SUPPORT
2361 if (ompt_enabled.enabled) {
2362 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2363 }
2364#endif
2365
2366 return TRUE;
2367}
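
For orientation, __kmp_fork_call above is the path the primary thread takes for every parallel region that is not serialized, and the nested branch is what the following program would exercise. This is a minimal user-level sketch against the standard omp.h API (the program is illustrative and not part of the runtime sources):

#include <omp.h>
#include <stdio.h>

int main(void) {
  omp_set_max_active_levels(2);         // permit one level of nested parallelism
  omp_set_num_threads(4);               // nthreads-var for the outer region
#pragma omp parallel                    // outer fork
  {
#pragma omp parallel num_threads(2)     // nested fork from each outer thread
    printf("level=%d outer_tid=%d tid=%d\n", omp_get_level(),
           omp_get_ancestor_thread_num(1), omp_get_thread_num());
  }
  return 0;
}
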
2368
2369#if OMPT_SUPPORT
2370static inline void __kmp_join_restore_state(kmp_info_t *thread,
2371 kmp_team_t *team) {
2372 // restore state outside the region
2373 thread->th.ompt_thread_info.state =
2374 ((team->t.t_serialized) ? ompt_state_work_serial
2375 : ompt_state_work_parallel);
2376}
2377
2378static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread,
2379 kmp_team_t *team, ompt_data_t *parallel_data,
2380 int flags, void *codeptr) {
2381 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2382 if (ompt_enabled.ompt_callback_parallel_end) {
2383 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
2384 parallel_data, &(task_info->task_data), flags, codeptr);
2385 }
2386
2387 task_info->frame.enter_frame = ompt_data_none;
2388 __kmp_join_restore_state(thread, team);
2389}
2390#endif
2391
2392void __kmp_join_call(ident_t *loc, int gtid
2393#if OMPT_SUPPORT
2394 ,
2395 enum fork_context_e fork_context
2396#endif
2397 ,
2398 int exit_teams) {
2399 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);
2400 kmp_team_t *team;
2401 kmp_team_t *parent_team;
2402 kmp_info_t *master_th;
2403 kmp_root_t *root;
2404 int master_active;
2405
2406 KA_TRACE(20, ("__kmp_join_call: enter T#%d\n", gtid));
2407
2408 /* setup current data */
2409 master_th = __kmp_threads[gtid];
2410 root = master_th->th.th_root;
2411 team = master_th->th.th_team;
2412 parent_team = team->t.t_parent;
2413
2414 master_th->th.th_ident = loc;
2415
2416#if OMPT_SUPPORT
2417 void *team_microtask = (void *)team->t.t_pkfn;
2418 // For GOMP interface with serialized parallel, need the
2419 // __kmpc_end_serialized_parallel to call hooks for OMPT end-implicit-task
2420 // and end-parallel events.
2421 if (ompt_enabled.enabled &&
2422 !(team->t.t_serialized && fork_context == fork_context_gnu)) {
2423 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2424 }
2425#endif
2426
2427#if KMP_DEBUG
2428 if (__kmp_tasking_mode != tskm_immediate_exec && !exit_teams) {
2429 KA_TRACE(20, ("__kmp_join_call: T#%d, old team = %p old task_team = %p, "
2430 "th_task_team = %p\n",
2431 __kmp_gtid_from_thread(master_th), team,
2432 team->t.t_task_team[master_th->th.th_task_state],
2433 master_th->th.th_task_team));
2434 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
2435 team->t.t_task_team[master_th->th.th_task_state]);
2436 }
2437#endif
2438
2439 if (team->t.t_serialized) {
2440 if (master_th->th.th_teams_microtask) {
2441 // We are in teams construct
2442 int level = team->t.t_level;
2443 int tlevel = master_th->th.th_teams_level;
2444 if (level == tlevel) {
2445 // AC: we haven't incremented it earlier at start of teams construct,
2446 // so do it here - at the end of teams construct
2447 team->t.t_level++;
2448 } else if (level == tlevel + 1) {
2449 // AC: we are exiting parallel inside teams, need to increment
2450 // serialization in order to restore it in the next call to
2451 // __kmpc_end_serialized_parallel
2452 team->t.t_serialized++;
2453 }
2454 }
2455 __kmpc_end_serialized_parallel(loc, gtid);
2456
2457#if OMPT_SUPPORT
2458 if (ompt_enabled.enabled) {
2459 if (fork_context == fork_context_gnu) {
2460 __ompt_lw_taskteam_unlink(master_th);
2461 }
2462 __kmp_join_restore_state(master_th, parent_team);
2463 }
2464#endif
2465
2466 return;
2467 }
2468
2469 master_active = team->t.t_master_active;
2470
2471 if (!exit_teams) {
2472 // AC: No barrier for internal teams at exit from teams construct.
2473 // But there is barrier for external team (league).
2474 __kmp_internal_join(loc, gtid, team);
2475#if USE_ITT_BUILD
2476 if (__itt_stack_caller_create_ptr) {
2477 KMP_DEBUG_ASSERT(team->t.t_stack_id != NULL);
2478 // destroy the stack stitching id after join barrier
2479 __kmp_itt_stack_caller_destroy((__itt_caller)team->t.t_stack_id);
2480 team->t.t_stack_id = NULL;
2481 }
2482#endif
2483 } else {
2484 master_th->th.th_task_state =
2485 0; // AC: no tasking in teams (out of any parallel)
2486#if USE_ITT_BUILD
2487 if (__itt_stack_caller_create_ptr && parent_team->t.t_serialized) {
2488 KMP_DEBUG_ASSERT(parent_team->t.t_stack_id != NULL);
2489 // destroy the stack stitching id on exit from the teams construct
2490 // if parent_team is active, then the id will be destroyed later on
2491 // by master of the league of teams
2492 __kmp_itt_stack_caller_destroy((__itt_caller)parent_team->t.t_stack_id);
2493 parent_team->t.t_stack_id = NULL;
2494 }
2495#endif
2496 }
2497
2498 KMP_MB();
2499
2500#if OMPT_SUPPORT
2501 ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data);
2502 void *codeptr = team->t.ompt_team_info.master_return_address;
2503#endif
2504
2505#if USE_ITT_BUILD
2506 // Mark end of "parallel" region for Intel(R) VTune(TM) analyzer.
2507 if (team->t.t_active_level == 1 &&
2508 (!master_th->th.th_teams_microtask || /* not in teams construct */
2509 master_th->th.th_teams_size.nteams == 1)) {
2510 master_th->th.th_ident = loc;
2511 // only one notification scheme (either "submit" or "forking/joined", not
2512 // both)
2513 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
2514 __kmp_forkjoin_frames_mode == 3)
2515 __kmp_itt_frame_submit(gtid, team->t.t_region_time,
2516 master_th->th.th_frame_time, 0, loc,
2517 master_th->th.th_team_nproc, 1);
2518 else if ((__itt_frame_end_v3_ptr || KMP_ITT_DEBUG) &&
2519 !__kmp_forkjoin_frames_mode && __kmp_forkjoin_frames)
2520 __kmp_itt_region_joined(gtid);
2521 } // active_level == 1
2522#endif /* USE_ITT_BUILD */
2523
2524#if KMP_AFFINITY_SUPPORTED
2525 if (!exit_teams) {
2526 // Restore master thread's partition.
2527 master_th->th.th_first_place = team->t.t_first_place;
2528 master_th->th.th_last_place = team->t.t_last_place;
2529 }
2530#endif // KMP_AFFINITY_SUPPORTED
2531
2532 if (master_th->th.th_teams_microtask && !exit_teams &&
2533 team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
2534 team->t.t_level == master_th->th.th_teams_level + 1) {
2535// AC: We need to leave the team structure intact at the end of parallel
2536// inside the teams construct, so that at the next parallel same (hot) team
2537// works, only adjust nesting levels
2538#if OMPT_SUPPORT
2539 ompt_data_t ompt_parallel_data = ompt_data_none;
2540 if (ompt_enabled.enabled) {
2541 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2542 if (ompt_enabled.ompt_callback_implicit_task) {
2543 int ompt_team_size = team->t.t_nproc;
2544 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
2545 ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
2546 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
2547 }
2548 task_info->frame.exit_frame = ompt_data_none;
2549 task_info->task_data = ompt_data_none;
2550 ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
2551 __ompt_lw_taskteam_unlink(master_th);
2552 }
2553#endif
2554 /* Decrement our nested depth level */
2555 team->t.t_level--;
2556 team->t.t_active_level--;
2557 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
2558
2559 // Restore number of threads in the team if needed. This code relies on
2560 // the proper adjustment of th_teams_size.nth after the fork in
2561 // __kmp_teams_master on each teams primary thread in the case that
2562 // __kmp_reserve_threads reduced it.
2563 if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) {
2564 int old_num = master_th->th.th_team_nproc;
2565 int new_num = master_th->th.th_teams_size.nth;
2566 kmp_info_t **other_threads = team->t.t_threads;
2567 team->t.t_nproc = new_num;
2568 for (int i = 0; i < old_num; ++i) {
2569 other_threads[i]->th.th_team_nproc = new_num;
2570 }
2571 // Adjust states of non-used threads of the team
2572 for (int i = old_num; i < new_num; ++i) {
2573 // Re-initialize thread's barrier data.
2574 KMP_DEBUG_ASSERT(other_threads[i]);
2575 kmp_balign_t *balign = other_threads[i]->th.th_bar;
2576 for (int b = 0; b < bs_last_barrier; ++b) {
2577 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
2578 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
2579#if USE_DEBUGGER
2580 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
2581#endif
2582 }
2583 if (__kmp_tasking_mode != tskm_immediate_exec) {
2584 // Synchronize thread's task state
2585 other_threads[i]->th.th_task_state = master_th->th.th_task_state;
2586 }
2587 }
2588 }
2589
2590#if OMPT_SUPPORT
2591 if (ompt_enabled.enabled) {
2592 __kmp_join_ompt(gtid, master_th, parent_team, &ompt_parallel_data,
2593 OMPT_INVOKER(fork_context) | ompt_parallel_team, codeptr);
2594 }
2595#endif
2596
2597 return;
2598 }
2599
2600 /* do cleanup and restore the parent team */
2601 master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
2602 master_th->th.th_local.this_construct = team->t.t_master_this_cons;
2603
2604 master_th->th.th_dispatch = &parent_team->t.t_dispatch[team->t.t_master_tid];
2605
2606 /* jc: The following lock has instructions with REL and ACQ semantics,
2607 separating the parallel user code called in this parallel region
2608 from the serial user code called after this function returns. */
2609 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2610
2611 if (!master_th->th.th_teams_microtask ||
2612 team->t.t_level > master_th->th.th_teams_level) {
2613 /* Decrement our nested depth level */
2614 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
2615 }
2616 KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0);
2617
2618#if OMPT_SUPPORT
2619 if (ompt_enabled.enabled) {
2620 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2621 if (ompt_enabled.ompt_callback_implicit_task) {
2622 int flags = (team_microtask == (void *)__kmp_teams_master)
2623 ? ompt_task_initial
2624 : ompt_task_implicit;
2625 int ompt_team_size = (flags == ompt_task_initial) ? 0 : team->t.t_nproc;
2626 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
2627 ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
2628 OMPT_CUR_TASK_INFO(master_th)->thread_num, flags);
2629 }
2630 task_info->frame.exit_frame = ompt_data_none;
2631 task_info->task_data = ompt_data_none;
2632 }
2633#endif
2634
2635 KF_TRACE(10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 0,
2636 master_th, team));
2637 __kmp_pop_current_task_from_thread(master_th);
2638
2639 master_th->th.th_def_allocator = team->t.t_def_allocator;
2640
2641#if OMPD_SUPPORT
2642 if (ompd_state & OMPD_ENABLE_BP)
2643 ompd_bp_parallel_end();
2644#endif
2645 updateHWFPControl(team);
2646
2647 if (root->r.r_active != master_active)
2648 root->r.r_active = master_active;
2649
2650 __kmp_free_team(root, team USE_NESTED_HOT_ARG(
2651 master_th)); // this will free worker threads
2652
2653 /* this race was fun to find. make sure the following is in the critical
2654 region otherwise assertions may fail occasionally since the old team may be
2655 reallocated and the hierarchy appears inconsistent. it is actually safe to
2656 run and won't cause any bugs, but will cause those assertion failures. it's
2657 only one deref&assign so might as well put this in the critical region */
2658 master_th->th.th_team = parent_team;
2659 master_th->th.th_team_nproc = parent_team->t.t_nproc;
2660 master_th->th.th_team_master = parent_team->t.t_threads[0];
2661 master_th->th.th_team_serialized = parent_team->t.t_serialized;
2662
2663 /* restore serialized team, if need be */
2664 if (parent_team->t.t_serialized &&
2665 parent_team != master_th->th.th_serial_team &&
2666 parent_team != root->r.r_root_team) {
2667 __kmp_free_team(root,
2668 master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL));
2669 master_th->th.th_serial_team = parent_team;
2670 }
2671
2672 if (__kmp_tasking_mode != tskm_immediate_exec) {
2673 if (master_th->th.th_task_state_top >
2674 0) { // Restore task state from memo stack
2675 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2676 // Remember primary thread's state if we re-use this nested hot team
2677 master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] =
2678 master_th->th.th_task_state;
2679 --master_th->th.th_task_state_top; // pop
2680 // Now restore state at this level
2681 master_th->th.th_task_state =
2682 master_th->th
2683 .th_task_state_memo_stack[master_th->th.th_task_state_top];
2684 } else if (team != root->r.r_hot_team) {
2685 // Reset the task state of primary thread if we are not hot team because
2686 // in this case all the worker threads will be free, and their task state
2687 // will be reset. If not reset the primary's, the task state will be
2688 // inconsistent.
2689 master_th->th.th_task_state = 0;
2690 }
2691 // Copy the task team from the parent team to the primary thread
2692 master_th->th.th_task_team =
2693 parent_team->t.t_task_team[master_th->th.th_task_state];
2694 KA_TRACE(20,
2695 ("__kmp_join_call: Primary T#%d restoring task_team %p, team %p\n",
2696 __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
2697 parent_team));
2698 }
2699
2700 // TODO: GEH - cannot do this assertion because root thread not set up as
2701 // executing
2702 // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 0 );
2703 master_th->th.th_current_task->td_flags.executing = 1;
2704
2705 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2706
2707#if KMP_AFFINITY_SUPPORTED
2708 if (master_th->th.th_team->t.t_level == 0 && __kmp_affinity.flags.reset) {
2709 __kmp_reset_root_init_mask(gtid);
2710 }
2711#endif
2712#if OMPT_SUPPORT
2713 int flags =
2714 OMPT_INVOKER(fork_context) |
2715 ((team_microtask == (void *)__kmp_teams_master) ? ompt_parallel_league
2716 : ompt_parallel_team);
2717 if (ompt_enabled.enabled) {
2718 __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, flags,
2719 codeptr);
2720 }
2721#endif
2722
2723 KMP_MB();
2724 KA_TRACE(20, ("__kmp_join_call: exit T#%d\n", gtid));
2725}
2726
2727/* Check whether we should push an internal control record onto the
2728 serial team stack. If so, do it. */
2729void __kmp_save_internal_controls(kmp_info_t *thread) {
2730
2731 if (thread->th.th_team != thread->th.th_serial_team) {
2732 return;
2733 }
2734 if (thread->th.th_team->t.t_serialized > 1) {
2735 int push = 0;
2736
2737 if (thread->th.th_team->t.t_control_stack_top == NULL) {
2738 push = 1;
2739 } else {
2740 if (thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
2741 thread->th.th_team->t.t_serialized) {
2742 push = 1;
2743 }
2744 }
2745 if (push) { /* push a record on the serial team's stack */
2746 kmp_internal_control_t *control =
2747 (kmp_internal_control_t *)__kmp_allocate(
2748 sizeof(kmp_internal_control_t));
2749
2750 copy_icvs(control, &thread->th.th_current_task->td_icvs);
2751
2752 control->serial_nesting_level = thread->th.th_team->t.t_serialized;
2753
2754 control->next = thread->th.th_team->t.t_control_stack_top;
2755 thread->th.th_team->t.t_control_stack_top = control;
2756 }
2757 }
2758}
2759
2760/* Changes set_nproc */
2761void __kmp_set_num_threads(int new_nth, int gtid) {
2762 kmp_info_t *thread;
2763 kmp_root_t *root;
2764
2765 KF_TRACE(10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth));
2766 KMP_DEBUG_ASSERT(__kmp_init_serial);
2767
2768 if (new_nth < 1)
2769 new_nth = 1;
2770 else if (new_nth > __kmp_max_nth)
2771 new_nth = __kmp_max_nth;
2772
2773 KMP_COUNT_VALUE(OMP_set_numthreads, new_nth);
2774 thread = __kmp_threads[gtid];
2775 if (thread->th.th_current_task->td_icvs.nproc == new_nth)
2776 return; // nothing to do
2777
2778 __kmp_save_internal_controls(thread);
2779
2780 set__nproc(thread, new_nth);
2781
2782 // If this omp_set_num_threads() call will cause the hot team size to be
2783 // reduced (in the absence of a num_threads clause), then reduce it now,
2784 // rather than waiting for the next parallel region.
2785 root = thread->th.th_root;
2786 if (__kmp_init_parallel && (!root->r.r_active) &&
2787 (root->r.r_hot_team->t.t_nproc > new_nth)
2788#if KMP_NESTED_HOT_TEAMS
2789 && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
2790#endif
2791 ) {
2792 kmp_team_t *hot_team = root->r.r_hot_team;
2793 int f;
2794
2795 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2796
2797 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
2798 __kmp_resize_dist_barrier(hot_team, hot_team->t.t_nproc, new_nth);
2799 }
2800 // Release the extra threads we don't need any more.
2801 for (f = new_nth; f < hot_team->t.t_nproc; f++) {
2802 KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
2803 if (__kmp_tasking_mode != tskm_immediate_exec) {
2804 // When decreasing team size, threads no longer in the team should unref
2805 // task team.
2806 hot_team->t.t_threads[f]->th.th_task_team = NULL;
2807 }
2808 __kmp_free_thread(hot_team->t.t_threads[f]);
2809 hot_team->t.t_threads[f] = NULL;
2810 }
2811 hot_team->t.t_nproc = new_nth;
2812#if KMP_NESTED_HOT_TEAMS
2813 if (thread->th.th_hot_teams) {
2814 KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team);
2815 thread->th.th_hot_teams[0].hot_team_nth = new_nth;
2816 }
2817#endif
2818
2819 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
2820 hot_team->t.b->update_num_threads(new_nth);
2821 __kmp_add_threads_to_team(hot_team, new_nth);
2822 }
2823
2824 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2825
2826 // Update the t_nproc field in the threads that are still active.
2827 for (f = 0; f < new_nth; f++) {
2828 KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
2829 hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
2830 }
2831 // Special flag in case omp_set_num_threads() call
2832 hot_team->t.t_size_changed = -1;
2833 }
2834}
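
__kmp_set_num_threads backs omp_set_num_threads; as the code above shows, when the requested value is below the current hot-team size and no parallel region is active, the surplus workers are released immediately rather than at the next fork. A short usage sketch (standard omp.h API, illustrative only):

#include <omp.h>
#include <stdio.h>

int main(void) {
#pragma omp parallel num_threads(8)   // materializes a hot team of 8 threads
  { /* empty region */ }
  omp_set_num_threads(2);             // shrinks the idle hot team right away
#pragma omp parallel                  // next region starts from the smaller team
#pragma omp single
  printf("threads now: %d\n", omp_get_num_threads());
  return 0;
}
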
2835
2836/* Changes max_active_levels */
2837void __kmp_set_max_active_levels(int gtid, int max_active_levels) {
2838 kmp_info_t *thread;
2839
2840 KF_TRACE(10, ("__kmp_set_max_active_levels: new max_active_levels for thread "
2841 "%d = (%d)\n",
2842 gtid, max_active_levels));
2843 KMP_DEBUG_ASSERT(__kmp_init_serial);
2844
2845 // validate max_active_levels
2846 if (max_active_levels < 0) {
2847 KMP_WARNING(ActiveLevelsNegative, max_active_levels);
2848 // We ignore this call if the user has specified a negative value.
2849 // The current setting won't be changed. The last valid setting will be
2850 // used. A warning will be issued (if warnings are allowed as controlled by
2851 // the KMP_WARNINGS env var).
2852 KF_TRACE(10, ("__kmp_set_max_active_levels: the call is ignored: new "
2853 "max_active_levels for thread %d = (%d)\n",
2854 gtid, max_active_levels));
2855 return;
2856 }
2857 if (max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT) {
2858 // it's OK, the max_active_levels is within the valid range: [ 0;
2859 // KMP_MAX_ACTIVE_LEVELS_LIMIT ]
2860 // We allow a zero value. (implementation defined behavior)
2861 } else {
2862 KMP_WARNING(ActiveLevelsExceedLimit, max_active_levels,
2863 KMP_MAX_ACTIVE_LEVELS_LIMIT);
2864 max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
2865 // Current upper limit is MAX_INT. (implementation defined behavior)
2866 // If the input exceeds the upper limit, we correct the input to be the
2867 // upper limit. (implementation defined behavior)
2868 // Actually, the flow should never get here until we use MAX_INT limit.
2869 }
2870 KF_TRACE(10, ("__kmp_set_max_active_levels: after validation: new "
2871 "max_active_levels for thread %d = (%d)\n",
2872 gtid, max_active_levels));
2873
2874 thread = __kmp_threads[gtid];
2875
2876 __kmp_save_internal_controls(thread);
2877
2878 set__max_active_levels(thread, max_active_levels);
2879}
2880
2881/* Gets max_active_levels */
2882int __kmp_get_max_active_levels(int gtid) {
2883 kmp_info_t *thread;
2884
2885 KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d\n", gtid));
2886 KMP_DEBUG_ASSERT(__kmp_init_serial);
2887
2888 thread = __kmp_threads[gtid];
2889 KMP_DEBUG_ASSERT(thread->th.th_current_task);
2890 KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d, curtask=%p, "
2891 "curtask_maxaclevel=%d\n",
2892 gtid, thread->th.th_current_task,
2893 thread->th.th_current_task->td_icvs.max_active_levels));
2894 return thread->th.th_current_task->td_icvs.max_active_levels;
2895}
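
The two routines above back omp_set_max_active_levels / omp_get_max_active_levels and operate on the calling thread's current-task ICVs. A minimal sketch of the observable behaviour (standard API; per the validation code above, a negative value is ignored with a warning):

#include <omp.h>
#include <stdio.h>

int main(void) {
  omp_set_max_active_levels(-1); // rejected: previous value is kept
  omp_set_max_active_levels(3);  // accepted: within [0, KMP_MAX_ACTIVE_LEVELS_LIMIT]
  printf("max-active-levels-var = %d\n", omp_get_max_active_levels()); // prints 3
  return 0;
}
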
2896
2897// nteams-var per-device ICV
2898void __kmp_set_num_teams(int num_teams) {
2899 if (num_teams > 0)
2900 __kmp_nteams = num_teams;
2901}
2902int __kmp_get_max_teams(void) { return __kmp_nteams; }
2903// teams-thread-limit-var per-device ICV
2904void __kmp_set_teams_thread_limit(int limit) {
2905 if (limit > 0)
2906 __kmp_teams_thread_limit = limit;
2907}
2908int __kmp_get_teams_thread_limit(void) { return __kmp_teams_thread_limit; }
2909
2910 KMP_BUILD_ASSERT(sizeof(kmp_sched_t) == sizeof(int));
2911 KMP_BUILD_ASSERT(sizeof(enum sched_type) == sizeof(int));
2912
2913/* Changes def_sched_var ICV values (run-time schedule kind and chunk) */
2914void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) {
2915 kmp_info_t *thread;
2916 kmp_sched_t orig_kind;
2917 // kmp_team_t *team;
2918
2919 KF_TRACE(10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n",
2920 gtid, (int)kind, chunk));
2921 KMP_DEBUG_ASSERT(__kmp_init_serial);
2922
2923 // Check if the kind parameter is valid, correct if needed.
2924 // Valid parameters should fit in one of two intervals - standard or extended:
2925 // <lower>, <valid>, <upper_std>, <lower_ext>, <valid>, <upper>
2926 // 2008-01-25: 0, 1 - 4, 5, 100, 101 - 102, 103
2927 orig_kind = kind;
2928 kind = __kmp_sched_without_mods(kind);
2929
2930 if (kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
2931 (kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std)) {
2932 // TODO: Hint needs attention in case we change the default schedule.
2933 __kmp_msg(kmp_ms_warning, KMP_MSG(ScheduleKindOutOfRange, kind),
2934 KMP_HNT(DefaultScheduleKindUsed, "static, no chunk"),
2935 __kmp_msg_null);
2936 kind = kmp_sched_default;
2937 chunk = 0; // ignore chunk value in case of bad kind
2938 }
2939
2940 thread = __kmp_threads[gtid];
2941
2942 __kmp_save_internal_controls(thread);
2943
2944 if (kind < kmp_sched_upper_std) {
2945 if (kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK) {
2946 // differ static chunked vs. unchunked: chunk should be invalid to
2947 // indicate unchunked schedule (which is the default)
2948 thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
2949 } else {
2950 thread->th.th_current_task->td_icvs.sched.r_sched_type =
2951 __kmp_sch_map[kind - kmp_sched_lower - 1];
2952 }
2953 } else {
2954 // __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std -
2955 // kmp_sched_lower - 2 ];
2956 thread->th.th_current_task->td_icvs.sched.r_sched_type =
2957 __kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std -
2958 kmp_sched_lower - 2];
2959 }
2960 __kmp_sched_apply_mods_intkind(
2961 orig_kind, &(thread->th.th_current_task->td_icvs.sched.r_sched_type));
2962 if (kind == kmp_sched_auto || chunk < 1) {
2963 // ignore parameter chunk for schedule auto
2964 thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
2965 } else {
2966 thread->th.th_current_task->td_icvs.sched.chunk = chunk;
2967 }
2968}
2969
2970/* Gets def_sched_var ICV values */
2971void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk) {
2972 kmp_info_t *thread;
2973 enum sched_type th_type;
2974
2975 KF_TRACE(10, ("__kmp_get_schedule: thread %d\n", gtid));
2976 KMP_DEBUG_ASSERT(__kmp_init_serial);
2977
2978 thread = __kmp_threads[gtid];
2979
2980 th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
2981 switch (SCHEDULE_WITHOUT_MODIFIERS(th_type)) {
2982 case kmp_sch_static:
2983 case kmp_sch_static_greedy:
2984 case kmp_sch_static_balanced:
2985 *kind = kmp_sched_static;
2986 __kmp_sched_apply_mods_stdkind(kind, th_type);
2987 *chunk = 0; // chunk was not set, try to show this fact via zero value
2988 return;
2989 case kmp_sch_static_chunked:
2990 *kind = kmp_sched_static;
2991 break;
2992 case kmp_sch_dynamic_chunked:
2993 *kind = kmp_sched_dynamic;
2994 break;
2995 case kmp_sch_guided_chunked:
2996 case kmp_sch_guided_iterative_chunked:
2997 case kmp_sch_guided_analytical_chunked:
2998 *kind = kmp_sched_guided;
2999 break;
3000 case kmp_sch_auto:
3001 *kind = kmp_sched_auto;
3002 break;
3003 case kmp_sch_trapezoidal:
3004 *kind = kmp_sched_trapezoidal;
3005 break;
3006#if KMP_STATIC_STEAL_ENABLED
3007 case kmp_sch_static_steal:
3008 *kind = kmp_sched_static_steal;
3009 break;
3010#endif
3011 default:
3012 KMP_FATAL(UnknownSchedulingType, th_type);
3013 }
3014
3015 __kmp_sched_apply_mods_stdkind(kind, th_type);
3016 *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
3017}
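
__kmp_set_schedule / __kmp_get_schedule translate between the public omp_sched_t values and the runtime's internal sched_type; a chunk of 0 on output stands for the default, unchunked schedule. A short usage sketch (standard omp.h API, illustrative only):

#include <omp.h>
#include <stdio.h>

int main(void) {
  omp_sched_t kind;
  int chunk;
  omp_set_schedule(omp_sched_dynamic, 4); // run-time schedule: dynamic, chunk 4
  omp_get_schedule(&kind, &chunk);
  printf("kind=%d chunk=%d\n", (int)kind, chunk);
  omp_set_schedule(omp_sched_auto, 7);    // chunk is ignored for auto
  omp_get_schedule(&kind, &chunk);
  printf("kind=%d chunk=%d\n", (int)kind, chunk);
  return 0;
}
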
3018
3019int __kmp_get_ancestor_thread_num(int gtid, int level) {
3020
3021 int ii, dd;
3022 kmp_team_t *team;
3023 kmp_info_t *thr;
3024
3025 KF_TRACE(10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level));
3026 KMP_DEBUG_ASSERT(__kmp_init_serial);
3027
3028 // validate level
3029 if (level == 0)
3030 return 0;
3031 if (level < 0)
3032 return -1;
3033 thr = __kmp_threads[gtid];
3034 team = thr->th.th_team;
3035 ii = team->t.t_level;
3036 if (level > ii)
3037 return -1;
3038
3039 if (thr->th.th_teams_microtask) {
3040 // AC: we are in teams region where multiple nested teams have same level
3041 int tlevel = thr->th.th_teams_level; // the level of the teams construct
3042 if (level <=
3043 tlevel) { // otherwise usual algorithm works (will not touch the teams)
3044 KMP_DEBUG_ASSERT(ii >= tlevel);
3045 // AC: As we need to pass by the teams league, we need to artificially
3046 // increase ii
3047 if (ii == tlevel) {
3048 ii += 2; // three teams have same level
3049 } else {
3050 ii++; // two teams have same level
3051 }
3052 }
3053 }
3054
3055 if (ii == level)
3056 return __kmp_tid_from_gtid(gtid);
3057
3058 dd = team->t.t_serialized;
3059 level++;
3060 while (ii > level) {
3061 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
3062 }
3063 if ((team->t.t_serialized) && (!dd)) {
3064 team = team->t.t_parent;
3065 continue;
3066 }
3067 if (ii > level) {
3068 team = team->t.t_parent;
3069 dd = team->t.t_serialized;
3070 ii--;
3071 }
3072 }
3073
3074 return (dd > 1) ? (0) : (team->t.t_master_tid);
3075}
3076
3077int __kmp_get_team_size(int gtid, int level) {
3078
3079 int ii, dd;
3080 kmp_team_t *team;
3081 kmp_info_t *thr;
3082
3083 KF_TRACE(10, ("__kmp_get_team_size: thread %d %d\n", gtid, level));
3084 KMP_DEBUG_ASSERT(__kmp_init_serial);
3085
3086 // validate level
3087 if (level == 0)
3088 return 1;
3089 if (level < 0)
3090 return -1;
3091 thr = __kmp_threads[gtid];
3092 team = thr->th.th_team;
3093 ii = team->t.t_level;
3094 if (level > ii)
3095 return -1;
3096
3097 if (thr->th.th_teams_microtask) {
3098 // AC: we are in teams region where multiple nested teams have same level
3099 int tlevel = thr->th.th_teams_level; // the level of the teams construct
3100 if (level <=
3101 tlevel) { // otherwise usual algorithm works (will not touch the teams)
3102 KMP_DEBUG_ASSERT(ii >= tlevel);
3103 // AC: As we need to pass by the teams league, we need to artificially
3104 // increase ii
3105 if (ii == tlevel) {
3106 ii += 2; // three teams have same level
3107 } else {
3108 ii++; // two teams have same level
3109 }
3110 }
3111 }
3112
3113 while (ii > level) {
3114 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
3115 }
3116 if (team->t.t_serialized && (!dd)) {
3117 team = team->t.t_parent;
3118 continue;
3119 }
3120 if (ii > level) {
3121 team = team->t.t_parent;
3122 ii--;
3123 }
3124 }
3125
3126 return team->t.t_nproc;
3127}
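
Both lookups above walk up the team tree, skipping serialized ancestors and compensating for the extra levels a teams construct introduces. A minimal sketch of the corresponding user API under nested parallelism (standard omp.h, illustrative only):

#include <omp.h>
#include <stdio.h>

int main(void) {
  omp_set_max_active_levels(2);
#pragma omp parallel num_threads(3)
#pragma omp parallel num_threads(2)
  {
    int lvl = omp_get_level(); // 2 inside the nested region
    printf("tid=%d ancestor@1=%d team_size@1=%d\n", omp_get_thread_num(),
           omp_get_ancestor_thread_num(lvl - 1), // id in the enclosing team
           omp_get_team_size(lvl - 1));          // 3, the enclosing team size
  }
  return 0;
}
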
3128
3129kmp_r_sched_t __kmp_get_schedule_global() {
3130 // This routine created because pairs (__kmp_sched, __kmp_chunk) and
3131 // (__kmp_static, __kmp_guided) may be changed by kmp_set_defaults
3132 // independently. So one can get the updated schedule here.
3133
3134 kmp_r_sched_t r_sched;
3135
3136 // create schedule from 4 globals: __kmp_sched, __kmp_chunk, __kmp_static,
3137 // __kmp_guided. __kmp_sched should keep original value, so that user can set
3138 // KMP_SCHEDULE multiple times, and thus have different run-time schedules in
3139 // different roots (even in OMP 2.5)
3140 enum sched_type s = SCHEDULE_WITHOUT_MODIFIERS(__kmp_sched);
3141 enum sched_type sched_modifiers = SCHEDULE_GET_MODIFIERS(__kmp_sched);
3142 if (s == kmp_sch_static) {
3143 // replace STATIC with more detailed schedule (balanced or greedy)
3144 r_sched.r_sched_type = __kmp_static;
3145 } else if (s == kmp_sch_guided_chunked) {
3146 // replace GUIDED with more detailed schedule (iterative or analytical)
3147 r_sched.r_sched_type = __kmp_guided;
3148 } else { // (STATIC_CHUNKED), or (DYNAMIC_CHUNKED), or other
3149 r_sched.r_sched_type = __kmp_sched;
3150 }
3151  SCHEDULE_SET_MODIFIERS(r_sched.r_sched_type, sched_modifiers); // expands to: (r_sched.r_sched_type = (enum sched_type)((kmp_int32)r_sched.r_sched_type | (kmp_int32)sched_modifiers))
3152
3153 if (__kmp_chunk < KMP_DEFAULT_CHUNK1) {
3154 // __kmp_chunk may be wrong here (if it was not ever set)
3155 r_sched.chunk = KMP_DEFAULT_CHUNK1;
3156 } else {
3157 r_sched.chunk = __kmp_chunk;
3158 }
3159
3160 return r_sched;
3161}
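
// Illustration (not part of kmp_runtime.cpp): the SCHEDULE_* macros above are
// plain bit masking -- the monotonic/nonmonotonic modifier flags live in high
// bits of sched_type, so they can be stripped, inspected, and re-applied
// independently of the base schedule kind. Toy enum values below are
// illustrative, not the real kmp_sch_* encoding.
#include <cstdio>

enum toy_sched : unsigned {
  toy_sch_static = 1,
  toy_sch_guided = 2,
  toy_mod_monotonic = 1u << 29,
  toy_mod_nonmonotonic = 1u << 30,
  toy_mod_mask = toy_mod_monotonic | toy_mod_nonmonotonic
};

int main() {
  unsigned sched = toy_sch_guided | toy_mod_nonmonotonic;
  unsigned kind = sched & ~toy_mod_mask; // like SCHEDULE_WITHOUT_MODIFIERS
  unsigned mods = sched & toy_mod_mask;  // like SCHEDULE_GET_MODIFIERS
  unsigned rebuilt = kind | mods;        // like SCHEDULE_SET_MODIFIERS
  std::printf("kind=%u mods=0x%x same=%d\n", kind, mods, rebuilt == sched);
  return 0;
}
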
3162
3163/* Allocate (realloc == FALSE) or reallocate (realloc == TRUE)
3164   at least argc number of *t_argv entries for the requested team. */
3165static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc) {
3166
3167  KMP_DEBUG_ASSERT(team); // expands to: if (!(team)) { __kmp_debug_assert("team", "openmp/runtime/src/kmp_runtime.cpp", 3167); }
3168  if (!realloc || argc > team->t.t_max_argc) {
3169
3170    KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: needed entries=%d, "
3171                   "current entries=%d\n",
3172                   team->t.t_id, argc, (realloc) ? team->t.t_max_argc : 0));
3173    /* if previously allocated heap space for args, free them */
3174    if (realloc && team->t.t_argv != &team->t.t_inline_argv[0])
3175      __kmp_free((void *)team->t.t_argv);
3176
3177    if (argc <= KMP_INLINE_ARGV_ENTRIES) { // KMP_INLINE_ARGV_ENTRIES expands to: (int)((4 * 64 - ((3 * (sizeof(void *)) + 2 * sizeof(int) + 2 * sizeof(kmp_int8) + sizeof(kmp_int16) + sizeof(kmp_uint32)) % 64)) / (sizeof(void *)))
3178      /* use unused space in the cache line for arguments */
3179      team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
3180      KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: inline allocate %d "
3181                     "argv entries\n",
3182                     team->t.t_id, team->t.t_max_argc));
3183      team->t.t_argv = &team->t.t_inline_argv[0];
3184      if (__kmp_storage_map) {
3185        __kmp_print_storage_map_gtid(
3186            -1, &team->t.t_inline_argv[0],
3187            &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
3188            (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES), "team_%d.t_inline_argv",
3189            team->t.t_id);
3190      }
3191    } else {
3192      /* allocate space for arguments in the heap */
3193      team->t.t_max_argc = (argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1)) // KMP_MIN_MALLOC_ARGV_ENTRIES is 100
3194                               ? KMP_MIN_MALLOC_ARGV_ENTRIES
3195                               : 2 * argc;
3196      KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: dynamic allocate %d "
3197                     "argv entries\n",
3198                     team->t.t_id, team->t.t_max_argc));
3199      team->t.t_argv =
3200          (void **)__kmp_page_allocate(sizeof(void *) * team->t.t_max_argc); // expands to: ___kmp_page_allocate(..., "openmp/runtime/src/kmp_runtime.cpp", 3200)
3201      if (__kmp_storage_map) {
3202        __kmp_print_storage_map_gtid(-1, &team->t.t_argv[0],
3203                                     &team->t.t_argv[team->t.t_max_argc],
3204                                     sizeof(void *) * team->t.t_max_argc,
3205                                     "team_%d.t_argv", team->t.t_id);
3206      }
3207    }
3208  }
3209}
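
// Illustration (not part of kmp_runtime.cpp): the capacity policy above keeps
// up to a cache-line's worth of argument pointers inline in the team struct,
// otherwise rounds the heap allocation up to at least
// KMP_MIN_MALLOC_ARGV_ENTRIES (100) or twice the request. A minimal sketch of
// that sizing rule; kInlineEntries is an assumed stand-in value, and
// argv_capacity() is an illustrative name.
#include <cassert>

constexpr int kInlineEntries = 24;     // stand-in for KMP_INLINE_ARGV_ENTRIES
constexpr int kMinMallocEntries = 100; // KMP_MIN_MALLOC_ARGV_ENTRIES

// Returns the capacity that would be reserved for 'argc' argument slots.
constexpr int argv_capacity(int argc) {
  if (argc <= kInlineEntries)
    return kInlineEntries; // reuse unused inline cache-line space
  return (argc <= (kMinMallocEntries >> 1)) ? kMinMallocEntries : 2 * argc;
}

int main() {
  assert(argv_capacity(4) == kInlineEntries);     // fits inline
  assert(argv_capacity(40) == kMinMallocEntries); // small heap request rounds up
  assert(argv_capacity(80) == 160);               // large request doubles
  return 0;
}
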
3210
3211static void __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) {
3212 int i;
3213 int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
3214 team->t.t_threads =
3215 (kmp_info_t **)__kmp_allocate(sizeof(kmp_info_t *) * max_nth)___kmp_allocate((sizeof(kmp_info_t *) * max_nth), "openmp/runtime/src/kmp_runtime.cpp"
, 3215)
;
3216 team->t.t_disp_buffer = (dispatch_shared_info_t *)__kmp_allocate(___kmp_allocate((sizeof(dispatch_shared_info_t) * num_disp_buff
), "openmp/runtime/src/kmp_runtime.cpp", 3217)
3217 sizeof(dispatch_shared_info_t) * num_disp_buff)___kmp_allocate((sizeof(dispatch_shared_info_t) * num_disp_buff
), "openmp/runtime/src/kmp_runtime.cpp", 3217)
;
3218 team->t.t_dispatch =
3219 (kmp_disp_t *)__kmp_allocate(sizeof(kmp_disp_t) * max_nth)___kmp_allocate((sizeof(kmp_disp_t) * max_nth), "openmp/runtime/src/kmp_runtime.cpp"
, 3219)
;
3220 team->t.t_implicit_task_taskdata =
3221 (kmp_taskdata_t *)__kmp_allocate(sizeof(kmp_taskdata_t) * max_nth)___kmp_allocate((sizeof(kmp_taskdata_t) * max_nth), "openmp/runtime/src/kmp_runtime.cpp"
, 3221)
;
3222 team->t.t_max_nproc = max_nth;
3223
3224 /* setup dispatch buffers */
3225 for (i = 0; i < num_disp_buff; ++i) {
3226 team->t.t_disp_buffer[i].buffer_index = i;
3227 team->t.t_disp_buffer[i].doacross_buf_idx = i;
3228 }
3229}
3230
3231static void __kmp_free_team_arrays(kmp_team_t *team) {
3232 /* Note: this does not free the threads in t_threads (__kmp_free_threads) */
3233 int i;
3234 for (i = 0; i < team->t.t_max_nproc; ++i) {
3235 if (team->t.t_dispatch[i].th_disp_buffer != NULL__null) {
3236 __kmp_free(team->t.t_dispatch[i].th_disp_buffer)___kmp_free((team->t.t_dispatch[i].th_disp_buffer), "openmp/runtime/src/kmp_runtime.cpp"
, 3236)
;
3237 team->t.t_dispatch[i].th_disp_buffer = NULL__null;
3238 }
3239 }
3240#if KMP_USE_HIER_SCHED0
3241 __kmp_dispatch_free_hierarchies(team);
3242#endif
3243 __kmp_free(team->t.t_threads)___kmp_free((team->t.t_threads), "openmp/runtime/src/kmp_runtime.cpp"
, 3243)
;
3244 __kmp_free(team->t.t_disp_buffer)___kmp_free((team->t.t_disp_buffer), "openmp/runtime/src/kmp_runtime.cpp"
, 3244)
;
3245 __kmp_free(team->t.t_dispatch)___kmp_free((team->t.t_dispatch), "openmp/runtime/src/kmp_runtime.cpp"
, 3245)
;
3246 __kmp_free(team->t.t_implicit_task_taskdata)___kmp_free((team->t.t_implicit_task_taskdata), "openmp/runtime/src/kmp_runtime.cpp"
, 3246)
;
3247 team->t.t_threads = NULL__null;
3248 team->t.t_disp_buffer = NULL__null;
3249 team->t.t_dispatch = NULL__null;
3250 team->t.t_implicit_task_taskdata = 0;
3251}
3252
3253static void __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
3254 kmp_info_t **oldThreads = team->t.t_threads;
3255
3256 __kmp_free(team->t.t_disp_buffer)___kmp_free((team->t.t_disp_buffer), "openmp/runtime/src/kmp_runtime.cpp"
, 3256)
;
3257 __kmp_free(team->t.t_dispatch)___kmp_free((team->t.t_dispatch), "openmp/runtime/src/kmp_runtime.cpp"
, 3257)
;
3258 __kmp_free(team->t.t_implicit_task_taskdata)___kmp_free((team->t.t_implicit_task_taskdata), "openmp/runtime/src/kmp_runtime.cpp"
, 3258)
;
3259 __kmp_allocate_team_arrays(team, max_nth);
3260
3261  KMP_MEMCPY(team->t.t_threads, oldThreads, // KMP_MEMCPY expands to memcpy
3262             team->t.t_nproc * sizeof(kmp_info_t *));
3263
3264 __kmp_free(oldThreads)___kmp_free((oldThreads), "openmp/runtime/src/kmp_runtime.cpp"
, 3264)
;
3265}
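
// Illustration (not part of kmp_runtime.cpp): __kmp_reallocate_team_arrays()
// preserves the old t_threads pointers (memcpy of the t_nproc used entries)
// while the other per-thread arrays are simply rebuilt from scratch. A
// stripped-down sketch of that pattern; ToyTeam and grow_threads() are
// illustrative names, not runtime types.
#include <cstdlib>
#include <cstring>

struct ToyTeam {
  void **threads; // preserved across growth
  int *dispatch;  // rebuilt from scratch on growth
  int nproc;      // currently used entries
  int max_nproc;  // allocated capacity
};

static void grow_threads(ToyTeam *t, int new_max) {
  void **old_threads = t->threads;
  std::free(t->dispatch); // auxiliary array: just drop and reallocate
  t->threads = (void **)std::calloc(new_max, sizeof(void *));
  t->dispatch = (int *)std::calloc(new_max, sizeof(int));
  std::memcpy(t->threads, old_threads, // keep the existing thread slots
              t->nproc * sizeof(void *));
  std::free(old_threads);
  t->max_nproc = new_max;
}

int main() {
  ToyTeam t{(void **)std::calloc(2, sizeof(void *)),
            (int *)std::calloc(2, sizeof(int)), 2, 2};
  t.threads[0] = &t; // pretend slot 0 is occupied
  grow_threads(&t, 8);
  return t.threads[0] == &t ? 0 : 1; // the old entry survived the growth
}
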
3266
3267static kmp_internal_control_t __kmp_get_global_icvs(void) {
3268
3269 kmp_r_sched_t r_sched =
3270 __kmp_get_schedule_global(); // get current state of scheduling globals
3271
3272 KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.used > 0)if (!(__kmp_nested_proc_bind.used > 0)) { __kmp_debug_assert
("__kmp_nested_proc_bind.used > 0", "openmp/runtime/src/kmp_runtime.cpp"
, 3272); }
;
3273
3274 kmp_internal_control_t g_icvs = {
3275 0, // int serial_nesting_level; //corresponds to value of th_team_serialized
3276 (kmp_int8)__kmp_global.g.g_dynamic, // internal control for dynamic
3277 // adjustment of threads (per thread)
3278 (kmp_int8)__kmp_env_blocktime, // int bt_set; //internal control for
3279 // whether blocktime is explicitly set
3280 __kmp_dflt_blocktime, // int blocktime; //internal control for blocktime
3281#if KMP_USE_MONITOR
3282 __kmp_bt_intervals, // int bt_intervals; //internal control for blocktime
3283// intervals
3284#endif
3285 __kmp_dflt_team_nth, // int nproc; //internal control for # of threads for
3286 // next parallel region (per thread)
3287 // (use a max ub on value if __kmp_parallel_initialize not called yet)
3288 __kmp_cg_max_nth, // int thread_limit;
3289 __kmp_dflt_max_active_levels, // int max_active_levels; //internal control
3290 // for max_active_levels
3291 r_sched, // kmp_r_sched_t sched; //internal control for runtime schedule
3292 // {sched,chunk} pair
3293 __kmp_nested_proc_bind.bind_types[0],
3294 __kmp_default_device,
3295 NULL__null // struct kmp_internal_control *next;
3296 };
3297
3298 return g_icvs;
3299}
3300
3301static kmp_internal_control_t __kmp_get_x_global_icvs(const kmp_team_t *team) {
3302
3303 kmp_internal_control_t gx_icvs;
3304 gx_icvs.serial_nesting_level =
3305 0; // probably =team->t.t_serial like in save_inter_controls
3306 copy_icvs(&gx_icvs, &team->t.t_threads[0]->th.th_current_task->td_icvs);
3307 gx_icvs.next = NULL__null;
3308
3309 return gx_icvs;
3310}
3311
3312static void __kmp_initialize_root(kmp_root_t *root) {
3313 int f;
3314 kmp_team_t *root_team;
3315 kmp_team_t *hot_team;
3316 int hot_team_max_nth;
3317 kmp_r_sched_t r_sched =
3318 __kmp_get_schedule_global(); // get current state of scheduling globals
3319 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3320 KMP_DEBUG_ASSERT(root)if (!(root)) { __kmp_debug_assert("root", "openmp/runtime/src/kmp_runtime.cpp"
, 3320); }
;
3321 KMP_ASSERT(!root->r.r_begin)if (!(!root->r.r_begin)) { __kmp_debug_assert("!root->r.r_begin"
, "openmp/runtime/src/kmp_runtime.cpp", 3321); }
;
3322
3323 /* setup the root state structure */
3324 __kmp_init_lock(&root->r.r_begin_lock);
3325 root->r.r_begin = FALSE0;
3326 root->r.r_active = FALSE0;
3327 root->r.r_in_parallel = 0;
3328 root->r.r_blocktime = __kmp_dflt_blocktime;
3329#if KMP_AFFINITY_SUPPORTED1
3330 root->r.r_affinity_assigned = FALSE0;
3331#endif
3332
3333 /* setup the root team for this task */
3334 /* allocate the root team structure */
3335 KF_TRACE(10, ("__kmp_initialize_root: before root_team\n"))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_initialize_root: before root_team\n"
); }
;
3336
3337 root_team =
3338 __kmp_allocate_team(root,
3339 1, // new_nproc
3340 1, // max_nproc
3341#if OMPT_SUPPORT1
3342 ompt_data_none{0}, // root parallel id
3343#endif
3344 __kmp_nested_proc_bind.bind_types[0], &r_icvs,
3345 0 // argc
3346 USE_NESTED_HOT_ARG(NULL), __null // primary thread is unknown
3347 );
3348#if USE_DEBUGGER0
3349 // Non-NULL value should be assigned to make the debugger display the root
3350 // team.
3351 TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)(~0))((root_team->t.t_pkfn)) = (((microtask_t)(~0)));
3352#endif
3353
3354 KF_TRACE(10, ("__kmp_initialize_root: after root_team = %p\n", root_team))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_initialize_root: after root_team = %p\n"
, root_team); }
;
3355
3356 root->r.r_root_team = root_team;
3357 root_team->t.t_control_stack_top = NULL__null;
3358
3359 /* initialize root team */
3360 root_team->t.t_threads[0] = NULL__null;
3361 root_team->t.t_nproc = 1;
3362 root_team->t.t_serialized = 1;
3363 // TODO???: root_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
3364 root_team->t.t_sched.sched = r_sched.sched;
3365 KA_TRACE(if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n"
, root_team->t.t_id, 0, 0); }
3366 20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n"
, root_team->t.t_id, 0, 0); }
3367 ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n"
, root_team->t.t_id, 0, 0); }
3368 root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n"
, root_team->t.t_id, 0, 0); }
;
3369
3370 /* setup the hot team for this task */
3371 /* allocate the hot team structure */
3372 KF_TRACE(10, ("__kmp_initialize_root: before hot_team\n"))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_initialize_root: before hot_team\n"
); }
;
3373
3374 hot_team =
3375 __kmp_allocate_team(root,
3376 1, // new_nproc
3377 __kmp_dflt_team_nth_ub * 2, // max_nproc
3378#if OMPT_SUPPORT1
3379 ompt_data_none{0}, // root parallel id
3380#endif
3381 __kmp_nested_proc_bind.bind_types[0], &r_icvs,
3382 0 // argc
3383 USE_NESTED_HOT_ARG(NULL), __null // primary thread is unknown
3384 );
3385 KF_TRACE(10, ("__kmp_initialize_root: after hot_team = %p\n", hot_team))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_initialize_root: after hot_team = %p\n"
, hot_team); }
;
3386
3387 root->r.r_hot_team = hot_team;
3388 root_team->t.t_control_stack_top = NULL__null;
3389
3390 /* first-time initialization */
3391 hot_team->t.t_parent = root_team;
3392
3393 /* initialize hot team */
3394 hot_team_max_nth = hot_team->t.t_max_nproc;
3395 for (f = 0; f < hot_team_max_nth; ++f) {
3396 hot_team->t.t_threads[f] = NULL__null;
3397 }
3398 hot_team->t.t_nproc = 1;
3399 // TODO???: hot_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
3400 hot_team->t.t_sched.sched = r_sched.sched;
3401 hot_team->t.t_size_changed = 0;
3402}
3403
3404#ifdef KMP_DEBUG1
3405
3406typedef struct kmp_team_list_item {
3407 kmp_team_p const *entry;
3408 struct kmp_team_list_item *next;
3409} kmp_team_list_item_t;
3410typedef kmp_team_list_item_t *kmp_team_list_t;
3411
3412static void __kmp_print_structure_team_accum( // Add team to list of teams.
3413 kmp_team_list_t list, // List of teams.
3414 kmp_team_p const *team // Team to add.
3415) {
3416
3417 // List must terminate with item where both entry and next are NULL.
3418 // Team is added to the list only once.
3419 // List is sorted in ascending order by team id.
3420 // Team id is *not* a key.
3421
3422 kmp_team_list_t l;
3423
3424 KMP_DEBUG_ASSERT(list != NULL)if (!(list != __null)) { __kmp_debug_assert("list != __null",
"openmp/runtime/src/kmp_runtime.cpp", 3424); }
;
3425 if (team == NULL__null) {
3426 return;
3427 }
3428
3429 __kmp_print_structure_team_accum(list, team->t.t_parent);
3430 __kmp_print_structure_team_accum(list, team->t.t_next_pool);
3431
3432 // Search list for the team.
3433 l = list;
3434 while (l->next != NULL__null && l->entry != team) {
3435 l = l->next;
3436 }
3437 if (l->next != NULL__null) {
3438 return; // Team has been added before, exit.
3439 }
3440
3441 // Team is not found. Search list again for insertion point.
3442 l = list;
3443 while (l->next != NULL__null && l->entry->t.t_id <= team->t.t_id) {
3444 l = l->next;
3445 }
3446
3447 // Insert team.
3448 {
3449    kmp_team_list_item_t *item = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
3450        sizeof(kmp_team_list_item_t)); // KMP_INTERNAL_MALLOC expands to malloc
3451 *item = *l;
3452 l->entry = team;
3453 l->next = item;
3454 }
3455}
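
// Illustration (not part of kmp_runtime.cpp): the insertion above relies on a
// sentinel tail node (entry == NULL, next == NULL) and on the "copy the
// current node into the new node, then overwrite the current node in place"
// trick, which avoids carrying a prev pointer. The same list discipline with
// plain ints; Item and insert_sorted() are illustrative names.
#include <cstdio>
#include <cstdlib>

struct Item {
  int value;  // 0 marks the sentinel tail in this toy version
  Item *next; // nullptr only on the sentinel
};

static void insert_sorted(Item *list, int value) {
  Item *l = list;
  while (l->next != nullptr && l->value <= value)
    l = l->next;                       // stop at sentinel or first larger value
  Item *item = (Item *)std::malloc(sizeof(Item));
  *item = *l;                          // new node takes over the rest of the list
  l->value = value;                    // current node is rewritten in place
  l->next = item;
}

int main() {
  Item *list = (Item *)std::malloc(sizeof(Item));
  list->value = 0;
  list->next = nullptr;                // sentinel-only list
  insert_sorted(list, 30);
  insert_sorted(list, 10);
  insert_sorted(list, 20);
  for (Item *l = list; l->next != nullptr; l = l->next)
    std::printf("%d ", l->value);      // prints: 10 20 30
  std::printf("\n");
  return 0;
}
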
3456
3457static void __kmp_print_structure_team(char const *title, kmp_team_p const *team
3458
3459) {
3460 __kmp_printf("%s", title);
3461 if (team != NULL__null) {
3462 __kmp_printf("%2x %p\n", team->t.t_id, team);
3463 } else {
3464 __kmp_printf(" - (nil)\n");
3465 }
3466}
3467
3468static void __kmp_print_structure_thread(char const *title,
3469 kmp_info_p const *thread) {
3470 __kmp_printf("%s", title);
3471 if (thread != NULL__null) {
3472 __kmp_printf("%2d %p\n", thread->th.th_info.ds.ds_gtid, thread);
3473 } else {
3474 __kmp_printf(" - (nil)\n");
3475 }
3476}
3477
3478void __kmp_print_structure(void) {
3479
3480 kmp_team_list_t list;
3481
3482 // Initialize list of teams.
3483 list =
3484 (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(sizeof(kmp_team_list_item_t))malloc(sizeof(kmp_team_list_item_t));
3485 list->entry = NULL__null;
3486 list->next = NULL__null;
3487
3488 __kmp_printf("\n------------------------------\nGlobal Thread "
3489 "Table\n------------------------------\n");
3490 {
3491 int gtid;
3492 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3493 __kmp_printf("%2d", gtid);
3494 if (__kmp_threads != NULL__null) {
3495 __kmp_printf(" %p", __kmp_threads[gtid]);
3496 }
3497 if (__kmp_root != NULL__null) {
3498 __kmp_printf(" %p", __kmp_root[gtid]);
3499 }
3500 __kmp_printf("\n");
3501 }
3502 }
3503
3504 // Print out __kmp_threads array.
3505 __kmp_printf("\n------------------------------\nThreads\n--------------------"
3506 "----------\n");
3507 if (__kmp_threads != NULL__null) {
3508 int gtid;
3509 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3510 kmp_info_t const *thread = __kmp_threads[gtid];
3511 if (thread != NULL__null) {
3512 __kmp_printf("GTID %2d %p:\n", gtid, thread);
3513 __kmp_printf(" Our Root: %p\n", thread->th.th_root);
3514 __kmp_print_structure_team(" Our Team: ", thread->th.th_team);
3515 __kmp_print_structure_team(" Serial Team: ",
3516 thread->th.th_serial_team);
3517 __kmp_printf(" Threads: %2d\n", thread->th.th_team_nproc);
3518 __kmp_print_structure_thread(" Primary: ",
3519 thread->th.th_team_master);
3520 __kmp_printf(" Serialized?: %2d\n", thread->th.th_team_serialized);
3521 __kmp_printf(" Set NProc: %2d\n", thread->th.th_set_nproc);
3522 __kmp_printf(" Set Proc Bind: %2d\n", thread->th.th_set_proc_bind);
3523 __kmp_print_structure_thread(" Next in pool: ",
3524 thread->th.th_next_pool);
3525 __kmp_printf("\n");
3526 __kmp_print_structure_team_accum(list, thread->th.th_team);
3527 __kmp_print_structure_team_accum(list, thread->th.th_serial_team);
3528 }
3529 }
3530 } else {
3531 __kmp_printf("Threads array is not allocated.\n");
3532 }
3533
3534 // Print out __kmp_root array.
3535 __kmp_printf("\n------------------------------\nUbers\n----------------------"
3536 "--------\n");
3537 if (__kmp_root != NULL__null) {
3538 int gtid;
3539 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3540 kmp_root_t const *root = __kmp_root[gtid];
3541 if (root != NULL__null) {
3542 __kmp_printf("GTID %2d %p:\n", gtid, root);
3543 __kmp_print_structure_team(" Root Team: ", root->r.r_root_team);
3544 __kmp_print_structure_team(" Hot Team: ", root->r.r_hot_team);
3545 __kmp_print_structure_thread(" Uber Thread: ",
3546 root->r.r_uber_thread);
3547 __kmp_printf(" Active?: %2d\n", root->r.r_active);
3548 __kmp_printf(" In Parallel: %2d\n",
3549 KMP_ATOMIC_LD_RLX(&root->r.r_in_parallel)(&root->r.r_in_parallel)->load(std::memory_order_relaxed
)
);
3550 __kmp_printf("\n");
3551 __kmp_print_structure_team_accum(list, root->r.r_root_team);
3552 __kmp_print_structure_team_accum(list, root->r.r_hot_team);
3553 }
3554 }
3555 } else {
3556 __kmp_printf("Ubers array is not allocated.\n");
3557 }
3558
3559 __kmp_printf("\n------------------------------\nTeams\n----------------------"
3560 "--------\n");
3561 while (list->next != NULL__null) {
3562 kmp_team_p const *team = list->entry;
3563 int i;
3564 __kmp_printf("Team %2x %p:\n", team->t.t_id, team);
3565 __kmp_print_structure_team(" Parent Team: ", team->t.t_parent);
3566 __kmp_printf(" Primary TID: %2d\n", team->t.t_master_tid);
3567 __kmp_printf(" Max threads: %2d\n", team->t.t_max_nproc);
3568 __kmp_printf(" Levels of serial: %2d\n", team->t.t_serialized);
3569 __kmp_printf(" Number threads: %2d\n", team->t.t_nproc);
3570 for (i = 0; i < team->t.t_nproc; ++i) {
3571 __kmp_printf(" Thread %2d: ", i);
3572 __kmp_print_structure_thread("", team->t.t_threads[i]);
3573 }
3574 __kmp_print_structure_team(" Next in pool: ", team->t.t_next_pool);
3575 __kmp_printf("\n");
3576 list = list->next;
3577 }
3578
3579 // Print out __kmp_thread_pool and __kmp_team_pool.
3580 __kmp_printf("\n------------------------------\nPools\n----------------------"
3581 "--------\n");
3582 __kmp_print_structure_thread("Thread pool: ",
3583 CCAST(kmp_info_t *, __kmp_thread_pool)const_cast<kmp_info_t *>(__kmp_thread_pool));
3584 __kmp_print_structure_team("Team pool: ",
3585 CCAST(kmp_team_t *, __kmp_team_pool)const_cast<kmp_team_t *>(__kmp_team_pool));
3586 __kmp_printf("\n");
3587
3588 // Free team list.
3589 while (list != NULL__null) {
3590 kmp_team_list_item_t *item = list;
3591 list = list->next;
3592 KMP_INTERNAL_FREE(item)free(item);
3593 }
3594}
3595
3596#endif
3597
3598//---------------------------------------------------------------------------
3599// Stuff for per-thread fast random number generator
3600// Table of primes
3601static const unsigned __kmp_primes[] = {
3602 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, 0xba5703f5, 0xb495a877,
3603 0xe1626741, 0x79695e6b, 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
3604 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, 0xbe4d6fe9, 0x5f15e201,
3605 0x99afc3fd, 0xf3f16801, 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
3606 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, 0x085a3d61, 0x46eb5ea7,
3607 0x3d9910ed, 0x2e687b5b, 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
3608 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, 0x54581edb, 0xf2480f45,
3609 0x0bb9288f, 0xef1affc7, 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
3610 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, 0xfc411073, 0xc3749363,
3611 0xb892d829, 0x3549366b, 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
3612 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f};
3613
3614//---------------------------------------------------------------------------
3615// __kmp_get_random: Get a random number using a linear congruential method.
3616unsigned short __kmp_get_random(kmp_info_t *thread) {
3617 unsigned x = thread->th.th_x;
3618 unsigned short r = (unsigned short)(x >> 16);
3619
3620 thread->th.th_x = x * thread->th.th_a + 1;
3621
3622 KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_get_random: THREAD: %d, RETURN: %u\n"
, thread->th.th_info.ds.ds_tid, r); }
3623 thread->th.th_info.ds.ds_tid, r))if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_get_random: THREAD: %d, RETURN: %u\n"
, thread->th.th_info.ds.ds_tid, r); }
;
3624
3625 return r;
3626}
3627//--------------------------------------------------------
3628// __kmp_init_random: Initialize a random number generator
3629void __kmp_init_random(kmp_info_t *thread) {
3630 unsigned seed = thread->th.th_info.ds.ds_tid;
3631
3632 thread->th.th_a =
3633 __kmp_primes[seed % (sizeof(__kmp_primes) / sizeof(__kmp_primes[0]))];
3634 thread->th.th_x = (seed + 1) * thread->th.th_a + 1;
3635 KA_TRACE(30,if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_init_random: THREAD: %u; A: %u\n"
, seed, thread->th.th_a); }
3636 ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a))if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_init_random: THREAD: %u; A: %u\n"
, seed, thread->th.th_a); }
;
3637}
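
// Illustration (not part of kmp_runtime.cpp): the generator above is a plain
// linear congruential update, x = x * a + 1 (mod 2^32), with the multiplier
// 'a' chosen per thread from the prime table and the better-mixed high 16 bits
// returned. A self-contained sketch; ToyRng and the truncated prime list are
// illustrative.
#include <cstdint>
#include <cstdio>

struct ToyRng {
  std::uint32_t a; // per-"thread" multiplier, would come from __kmp_primes
  std::uint32_t x; // current state
};

static void toy_rng_init(ToyRng *rng, std::uint32_t seed) {
  static const std::uint32_t primes[] = {0x9e3779b1u, 0xffe6cc59u, 0x2109f6ddu};
  rng->a = primes[seed % (sizeof(primes) / sizeof(primes[0]))];
  rng->x = (seed + 1) * rng->a + 1; // same seeding shape as __kmp_init_random
}

static std::uint16_t toy_rng_next(ToyRng *rng) {
  std::uint16_t r = (std::uint16_t)(rng->x >> 16); // return the high 16 bits
  rng->x = rng->x * rng->a + 1;                    // LCG step, wraps mod 2^32
  return r;
}

int main() {
  ToyRng rng;
  toy_rng_init(&rng, /*tid=*/3);
  for (int i = 0; i < 4; ++i)
    std::printf("%u ", (unsigned)toy_rng_next(&rng));
  std::printf("\n");
  return 0;
}
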
3638
3639#if KMP_OS_WINDOWS0
3640/* reclaim array entries for root threads that are already dead, returns number
3641 * reclaimed */
3642static int __kmp_reclaim_dead_roots(void) {
3643 int i, r = 0;
3644
3645 for (i = 0; i < __kmp_threads_capacity; ++i) {
3646 if (KMP_UBER_GTID(i) &&
3647 !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])((void *)(__kmp_threads[i]))) &&
3648 !__kmp_root[i]
3649 ->r.r_active) { // AC: reclaim only roots died in non-active state
3650 r += __kmp_unregister_root_other_thread(i);
3651 }
3652 }
3653 return r;
3654}
3655#endif
3656
3657/* This function attempts to create free entries in __kmp_threads and
3658 __kmp_root, and returns the number of free entries generated.
3659
3660 For Windows* OS static library, the first mechanism used is to reclaim array
3661 entries for root threads that are already dead.
3662
3663 On all platforms, expansion is attempted on the arrays __kmp_threads_ and
3664 __kmp_root, with appropriate update to __kmp_threads_capacity. Array
3665 capacity is increased by doubling with clipping to __kmp_tp_capacity, if
3666 threadprivate cache array has been created. Synchronization with
3667 __kmpc_threadprivate_cached is done using __kmp_tp_cached_lock.
3668
3669 After any dead root reclamation, if the clipping value allows array expansion
3670 to result in the generation of a total of nNeed free slots, the function does
3671 that expansion. If not, nothing is done beyond the possible initial root
3672 thread reclamation.
3673
3674 If any argument is negative, the behavior is undefined. */
3675static int __kmp_expand_threads(int nNeed) {
3676 int added = 0;
3677 int minimumRequiredCapacity;
3678 int newCapacity;
3679 kmp_info_t **newThreads;
3680 kmp_root_t **newRoot;
3681
3682 // All calls to __kmp_expand_threads should be under __kmp_forkjoin_lock, so
3683 // resizing __kmp_threads does not need additional protection if foreign
3684 // threads are present
3685
3686#if KMP_OS_WINDOWS0 && !KMP_DYNAMIC_LIB1
3687 /* only for Windows static library */
3688 /* reclaim array entries for root threads that are already dead */
3689 added = __kmp_reclaim_dead_roots();
3690
3691 if (nNeed) {
3692 nNeed -= added;
3693 if (nNeed < 0)
3694 nNeed = 0;
3695 }
3696#endif
3697 if (nNeed <= 0)
3698 return added;
3699
3700 // Note that __kmp_threads_capacity is not bounded by __kmp_max_nth. If
3701 // __kmp_max_nth is set to some value less than __kmp_sys_max_nth by the
3702 // user via KMP_DEVICE_THREAD_LIMIT, then __kmp_threads_capacity may become
3703 // > __kmp_max_nth in one of two ways:
3704 //
3705 // 1) The initialization thread (gtid = 0) exits. __kmp_threads[0]
3706 // may not be reused by another thread, so we may need to increase
3707 // __kmp_threads_capacity to __kmp_max_nth + 1.
3708 //
3709 // 2) New foreign root(s) are encountered. We always register new foreign
3710 // roots. This may cause a smaller # of threads to be allocated at
3711 // subsequent parallel regions, but the worker threads hang around (and
3712 // eventually go to sleep) and need slots in the __kmp_threads[] array.
3713 //
3714 // Anyway, that is the reason for moving the check to see if
3715 // __kmp_max_nth was exceeded into __kmp_reserve_threads()
3716 // instead of having it performed here. -BB
3717
3718 KMP_DEBUG_ASSERT(__kmp_sys_max_nth >= __kmp_threads_capacity)if (!(__kmp_sys_max_nth >= __kmp_threads_capacity)) { __kmp_debug_assert
("__kmp_sys_max_nth >= __kmp_threads_capacity", "openmp/runtime/src/kmp_runtime.cpp"
, 3718); }
;
3719
3720 /* compute expansion headroom to check if we can expand */
3721 if (__kmp_sys_max_nth - __kmp_threads_capacity < nNeed) {
3722 /* possible expansion too small -- give up */
3723 return added;
3724 }
3725 minimumRequiredCapacity = __kmp_threads_capacity + nNeed;
3726
3727 newCapacity = __kmp_threads_capacity;
3728 do {
3729 newCapacity = newCapacity <= (__kmp_sys_max_nth >> 1) ? (newCapacity << 1)
3730 : __kmp_sys_max_nth;
3731 } while (newCapacity < minimumRequiredCapacity);
3732  newThreads = (kmp_info_t **)__kmp_allocate(
3733      (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * newCapacity + CACHE_LINE); // CACHE_LINE is 64
3734  newRoot =
3735      (kmp_root_t **)((char *)newThreads + sizeof(kmp_info_t *) * newCapacity);
3736  KMP_MEMCPY(newThreads, __kmp_threads, // KMP_MEMCPY expands to memcpy
3737             __kmp_threads_capacity * sizeof(kmp_info_t *));
3738  KMP_MEMCPY(newRoot, __kmp_root,
3739             __kmp_threads_capacity * sizeof(kmp_root_t *));
3740 // Put old __kmp_threads array on a list. Any ongoing references to the old
3741 // list will be valid. This list is cleaned up at library shutdown.
3742 kmp_old_threads_list_t *node =
3743 (kmp_old_threads_list_t *)__kmp_allocate(sizeof(kmp_old_threads_list_t))___kmp_allocate((sizeof(kmp_old_threads_list_t)), "openmp/runtime/src/kmp_runtime.cpp"
, 3743)
;
3744 node->threads = __kmp_threads;
3745 node->next = __kmp_old_threads_list;
3746 __kmp_old_threads_list = node;
3747
3748 *(kmp_info_t * *volatile *)&__kmp_threads = newThreads;
3749 *(kmp_root_t * *volatile *)&__kmp_root = newRoot;
3750 added += newCapacity - __kmp_threads_capacity;
3751 *(volatile int *)&__kmp_threads_capacity = newCapacity;
3752
3753 if (newCapacity > __kmp_tp_capacity) {
3754 __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
3755 if (__kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3756 __kmp_threadprivate_resize_cache(newCapacity);
3757 } else { // increase __kmp_tp_capacity to correspond with kmp_threads size
3758 *(volatile int *)&__kmp_tp_capacity = newCapacity;
3759 }
3760 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
3761 }
3762
3763 return added;
3764}
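
// Illustration (not part of kmp_runtime.cpp): the growth step above doubles
// the capacity until it covers the request, clipping at __kmp_sys_max_nth, and
// gives up early if even the clipped maximum cannot provide the needed
// headroom. The sizing logic in isolation; grow_capacity() and its -1 failure
// code are illustrative.
#include <cassert>

// Returns the new capacity, or -1 if the request cannot be satisfied.
static int grow_capacity(int capacity, int sys_max, int need) {
  if (need <= 0)
    return capacity;
  if (sys_max - capacity < need)
    return -1; // possible expansion too small -- give up
  int required = capacity + need;
  int new_capacity = capacity;
  do {
    new_capacity =
        (new_capacity <= (sys_max >> 1)) ? (new_capacity << 1) : sys_max;
  } while (new_capacity < required);
  return new_capacity;
}

int main() {
  assert(grow_capacity(64, 32768, 1) == 128);          // plain doubling
  assert(grow_capacity(64, 32768, 200) == 512);        // doubles until it fits
  assert(grow_capacity(20000, 32768, 5000) == 32768);  // clipped at the max
  assert(grow_capacity(32700, 32768, 100) == -1);      // not enough headroom
  return 0;
}
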
3765
3766/* Register the current thread as a root thread and obtain our gtid. We must
3767 have the __kmp_initz_lock held at this point. Argument TRUE only if are the
3768 thread that calls from __kmp_do_serial_initialize() */
3769int __kmp_register_root(int initial_thread) {
3770 kmp_info_t *root_thread;
3771 kmp_root_t *root;
3772 int gtid;
3773 int capacity;
3774 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
3775 KA_TRACE(20, ("__kmp_register_root: entered\n"))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_register_root: entered\n"
); }
;
3776 KMP_MB();
3777
3778 /* 2007-03-02:
3779 If initial thread did not invoke OpenMP RTL yet, and this thread is not an
3780 initial one, "__kmp_all_nth >= __kmp_threads_capacity" condition does not
3781 work as expected -- it may return false (that means there is at least one
3782 empty slot in __kmp_threads array), but it is possible the only free slot
3783 is #0, which is reserved for initial thread and so cannot be used for this
3784     one. The following code works around this bug.
3785
3786     However, the right solution seems to be not to reserve slot #0 for the
3787     initial thread, because:
3788     (1) there is no magic in slot #0,
3789     (2) we cannot detect the initial thread reliably (the first thread that
3790     does serial initialization may not be the real initial thread).
3791 */
3792 capacity = __kmp_threads_capacity;
3793 if (!initial_thread && TCR_PTR(__kmp_threads[0])((void *)(__kmp_threads[0])) == NULL__null) {
3794 --capacity;
3795 }
3796
3797 // If it is not for initializing the hidden helper team, we need to take
3798 // __kmp_hidden_helper_threads_num out of the capacity because it is included
3799 // in __kmp_threads_capacity.
3800 if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)(__kmp_init_hidden_helper_threads)) {
3801 capacity -= __kmp_hidden_helper_threads_num;
3802 }
3803
3804 /* see if there are too many threads */
3805 if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1)) {
3806 if (__kmp_tp_cached) {
3807 __kmp_fatal(KMP_MSG(CantRegisterNewThread)__kmp_msg_format(kmp_i18n_msg_CantRegisterNewThread),
3808 KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity)__kmp_msg_format(kmp_i18n_hnt_Set_ALL_THREADPRIVATE, __kmp_tp_capacity
)
,
3809 KMP_HNT(PossibleSystemLimitOnThreads)__kmp_msg_format(kmp_i18n_hnt_PossibleSystemLimitOnThreads), __kmp_msg_null);
3810 } else {
3811 __kmp_fatal(KMP_MSG(CantRegisterNewThread)__kmp_msg_format(kmp_i18n_msg_CantRegisterNewThread), KMP_HNT(SystemLimitOnThreads)__kmp_msg_format(kmp_i18n_hnt_SystemLimitOnThreads),
3812 __kmp_msg_null);
3813 }
3814 }
3815
3816 // When hidden helper task is enabled, __kmp_threads is organized as follows:
3817 // 0: initial thread, also a regular OpenMP thread.
3818 // [1, __kmp_hidden_helper_threads_num]: slots for hidden helper threads.
3819 // [__kmp_hidden_helper_threads_num + 1, __kmp_threads_capacity): slots for
3820 // regular OpenMP threads.
3821 if (TCR_4(__kmp_init_hidden_helper_threads)(__kmp_init_hidden_helper_threads)) {
3822 // Find an available thread slot for hidden helper thread. Slots for hidden
3823 // helper threads start from 1 to __kmp_hidden_helper_threads_num.
3824 for (gtid = 1; TCR_PTR(__kmp_threads[gtid])((void *)(__kmp_threads[gtid])) != NULL__null &&
3825 gtid <= __kmp_hidden_helper_threads_num;
3826 gtid++)
3827 ;
3828 KMP_ASSERT(gtid <= __kmp_hidden_helper_threads_num)if (!(gtid <= __kmp_hidden_helper_threads_num)) { __kmp_debug_assert
("gtid <= __kmp_hidden_helper_threads_num", "openmp/runtime/src/kmp_runtime.cpp"
, 3828); }
;
3829 KA_TRACE(1, ("__kmp_register_root: found slot in threads array for "if (kmp_a_debug >= 1) { __kmp_debug_printf ("__kmp_register_root: found slot in threads array for "
"hidden helper thread: T#%d\n", gtid); }
3830 "hidden helper thread: T#%d\n",if (kmp_a_debug >= 1) { __kmp_debug_printf ("__kmp_register_root: found slot in threads array for "
"hidden helper thread: T#%d\n", gtid); }
3831 gtid))if (kmp_a_debug >= 1) { __kmp_debug_printf ("__kmp_register_root: found slot in threads array for "
"hidden helper thread: T#%d\n", gtid); }
;
3832 } else {
3833 /* find an available thread slot */
3834 // Don't reassign the zero slot since we need that to only be used by
3835 // initial thread. Slots for hidden helper threads should also be skipped.
3836 if (initial_thread && TCR_PTR(__kmp_threads[0])((void *)(__kmp_threads[0])) == NULL__null) {
3837 gtid = 0;
3838 } else {
3839 for (gtid = __kmp_hidden_helper_threads_num + 1;
3840 TCR_PTR(__kmp_threads[gtid])((void *)(__kmp_threads[gtid])) != NULL__null; gtid++)
3841 ;
3842 }
3843 KA_TRACE(if (kmp_a_debug >= 1) { __kmp_debug_printf ("__kmp_register_root: found slot in threads array: T#%d\n"
, gtid); }
3844 1, ("__kmp_register_root: found slot in threads array: T#%d\n", gtid))if (kmp_a_debug >= 1) { __kmp_debug_printf ("__kmp_register_root: found slot in threads array: T#%d\n"
, gtid); }
;
3845 KMP_ASSERT(gtid < __kmp_threads_capacity)if (!(gtid < __kmp_threads_capacity)) { __kmp_debug_assert
("gtid < __kmp_threads_capacity", "openmp/runtime/src/kmp_runtime.cpp"
, 3845); }
;
3846 }
3847
3848 /* update global accounting */
3849 __kmp_all_nth++;
3850 TCW_4(__kmp_nth, __kmp_nth + 1)(__kmp_nth) = (__kmp_nth + 1);
3851
3852 // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) for low
3853 // numbers of procs, and method #2 (keyed API call) for higher numbers.
3854 if (__kmp_adjust_gtid_mode) {
3855 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
3856 if (TCR_4(__kmp_gtid_mode)(__kmp_gtid_mode) != 2) {
3857 TCW_4(__kmp_gtid_mode, 2)(__kmp_gtid_mode) = (2);
3858 }
3859 } else {
3860 if (TCR_4(__kmp_gtid_mode)(__kmp_gtid_mode) != 1) {
3861 TCW_4(__kmp_gtid_mode, 1)(__kmp_gtid_mode) = (1);
3862 }
3863 }
3864 }
3865
3866#ifdef KMP_ADJUST_BLOCKTIME1
3867 /* Adjust blocktime to zero if necessary */
3868 /* Middle initialization might not have occurred yet */
3869 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
3870 if (__kmp_nth > __kmp_avail_proc) {
3871 __kmp_zero_bt = TRUE(!0);
3872 }
3873 }
3874#endif /* KMP_ADJUST_BLOCKTIME */
3875
3876 /* setup this new hierarchy */
3877 if (!(root = __kmp_root[gtid])) {
3878 root = __kmp_root[gtid] = (kmp_root_t *)__kmp_allocate(sizeof(kmp_root_t))___kmp_allocate((sizeof(kmp_root_t)), "openmp/runtime/src/kmp_runtime.cpp"
, 3878)
;
3879 KMP_DEBUG_ASSERT(!root->r.r_root_team)if (!(!root->r.r_root_team)) { __kmp_debug_assert("!root->r.r_root_team"
, "openmp/runtime/src/kmp_runtime.cpp", 3879); }
;
3880 }
3881
3882#if KMP_STATS_ENABLED0
3883 // Initialize stats as soon as possible (right after gtid assignment).
3884 __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
3885 __kmp_stats_thread_ptr->startLife();
3886 KMP_SET_THREAD_STATE(SERIAL_REGION)((void)0);
3887 KMP_INIT_PARTITIONED_TIMERS(OMP_serial)((void)0);
3888#endif
3889 __kmp_initialize_root(root);
3890
3891 /* setup new root thread structure */
3892 if (root->r.r_uber_thread) {
3893 root_thread = root->r.r_uber_thread;
3894 } else {
3895 root_thread = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t))___kmp_allocate((sizeof(kmp_info_t)), "openmp/runtime/src/kmp_runtime.cpp"
, 3895)
;
3896 if (__kmp_storage_map) {
3897 __kmp_print_thread_storage_map(root_thread, gtid);
3898 }
3899 root_thread->th.th_info.ds.ds_gtid = gtid;
3900#if OMPT_SUPPORT1
3901 root_thread->th.ompt_thread_info.thread_data = ompt_data_none{0};
3902#endif
3903 root_thread->th.th_root = root;
3904 if (__kmp_env_consistency_check) {
3905 root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid);
3906 }
3907#if USE_FAST_MEMORY3
3908 __kmp_initialize_fast_memory(root_thread);
3909#endif /* USE_FAST_MEMORY */
3910
3911#if KMP_USE_BGET1
3912 KMP_DEBUG_ASSERT(root_thread->th.th_local.bget_data == NULL)if (!(root_thread->th.th_local.bget_data == __null)) { __kmp_debug_assert
("root_thread->th.th_local.bget_data == __null", "openmp/runtime/src/kmp_runtime.cpp"
, 3912); }
;
3913 __kmp_initialize_bget(root_thread);
3914#endif
3915 __kmp_init_random(root_thread); // Initialize random number generator
3916 }
3917
3918 /* setup the serial team held in reserve by the root thread */
3919 if (!root_thread->th.th_serial_team) {
3920 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3921 KF_TRACE(10, ("__kmp_register_root: before serial_team\n"))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_register_root: before serial_team\n"
); }
;
3922 root_thread->th.th_serial_team = __kmp_allocate_team(
3923 root, 1, 1,
3924#if OMPT_SUPPORT1
3925 ompt_data_none{0}, // root parallel id
3926#endif
3927 proc_bind_default, &r_icvs, 0 USE_NESTED_HOT_ARG(NULL), __null);
3928 }
3929 KMP_ASSERT(root_thread->th.th_serial_team)if (!(root_thread->th.th_serial_team)) { __kmp_debug_assert
("root_thread->th.th_serial_team", "openmp/runtime/src/kmp_runtime.cpp"
, 3929); }
;
3930 KF_TRACE(10, ("__kmp_register_root: after serial_team = %p\n",if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_register_root: after serial_team = %p\n"
, root_thread->th.th_serial_team); }
3931 root_thread->th.th_serial_team))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_register_root: after serial_team = %p\n"
, root_thread->th.th_serial_team); }
;
3932
3933 /* drop root_thread into place */
3934 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread)((__kmp_threads[gtid])) = ((root_thread));
3935
3936 root->r.r_root_team->t.t_threads[0] = root_thread;
3937 root->r.r_hot_team->t.t_threads[0] = root_thread;
3938 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
3939  // AC: the team is created in reserve, not for execution (it is unused for now).
3940 root_thread->th.th_serial_team->t.t_serialized = 0;
3941 root->r.r_uber_thread = root_thread;
3942
3943 /* initialize the thread, get it ready to go */
3944 __kmp_initialize_info(root_thread, root->r.r_root_team, 0, gtid);
3945 TCW_4(__kmp_init_gtid, TRUE)(__kmp_init_gtid) = ((!0));
3946
3947 /* prepare the primary thread for get_gtid() */
3948 __kmp_gtid_set_specific(gtid);
3949
3950#if USE_ITT_BUILD1
3951 __kmp_itt_thread_name(gtid);
3952#endif /* USE_ITT_BUILD */
3953
3954#ifdef KMP_TDATA_GTID1
3955 __kmp_gtid = gtid;
3956#endif
3957 __kmp_create_worker(gtid, root_thread, __kmp_stksize);
3958 KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == gtid)if (!(__kmp_gtid_get_specific() == gtid)) { __kmp_debug_assert
("__kmp_gtid_get_specific() == gtid", "openmp/runtime/src/kmp_runtime.cpp"
, 3958); }
;
3959
3960 KA_TRACE(20, ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, "
"plain=%u\n", gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team
), root->r.r_hot_team->t.t_id, 0, 0, 0); }
3961 "plain=%u\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, "
"plain=%u\n", gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team
), root->r.r_hot_team->t.t_id, 0, 0, 0); }
3962 gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team),if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, "
"plain=%u\n", gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team
), root->r.r_hot_team->t.t_id, 0, 0, 0); }
3963 root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, "
"plain=%u\n", gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team
), root->r.r_hot_team->t.t_id, 0, 0, 0); }
3964 KMP_INIT_BARRIER_STATE))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, "
"plain=%u\n", gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team
), root->r.r_hot_team->t.t_id, 0, 0, 0); }
;
3965 { // Initialize barrier data.
3966 int b;
3967 for (b = 0; b < bs_last_barrier; ++b) {
3968 root_thread->th.th_bar[b].bb.b_arrived = KMP_INIT_BARRIER_STATE0;
3969#if USE_DEBUGGER0
3970 root_thread->th.th_bar[b].bb.b_worker_arrived = 0;
3971#endif
3972 }
3973 }
3974 KMP_DEBUG_ASSERT(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived ==if (!(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].
b_arrived == 0)) { __kmp_debug_assert("root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived == 0"
, "openmp/runtime/src/kmp_runtime.cpp", 3975); }
3975 KMP_INIT_BARRIER_STATE)if (!(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].
b_arrived == 0)) { __kmp_debug_assert("root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived == 0"
, "openmp/runtime/src/kmp_runtime.cpp", 3975); }
;
3976
3977#if KMP_AFFINITY_SUPPORTED1
3978 root_thread->th.th_current_place = KMP_PLACE_UNDEFINED(-2);
3979 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED(-2);
3980 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED(-2);
3981 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED(-2);
3982#endif /* KMP_AFFINITY_SUPPORTED */
3983 root_thread->th.th_def_allocator = __kmp_def_allocator;
3984 root_thread->th.th_prev_level = 0;
3985 root_thread->th.th_prev_num_threads = 1;
3986
3987 kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(sizeof(kmp_cg_root_t))___kmp_allocate((sizeof(kmp_cg_root_t)), "openmp/runtime/src/kmp_runtime.cpp"
, 3987)
;
3988 tmp->cg_root = root_thread;
3989 tmp->cg_thread_limit = __kmp_cg_max_nth;
3990 tmp->cg_nthreads = 1;
3991 KA_TRACE(100, ("__kmp_register_root: Thread %p created node %p with"if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_register_root: Thread %p created node %p with"
" cg_nthreads init to 1\n", root_thread, tmp); }
3992 " cg_nthreads init to 1\n",if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_register_root: Thread %p created node %p with"
" cg_nthreads init to 1\n", root_thread, tmp); }
3993 root_thread, tmp))if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_register_root: Thread %p created node %p with"
" cg_nthreads init to 1\n", root_thread, tmp); }
;
3994 tmp->up = NULL__null;
3995 root_thread->th.th_cg_roots = tmp;
3996
3997 __kmp_root_counter++;
3998
3999#if OMPT_SUPPORT1
4000 if (!initial_thread && ompt_enabled.enabled) {
4001
4002 kmp_info_t *root_thread = ompt_get_thread();
4003
4004 ompt_set_thread_state(root_thread, ompt_state_overhead);
4005
4006 if (ompt_enabled.ompt_callback_thread_begin) {
4007 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)ompt_callback_thread_begin_callback(
4008 ompt_thread_initial, __ompt_get_thread_data_internal());
4009 }
4010 ompt_data_t *task_data;
4011 ompt_data_t *parallel_data;
4012 __ompt_get_task_info_internal(0, NULL__null, &task_data, NULL__null, &parallel_data,
4013 NULL__null);
4014 if (ompt_enabled.ompt_callback_implicit_task) {
4015 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)ompt_callback_implicit_task_callback(
4016 ompt_scope_begin, parallel_data, task_data, 1, 1, ompt_task_initial);
4017 }
4018
4019 ompt_set_thread_state(root_thread, ompt_state_work_serial);
4020 }
4021#endif
4022#if OMPD_SUPPORT1
4023 if (ompd_state & OMPD_ENABLE_BP0x1)
4024 ompd_bp_thread_begin();
4025#endif
4026
4027 KMP_MB();
4028 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4029
4030 return gtid;
4031}
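
// Illustration (not part of kmp_runtime.cpp): the slot search above encodes
// the __kmp_threads layout described in the comment -- index 0 is reserved for
// the initial thread, indices 1..hidden_num for hidden helper threads, and the
// remaining slots for regular roots. A toy model of that slot-picking rule;
// find_slot() and its parameters are illustrative only.
#include <cassert>
#include <vector>

// Returns the gtid slot a new root would get, or -1 if the table is full.
static int find_slot(const std::vector<bool> &used, int hidden_num,
                     bool initial_thread, bool registering_hidden) {
  if (registering_hidden) {
    for (int gtid = 1; gtid <= hidden_num; ++gtid)
      if (!used[gtid])
        return gtid; // hidden helpers live in [1, hidden_num]
    return -1;
  }
  if (initial_thread && !used[0])
    return 0; // slot 0 is reserved for the initial thread
  for (int gtid = hidden_num + 1; gtid < (int)used.size(); ++gtid)
    if (!used[gtid])
      return gtid; // regular roots start after the helper slots
  return -1;
}

int main() {
  std::vector<bool> used(8, false);
  used[0] = true; // initial thread already registered
  assert(find_slot(used, /*hidden_num=*/2, false, true) == 1);
  assert(find_slot(used, /*hidden_num=*/2, false, false) == 3);
  return 0;
}
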
4032
4033#if KMP_NESTED_HOT_TEAMS1
4034static int __kmp_free_hot_teams(kmp_root_t *root, kmp_info_t *thr, int level,
4035 const int max_level) {
4036 int i, n, nth;
4037 kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
4038 if (!hot_teams || !hot_teams[level].hot_team) {
4039 return 0;
4040 }
4041 KMP_DEBUG_ASSERT(level < max_level)if (!(level < max_level)) { __kmp_debug_assert("level < max_level"
, "openmp/runtime/src/kmp_runtime.cpp", 4041); }
;
4042 kmp_team_t *team = hot_teams[level].hot_team;
4043 nth = hot_teams[level].hot_team_nth;
4044 n = nth - 1; // primary thread is not freed
4045 if (level < max_level - 1) {
4046 for (i = 0; i < nth; ++i) {
4047 kmp_info_t *th = team->t.t_threads[i];
4048 n += __kmp_free_hot_teams(root, th, level + 1, max_level);
4049 if (i > 0 && th->th.th_hot_teams) {
4050 __kmp_free(th->th.th_hot_teams)___kmp_free((th->th.th_hot_teams), "openmp/runtime/src/kmp_runtime.cpp"
, 4050)
;
4051 th->th.th_hot_teams = NULL__null;
4052 }
4053 }
4054 }
4055 __kmp_free_team(root, team, NULL__null);
4056 return n;
4057}
4058#endif
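
// Illustration (not part of kmp_runtime.cpp): __kmp_free_hot_teams() walks the
// nested hot-team tree depth first and reports how many worker slots it
// released; the primary thread of each team is not counted because it is
// reused by the enclosing level. A toy count over the same recursion shape;
// ToyHotTeam and count_freed() are illustrative.
#include <cassert>
#include <vector>

struct ToyHotTeam {
  int nth;                          // threads in this hot team
  std::vector<ToyHotTeam *> nested; // nested hot teams of its workers, if any
};

// Counts the worker slots a recursive free would release.
static int count_freed(const ToyHotTeam *team) {
  if (team == nullptr)
    return 0;
  int n = team->nth - 1; // primary thread is not freed
  for (const ToyHotTeam *child : team->nested)
    n += count_freed(child);
  return n;
}

int main() {
  ToyHotTeam inner{3, {}};              // 3 threads, no deeper nesting
  ToyHotTeam outer{4, {&inner}};        // 4 threads, one nested hot team
  assert(count_freed(&outer) == 3 + 2); // (4-1) workers + (3-1) nested workers
  return 0;
}
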
4059
4060// Resets a root thread and clears its root and hot teams.
4061// Returns the number of __kmp_threads entries directly and indirectly freed.
4062static int __kmp_reset_root(int gtid, kmp_root_t *root) {
4063 kmp_team_t *root_team = root->r.r_root_team;
4064 kmp_team_t *hot_team = root->r.r_hot_team;
4065 int n = hot_team->t.t_nproc;
4066 int i;
4067
4068 KMP_DEBUG_ASSERT(!root->r.r_active)if (!(!root->r.r_active)) { __kmp_debug_assert("!root->r.r_active"
, "openmp/runtime/src/kmp_runtime.cpp", 4068); }
;
4069
4070 root->r.r_root_team = NULL__null;
4071 root->r.r_hot_team = NULL__null;
4072 // __kmp_free_team() does not free hot teams, so we have to clear r_hot_team
4073 // before call to __kmp_free_team().
4074 __kmp_free_team(root, root_team USE_NESTED_HOT_ARG(NULL), __null);
4075#if KMP_NESTED_HOT_TEAMS1
4076 if (__kmp_hot_teams_max_level >
4077 0) { // need to free nested hot teams and their threads if any
4078 for (i = 0; i < hot_team->t.t_nproc; ++i) {
4079 kmp_info_t *th = hot_team->t.t_threads[i];
4080 if (__kmp_hot_teams_max_level > 1) {
4081 n += __kmp_free_hot_teams(root, th, 1, __kmp_hot_teams_max_level);
4082 }
4083 if (th->th.th_hot_teams) {
4084 __kmp_free(th->th.th_hot_teams)___kmp_free((th->th.th_hot_teams), "openmp/runtime/src/kmp_runtime.cpp"
, 4084)
;
4085 th->th.th_hot_teams = NULL__null;
4086 }
4087 }
4088 }
4089#endif
4090 __kmp_free_team(root, hot_team USE_NESTED_HOT_ARG(NULL), __null);
4091
4092 // Before we can reap the thread, we need to make certain that all other
4093 // threads in the teams that had this root as ancestor have stopped trying to
4094 // steal tasks.
4095 if (__kmp_tasking_mode != tskm_immediate_exec) {
4096 __kmp_wait_to_unref_task_teams();
4097 }
4098
4099#if KMP_OS_WINDOWS0
4100 /* Close Handle of root duplicated in __kmp_create_worker (tr #62919) */
4101 KA_TRACE(if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_reset_root: free handle, th = %p, handle = %"
"lu" "\n", (LPVOID) & (root->r.r_uber_thread->th),
root->r.r_uber_thread->th.th_info.ds.ds_thread); }
4102 10, ("__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPECif (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_reset_root: free handle, th = %p, handle = %"
"lu" "\n", (LPVOID) & (root->r.r_uber_thread->th),
root->r.r_uber_thread->th.th_info.ds.ds_thread); }
4103 "\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_reset_root: free handle, th = %p, handle = %"
"lu" "\n", (LPVOID) & (root->r.r_uber_thread->th),
root->r.r_uber_thread->th.th_info.ds.ds_thread); }
4104 (LPVOID) & (root->r.r_uber_thread->th),if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_reset_root: free handle, th = %p, handle = %"
"lu" "\n", (LPVOID) & (root->r.r_uber_thread->th),
root->r.r_uber_thread->th.th_info.ds.ds_thread); }
4105 root->r.r_uber_thread->th.th_info.ds.ds_thread))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_reset_root: free handle, th = %p, handle = %"
"lu" "\n", (LPVOID) & (root->r.r_uber_thread->th),
root->r.r_uber_thread->th.th_info.ds.ds_thread); }
;
4106 __kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread);
4107#endif /* KMP_OS_WINDOWS */
4108
4109#if OMPD_SUPPORT1
4110 if (ompd_state & OMPD_ENABLE_BP0x1)
4111 ompd_bp_thread_end();
4112#endif
4113
4114#if OMPT_SUPPORT1
4115 ompt_data_t *task_data;
4116 ompt_data_t *parallel_data;
4117 __ompt_get_task_info_internal(0, NULL__null, &task_data, NULL__null, &parallel_data,
4118 NULL__null);
4119 if (ompt_enabled.ompt_callback_implicit_task) {
4120 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)ompt_callback_implicit_task_callback(
4121 ompt_scope_end, parallel_data, task_data, 0, 1, ompt_task_initial);
4122 }
4123 if (ompt_enabled.ompt_callback_thread_end) {
4124 ompt_callbacks.ompt_callback(ompt_callback_thread_end)ompt_callback_thread_end_callback(
4125 &(root->r.r_uber_thread->th.ompt_thread_info.thread_data));
4126 }
4127#endif
4128
4129 TCW_4(__kmp_nth,(__kmp_nth) = (__kmp_nth - 1)
4130 __kmp_nth - 1)(__kmp_nth) = (__kmp_nth - 1); // __kmp_reap_thread will decrement __kmp_all_nth.
4131 i = root->r.r_uber_thread->th.th_cg_roots->cg_nthreads--;
4132 KA_TRACE(100, ("__kmp_reset_root: Thread %p decrement cg_nthreads on node %p"if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_reset_root: Thread %p decrement cg_nthreads on node %p"
" to %d\n", root->r.r_uber_thread, root->r.r_uber_thread
->th.th_cg_roots, root->r.r_uber_thread->th.th_cg_roots
->cg_nthreads); }
4133 " to %d\n",if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_reset_root: Thread %p decrement cg_nthreads on node %p"
" to %d\n", root->r.r_uber_thread, root->r.r_uber_thread
->th.th_cg_roots, root->r.r_uber_thread->th.th_cg_roots
->cg_nthreads); }
4134 root->r.r_uber_thread, root->r.r_uber_thread->th.th_cg_roots,if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_reset_root: Thread %p decrement cg_nthreads on node %p"
" to %d\n", root->r.r_uber_thread, root->r.r_uber_thread
->th.th_cg_roots, root->r.r_uber_thread->th.th_cg_roots
->cg_nthreads); }
4135 root->r.r_uber_thread->th.th_cg_roots->cg_nthreads))if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_reset_root: Thread %p decrement cg_nthreads on node %p"
" to %d\n", root->r.r_uber_thread, root->r.r_uber_thread
->th.th_cg_roots, root->r.r_uber_thread->th.th_cg_roots
->cg_nthreads); }
;
4136 if (i == 1) {
4137 // need to free contention group structure
4138 KMP_DEBUG_ASSERT(root->r.r_uber_thread ==if (!(root->r.r_uber_thread == root->r.r_uber_thread->
th.th_cg_roots->cg_root)) { __kmp_debug_assert("root->r.r_uber_thread == root->r.r_uber_thread->th.th_cg_roots->cg_root"
, "openmp/runtime/src/kmp_runtime.cpp", 4139); }
4139 root->r.r_uber_thread->th.th_cg_roots->cg_root)if (!(root->r.r_uber_thread == root->r.r_uber_thread->
th.th_cg_roots->cg_root)) { __kmp_debug_assert("root->r.r_uber_thread == root->r.r_uber_thread->th.th_cg_roots->cg_root"
, "openmp/runtime/src/kmp_runtime.cpp", 4139); }
;
4140 KMP_DEBUG_ASSERT(root->r.r_uber_thread->th.th_cg_roots->up == NULL)if (!(root->r.r_uber_thread->th.th_cg_roots->up == __null
)) { __kmp_debug_assert("root->r.r_uber_thread->th.th_cg_roots->up == __null"
, "openmp/runtime/src/kmp_runtime.cpp", 4140); }
;
4141 __kmp_free(root->r.r_uber_thread->th.th_cg_roots)___kmp_free((root->r.r_uber_thread->th.th_cg_roots), "openmp/runtime/src/kmp_runtime.cpp"
, 4141)
;
4142 root->r.r_uber_thread->th.th_cg_roots = NULL__null;
4143 }
4144 __kmp_reap_thread(root->r.r_uber_thread, 1);
4145
4146  // We cannot put the root thread into __kmp_thread_pool, so we have to reap
4147  // it instead of freeing it.
4148 root->r.r_uber_thread = NULL__null;
4149 /* mark root as no longer in use */
4150 root->r.r_begin = FALSE0;
4151
4152 return n;
4153}
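
// Illustration (not part of kmp_runtime.cpp): the cg_nthreads handling above
// is a manual reference count -- the pre-decrement value is inspected, and the
// node is freed only by the thread that drops the count from 1 to 0. A minimal
// single-threaded sketch of that release pattern; ToyCgRoot and
// release_cg_root() are illustrative, and the real field is updated under the
// fork/join lock rather than in isolation.
#include <cassert>
#include <cstdlib>

struct ToyCgRoot {
  int nthreads; // threads still attached to this contention group
};

// Returns true if this call released the node.
static bool release_cg_root(ToyCgRoot *&node) {
  int before = node->nthreads--; // mirrors: i = ...->cg_nthreads--
  if (before == 1) {             // we were the last user
    std::free(node);
    node = nullptr;
    return true;
  }
  return false;
}

int main() {
  ToyCgRoot *cg = (ToyCgRoot *)std::malloc(sizeof(ToyCgRoot));
  cg->nthreads = 2;
  assert(!release_cg_root(cg)); // one user remains
  assert(release_cg_root(cg));  // last user frees the node
  assert(cg == nullptr);
  return 0;
}
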
4154
4155void __kmp_unregister_root_current_thread(int gtid) {
4156 KA_TRACE(1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid))if (kmp_a_debug >= 1) { __kmp_debug_printf ("__kmp_unregister_root_current_thread: enter T#%d\n"
, gtid); }
;
4157 /* this lock should be ok, since unregister_root_current_thread is never
4158 called during an abort, only during a normal close. furthermore, if you
4159 have the forkjoin lock, you should never try to get the initz lock */
4160 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
4161 if (TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done) || !__kmp_init_serial) {
4162 KC_TRACE(10, ("__kmp_unregister_root_current_thread: already finished, "if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_unregister_root_current_thread: already finished, "
"exiting T#%d\n", gtid); }
4163 "exiting T#%d\n",if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_unregister_root_current_thread: already finished, "
"exiting T#%d\n", gtid); }
4164 gtid))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_unregister_root_current_thread: already finished, "
"exiting T#%d\n", gtid); }
;
4165 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4166 return;
4167 }
4168 kmp_root_t *root = __kmp_root[gtid];
4169
4170 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid])if (!(__kmp_threads && __kmp_threads[gtid])) { __kmp_debug_assert
("__kmp_threads && __kmp_threads[gtid]", "openmp/runtime/src/kmp_runtime.cpp"
, 4170); }
;
4171 KMP_ASSERT(KMP_UBER_GTID(gtid))if (!(KMP_UBER_GTID(gtid))) { __kmp_debug_assert("KMP_UBER_GTID(gtid)"
, "openmp/runtime/src/kmp_runtime.cpp", 4171); }
;
4172 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root)if (!(root == __kmp_threads[gtid]->th.th_root)) { __kmp_debug_assert
("root == __kmp_threads[gtid]->th.th_root", "openmp/runtime/src/kmp_runtime.cpp"
, 4172); }
;
4173 KMP_ASSERT(root->r.r_active == FALSE)if (!(root->r.r_active == 0)) { __kmp_debug_assert("root->r.r_active == FALSE"
, "openmp/runtime/src/kmp_runtime.cpp", 4173); }
;
4174
4175 KMP_MB();
4176
4177 kmp_info_t *thread = __kmp_threads[gtid];
4178 kmp_team_t *team = thread->th.th_team;
4179 kmp_task_team_t *task_team = thread->th.th_task_team;
4180
4181 // we need to wait for the proxy tasks before finishing the thread
4182 if (task_team != NULL__null && (task_team->tt.tt_found_proxy_tasks ||
4183 task_team->tt.tt_hidden_helper_task_encountered)) {
4184#if OMPT_SUPPORT1
4185 // the runtime is shutting down so we won't report any events
4186 thread->th.ompt_thread_info.state = ompt_state_undefined;
4187#endif
4188 __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL), __null);
4189 }
4190
4191 __kmp_reset_root(gtid, root);
4192
4193 KMP_MB();
4194 KC_TRACE(10,if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_unregister_root_current_thread: T#%d unregistered\n"
, gtid); }
4195 ("__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid))if (kmp_c_debug >= 10) { __kmp_debug_printf ("__kmp_unregister_root_current_thread: T#%d unregistered\n"
, gtid); }
;
4196
4197 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4198}
4199
4200 #if KMP_OS_WINDOWS
4201 /* __kmp_forkjoin_lock must be already held
4202    Unregisters a root thread that is not the current thread. Returns the number
4203    of __kmp_threads entries freed as a result. */
4204 static int __kmp_unregister_root_other_thread(int gtid) {
4205   kmp_root_t *root = __kmp_root[gtid];
4206   int r;
4207
4208   KA_TRACE(1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid));
4209   KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
4210   KMP_ASSERT(KMP_UBER_GTID(gtid));
4211   KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
4212   KMP_ASSERT(root->r.r_active == FALSE);
4213
4214   r = __kmp_reset_root(gtid, root);
4215   KC_TRACE(10,
4216            ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid));
4217   return r;
4218 }
4219 #endif
4220
4221 #if KMP_DEBUG
4222 void __kmp_task_info() {
4223
4224   kmp_int32 gtid = __kmp_entry_gtid();
4225   kmp_int32 tid = __kmp_tid_from_gtid(gtid);
4226   kmp_info_t *this_thr = __kmp_threads[gtid];
4227   kmp_team_t *steam = this_thr->th.th_serial_team;
4228   kmp_team_t *team = this_thr->th.th_team;
4229
4230   __kmp_printf(
4231       "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p steam=%p curtask=%p "
4232       "ptask=%p\n",
4233       gtid, tid, this_thr, team, steam, this_thr->th.th_current_task,
4234       team->t.t_implicit_task_taskdata[tid].td_parent);
4235 }
4236 #endif // KMP_DEBUG
4237
4238 /* TODO optimize with one big memclr, take out what isn't needed, split
4239    responsibility to workers as much as possible, and delay initialization of
4240    features as much as possible */
4241 static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
4242                                   int tid, int gtid) {
4243   /* this_thr->th.th_info.ds.ds_gtid is setup in
4244      kmp_allocate_thread/create_worker.
4245      this_thr->th.th_serial_team is setup in __kmp_allocate_thread */
4246   KMP_DEBUG_ASSERT(this_thr != NULL);
4247   KMP_DEBUG_ASSERT(this_thr->th.th_serial_team);
4248   KMP_DEBUG_ASSERT(team);
4249   KMP_DEBUG_ASSERT(team->t.t_threads);
4250   KMP_DEBUG_ASSERT(team->t.t_dispatch);
4251   kmp_info_t *master = team->t.t_threads[0];
4252   KMP_DEBUG_ASSERT(master);
4253   KMP_DEBUG_ASSERT(master->th.th_root);
4254
4255   KMP_MB();
4256
4257   TCW_SYNC_PTR(this_thr->th.th_team, team);
4258
4259   this_thr->th.th_info.ds.ds_tid = tid;
4260   this_thr->th.th_set_nproc = 0;
4261   if (__kmp_tasking_mode != tskm_immediate_exec)
4262     // When tasking is possible, threads are not safe to reap until they are
4263     // done tasking; this will be set when tasking code is exited in wait
4264     this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
4265   else // no tasking --> always safe to reap
4266     this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
4267   this_thr->th.th_set_proc_bind = proc_bind_default;
4268 #if KMP_AFFINITY_SUPPORTED
4269   this_thr->th.th_new_place = this_thr->th.th_current_place;
4270 #endif
4271   this_thr->th.th_root = master->th.th_root;
4272
4273   /* setup the thread's cache of the team structure */
4274   this_thr->th.th_team_nproc = team->t.t_nproc;
4275   this_thr->th.th_team_master = master;
4276   this_thr->th.th_team_serialized = team->t.t_serialized;
4277
4278   KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata);
4279
4280   KF_TRACE(10, ("__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
4281                 tid, gtid, this_thr, this_thr->th.th_current_task));
4282
4283   __kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr,
4284                            team, tid, TRUE);
4285
4286   KF_TRACE(10, ("__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
4287                 tid, gtid, this_thr, this_thr->th.th_current_task));
4288   // TODO: Initialize ICVs from parent; GEH - isn't that already done in
4289   // __kmp_initialize_team()?
4290
4291   /* TODO no worksharing in speculative threads */
4292   this_thr->th.th_dispatch = &team->t.t_dispatch[tid];
4293
4294   this_thr->th.th_local.this_construct = 0;
4295
4296   if (!this_thr->th.th_pri_common) {
4297     this_thr->th.th_pri_common =
4298         (struct common_table *)__kmp_allocate(sizeof(struct common_table));
4299     if (__kmp_storage_map) {
4300       __kmp_print_storage_map_gtid(
4301           gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
4302           sizeof(struct common_table), "th_%d.th_pri_common\n", gtid);
4303     }
4304     this_thr->th.th_pri_head = NULL;
4305   }
4306
4307   if (this_thr != master && // Primary thread's CG root is initialized elsewhere
4308       this_thr->th.th_cg_roots != master->th.th_cg_roots) { // CG root not set
4309     // Make new thread's CG root same as primary thread's
4310     KMP_DEBUG_ASSERT(master->th.th_cg_roots);
4311     kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
4312     if (tmp) {
4313       // worker changes CG, need to check if old CG should be freed
4314       int i = tmp->cg_nthreads--;
4315       KA_TRACE(100, ("__kmp_initialize_info: Thread %p decrement cg_nthreads"
4316                      " on node %p of thread %p to %d\n",
4317                      this_thr, tmp, tmp->cg_root, tmp->cg_nthreads));
4318       if (i == 1) {
4319         __kmp_free(tmp); // last thread left CG --> free it
4320       }
4321     }
4322     this_thr->th.th_cg_roots = master->th.th_cg_roots;
4323     // Increment new thread's CG root's counter to add the new thread
4324     this_thr->th.th_cg_roots->cg_nthreads++;
4325     KA_TRACE(100, ("__kmp_initialize_info: Thread %p increment cg_nthreads on"
4326                    " node %p of thread %p to %d\n",
4327                    this_thr, this_thr->th.th_cg_roots,
4328                    this_thr->th.th_cg_roots->cg_root,
4329                    this_thr->th.th_cg_roots->cg_nthreads));
4330     this_thr->th.th_current_task->td_icvs.thread_limit =
4331         this_thr->th.th_cg_roots->cg_thread_limit;
4332   }
4333
4334   /* Initialize dynamic dispatch */
4335   {
4336     volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
4337     // Use team max_nproc since this will never change for the team.
4338     size_t disp_size =
4339         sizeof(dispatch_private_info_t) *
4340         (team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers);
4341     KD_TRACE(10, ("__kmp_initialize_info: T#%d max_nproc: %d\n", gtid,
4342                   team->t.t_max_nproc));
4343     KMP_ASSERT(dispatch);
4344     KMP_DEBUG_ASSERT(team->t.t_dispatch);
4345     KMP_DEBUG_ASSERT(dispatch == &team->t.t_dispatch[tid]);
4346
4347     dispatch->th_disp_index = 0;
4348     dispatch->th_doacross_buf_idx = 0;
4349     if (!dispatch->th_disp_buffer) {
4350       dispatch->th_disp_buffer =
4351           (dispatch_private_info_t *)__kmp_allocate(disp_size);
4352
4353       if (__kmp_storage_map) {
4354         __kmp_print_storage_map_gtid(
4355             gtid, &dispatch->th_disp_buffer[0],
4356             &dispatch->th_disp_buffer[team->t.t_max_nproc == 1
4357                                           ? 1
4358                                           : __kmp_dispatch_num_buffers],
4359             disp_size,
4360             "th_%d.th_dispatch.th_disp_buffer "
4361             "(team_%d.t_dispatch[%d].th_disp_buffer)",
4362             gtid, team->t.t_id, gtid);
4363       }
4364     } else {
4365       memset(&dispatch->th_disp_buffer[0], '\0', disp_size);
4366     }
4367
4368     dispatch->th_dispatch_pr_current = 0;
4369     dispatch->th_dispatch_sh_current = 0;
4370
4371     dispatch->th_deo_fcn = 0; /* ORDERED     */
4372     dispatch->th_dxo_fcn = 0; /* END ORDERED */
4373   }
4374
4375   this_thr->th.th_next_pool = NULL;
4376
4377   if (!this_thr->th.th_task_state_memo_stack) {
4378     size_t i;
4379     this_thr->th.th_task_state_memo_stack =
4380         (kmp_uint8 *)__kmp_allocate(4 * sizeof(kmp_uint8));
4381     this_thr->th.th_task_state_top = 0;
4382     this_thr->th.th_task_state_stack_sz = 4;
4383     for (i = 0; i < this_thr->th.th_task_state_stack_sz;
4384          ++i) // zero init the stack
4385       this_thr->th.th_task_state_memo_stack[i] = 0;
4386   }
4387
4388   KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
4389   KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
4390
4391   KMP_MB();
4392 }
4393
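The contention-group handoff at lines 4307-4332 above is a small reference-counting pattern: the worker drops its old CG root, frees it if it was the last user, then adopts the primary thread's root and bumps its count. A minimal standalone sketch of that pattern follows; the struct and function names are illustrative stand-ins, not the runtime's kmp_cg_root_t API, and the real code relies on the forkjoin lock rather than on any synchronization shown here.

    #include <cstdio>

    // Hypothetical stand-in for kmp_cg_root_t; field names are illustrative only.
    struct cg_root {
      int nthreads;      // threads currently attached to this contention group
      int thread_limit;  // ICV handed to attached threads
    };

    // Move a worker from its current root to the primary thread's root, freeing
    // the old root when the last thread leaves it (mirrors lines 4311-4331).
    void adopt_primary_root(cg_root *&worker_root, cg_root *primary_root,
                            int &worker_thread_limit) {
      if (cg_root *old_root = worker_root) {
        if (--old_root->nthreads == 0)
          delete old_root;             // last thread left the old CG
      }
      worker_root = primary_root;      // share the primary thread's root
      ++worker_root->nthreads;
      worker_thread_limit = worker_root->thread_limit;
    }

    int main() {
      cg_root *primary = new cg_root{1, 4};
      cg_root *worker = new cg_root{1, 1};
      int limit = 1;
      adopt_primary_root(worker, primary, limit); // old root freed, limit becomes 4
      printf("nthreads=%d limit=%d\n", primary->nthreads, limit);
      delete primary;
      return 0;
    }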
4394 /* allocate a new thread for the requesting team. this is only called from
4395    within a forkjoin critical section. we will first try to get an available
4396    thread from the thread pool. if none is available, we will fork a new one
4397    assuming we are able to create a new one. this should be assured, as the
4398    caller should check on this first. */
4399 kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
4400                                   int new_tid) {
4401   kmp_team_t *serial_team;
4402   kmp_info_t *new_thr;
4403   int new_gtid;
4404
4405   KA_TRACE(20, ("__kmp_allocate_thread: T#%d\n", __kmp_get_gtid()));
4406   KMP_DEBUG_ASSERT(root && team);
4407 #if !KMP_NESTED_HOT_TEAMS
4408   KMP_DEBUG_ASSERT(KMP_MASTER_GTID(__kmp_get_gtid()));
4409 #endif
4410   KMP_MB();
4411
4412   /* first, try to get one from the thread pool */
4413   if (__kmp_thread_pool) {
4414     new_thr = CCAST(kmp_info_t *, __kmp_thread_pool);
4415     __kmp_thread_pool = (volatile kmp_info_t *)new_thr->th.th_next_pool;
4416     if (new_thr == __kmp_thread_pool_insert_pt) {
4417       __kmp_thread_pool_insert_pt = NULL;
4418     }
4419     TCW_4(new_thr->th.th_in_pool, FALSE);
4420     __kmp_suspend_initialize_thread(new_thr);
4421     __kmp_lock_suspend_mx(new_thr);
4422     if (new_thr->th.th_active_in_pool == TRUE) {
4423       KMP_DEBUG_ASSERT(new_thr->th.th_active == TRUE);
4424       KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
4425       new_thr->th.th_active_in_pool = FALSE;
4426     }
4427     __kmp_unlock_suspend_mx(new_thr);
4428
4429     KA_TRACE(20, ("__kmp_allocate_thread: T#%d using thread T#%d\n",
4430                   __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid));
4431     KMP_ASSERT(!new_thr->th.th_team);
4432     KMP_DEBUG_ASSERT(__kmp_nth < __kmp_threads_capacity);
4433
4434     /* setup the thread structure */
4435     __kmp_initialize_info(new_thr, team, new_tid,
4436                           new_thr->th.th_info.ds.ds_gtid);
4437     KMP_DEBUG_ASSERT(new_thr->th.th_serial_team);
4438
4439     TCW_4(__kmp_nth, __kmp_nth + 1);
4440
4441     new_thr->th.th_task_state = 0;
4442     new_thr->th.th_task_state_top = 0;
4443     new_thr->th.th_task_state_stack_sz = 4;
4444
4445     if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
4446       // Make sure pool thread has transitioned to waiting on own thread struct
4447       KMP_DEBUG_ASSERT(new_thr->th.th_used_in_team.load() == 0);
4448       // Thread activated in __kmp_allocate_team when increasing team size
4449     }
4450
4451 #ifdef KMP_ADJUST_BLOCKTIME
4452     /* Adjust blocktime back to zero if necessary */
4453     /* Middle initialization might not have occurred yet */
4454     if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4455       if (__kmp_nth > __kmp_avail_proc) {
4456         __kmp_zero_bt = TRUE;
4457       }
4458     }
4459 #endif /* KMP_ADJUST_BLOCKTIME */
4460
4461 #if KMP_DEBUG
4462     // If thread entered pool via __kmp_free_thread, wait_flag should !=
4463     // KMP_BARRIER_PARENT_FLAG.
4464     int b;
4465     kmp_balign_t *balign = new_thr->th.th_bar;
4466     for (b = 0; b < bs_last_barrier; ++b)
4467       KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
4468 #endif
4469
4470     KF_TRACE(10, ("__kmp_allocate_thread: T#%d using thread %p T#%d\n",
4471                   __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid));
4472
4473     KMP_MB();
4474     return new_thr;
4475   }
4476
4477   /* no, well fork a new one */
4478   KMP_ASSERT(__kmp_nth == __kmp_all_nth);
4479   KMP_ASSERT(__kmp_all_nth < __kmp_threads_capacity);
4480
4481 #if KMP_USE_MONITOR
4482   // If this is the first worker thread the RTL is creating, then also
4483   // launch the monitor thread. We try to do this as early as possible.
4484   if (!TCR_4(__kmp_init_monitor)) {
4485     __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
4486     if (!TCR_4(__kmp_init_monitor)) {
4487       KF_TRACE(10, ("before __kmp_create_monitor\n"));
4488       TCW_4(__kmp_init_monitor, 1);
4489       __kmp_create_monitor(&__kmp_monitor);
4490       KF_TRACE(10, ("after __kmp_create_monitor\n"));
4491 #if KMP_OS_WINDOWS
4492       // AC: wait until monitor has started. This is a fix for CQ232808.
4493       // The reason is that if the library is loaded/unloaded in a loop with
4494       // small (parallel) work in between, then there is high probability that
4495       // monitor thread started after the library shutdown. At shutdown it is
4496       // too late to cope with the problem, because when the primary thread is
4497       // in DllMain (process detach) the monitor has no chances to start (it is
4498       // blocked), and primary thread has no means to inform the monitor that
4499       // the library has gone, because all the memory which the monitor can
4500       // access is going to be released/reset.
4501       while (TCR_4(__kmp_init_monitor) < 2) {
4502         KMP_YIELD(TRUE);
4503       }
4504       KF_TRACE(10, ("after monitor thread has started\n"));
4505 #endif
4506     }
4507     __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
4508   }
4509 #endif
4510
4511   KMP_MB();
4512
4513   {
4514     int new_start_gtid = TCR_4(__kmp_init_hidden_helper_threads)
4515                              ? 1
4516                              : __kmp_hidden_helper_threads_num + 1;
4517
4518     for (new_gtid = new_start_gtid; TCR_PTR(__kmp_threads[new_gtid]) != NULL;
4519          ++new_gtid) {
4520       KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity);
4521     }
4522
4523     if (TCR_4(__kmp_init_hidden_helper_threads)) {
4524       KMP_DEBUG_ASSERT(new_gtid <= __kmp_hidden_helper_threads_num);
4525     }
4526   }
4527
4528   /* allocate space for it. */
4529   new_thr = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));
4530
4531   TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
4532
4533 #if USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG
4534   // suppress race conditions detection on synchronization flags in debug mode
4535   // this helps to analyze library internals eliminating false positives
4536   __itt_suppress_mark_range(
4537       __itt_suppress_range, __itt_suppress_threading_errors,
4538       &new_thr->th.th_sleep_loc, sizeof(new_thr->th.th_sleep_loc));
4539   __itt_suppress_mark_range(
4540       __itt_suppress_range, __itt_suppress_threading_errors,
4541       &new_thr->th.th_reap_state, sizeof(new_thr->th.th_reap_state));
4542 #if KMP_OS_WINDOWS
4543   __itt_suppress_mark_range(
4544       __itt_suppress_range, __itt_suppress_threading_errors,
4545       &new_thr->th.th_suspend_init, sizeof(new_thr->th.th_suspend_init));
4546 #else
4547   __itt_suppress_mark_range(__itt_suppress_range,
4548                             __itt_suppress_threading_errors,
4549                             &new_thr->th.th_suspend_init_count,
4550                             sizeof(new_thr->th.th_suspend_init_count));
4551 #endif
4552   // TODO: check if we need to also suppress b_arrived flags
4553   __itt_suppress_mark_range(__itt_suppress_range,
4554                             __itt_suppress_threading_errors,
4555                             CCAST(kmp_uint64 *, &new_thr->th.th_bar[0].bb.b_go),
4556                             sizeof(new_thr->th.th_bar[0].bb.b_go));
4557   __itt_suppress_mark_range(__itt_suppress_range,
4558                             __itt_suppress_threading_errors,
4559                             CCAST(kmp_uint64 *, &new_thr->th.th_bar[1].bb.b_go),
4560                             sizeof(new_thr->th.th_bar[1].bb.b_go));
4561   __itt_suppress_mark_range(__itt_suppress_range,
4562                             __itt_suppress_threading_errors,
4563                             CCAST(kmp_uint64 *, &new_thr->th.th_bar[2].bb.b_go),
4564                             sizeof(new_thr->th.th_bar[2].bb.b_go));
4565 #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG */
4566   if (__kmp_storage_map) {
4567     __kmp_print_thread_storage_map(new_thr, new_gtid);
4568   }
4569
4570   // add the reserve serialized team, initialized from the team's primary thread
4571   {
4572     kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs(team);
4573     KF_TRACE(10, ("__kmp_allocate_thread: before th_serial/serial_team\n"));
4574     new_thr->th.th_serial_team = serial_team =
4575         (kmp_team_t *)__kmp_allocate_team(root, 1, 1,
4576 #if OMPT_SUPPORT
4577                                           ompt_data_none, // root parallel id
4578 #endif
4579                                           proc_bind_default, &r_icvs,
4580                                           0 USE_NESTED_HOT_ARG(NULL));
4581   }
4582   KMP_ASSERT(serial_team);
4583   serial_team->t.t_serialized = 0; // AC: the team created in reserve, not for
4584   // execution (it is unused for now).
4585   serial_team->t.t_threads[0] = new_thr;
4586   KF_TRACE(10,
4587            ("__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
4588             new_thr));
4589
4590   /* setup the thread structures */
4591   __kmp_initialize_info(new_thr, team, new_tid, new_gtid);
4592
4593 #if USE_FAST_MEMORY
4594   __kmp_initialize_fast_memory(new_thr);
4595 #endif /* USE_FAST_MEMORY */
4596
4597 #if KMP_USE_BGET
4598   KMP_DEBUG_ASSERT(new_thr->th.th_local.bget_data == NULL);
4599   __kmp_initialize_bget(new_thr);
4600 #endif
4601
4602   __kmp_init_random(new_thr); // Initialize random number generator
4603
4604   /* Initialize these only once when thread is grabbed for a team allocation */
4605   KA_TRACE(20,
4606            ("__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
4607             __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
4608
4609   int b;
4610   kmp_balign_t *balign = new_thr->th.th_bar;
4611   for (b = 0; b < bs_last_barrier; ++b) {
4612     balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
4613     balign[b].bb.team = NULL;
4614     balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
4615     balign[b].bb.use_oncore_barrier = 0;
4616   }
4617
4618   TCW_PTR(new_thr->th.th_sleep_loc, NULL);
4619   new_thr->th.th_sleep_loc_type = flag_unset;
4620
4621   new_thr->th.th_spin_here = FALSE;
4622   new_thr->th.th_next_waiting = 0;
4623 #if KMP_OS_UNIX
4624   new_thr->th.th_blocking = false;
4625 #endif
4626
4627 #if KMP_AFFINITY_SUPPORTED
4628   new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
4629   new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
4630   new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
4631   new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
4632 #endif
4633   new_thr->th.th_def_allocator = __kmp_def_allocator;
4634   new_thr->th.th_prev_level = 0;
4635   new_thr->th.th_prev_num_threads = 1;
4636
4637   TCW_4(new_thr->th.th_in_pool, FALSE);
4638   new_thr->th.th_active_in_pool = FALSE;
4639   TCW_4(new_thr->th.th_active, TRUE);
4640
4641   /* adjust the global counters */
4642   __kmp_all_nth++;
4643   __kmp_nth++;
4644
4645   // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) for low
4646   // numbers of procs, and method #2 (keyed API call) for higher numbers.
4647   if (__kmp_adjust_gtid_mode) {
4648     if (__kmp_all_nth >= __kmp_tls_gtid_min) {
4649       if (TCR_4(__kmp_gtid_mode) != 2) {
4650         TCW_4(__kmp_gtid_mode, 2);
4651       }
4652     } else {
4653       if (TCR_4(__kmp_gtid_mode) != 1) {
4654         TCW_4(__kmp_gtid_mode, 1);
4655       }
4656     }
4657   }
4658
4659 #ifdef KMP_ADJUST_BLOCKTIME
4660   /* Adjust blocktime back to zero if necessary */
4661   /* Middle initialization might not have occurred yet */
4662   if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4663     if (__kmp_nth > __kmp_avail_proc) {
4664       __kmp_zero_bt = TRUE;
4665     }
4666   }
4667 #endif /* KMP_ADJUST_BLOCKTIME */
4668
4669   /* actually fork it and create the new worker thread */
4670   KF_TRACE(
4671       10, ("__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr));
4672   __kmp_create_worker(new_gtid, new_thr, __kmp_stksize);
4673   KF_TRACE(10,
4674            ("__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr));
4675
4676   KA_TRACE(20, ("__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(),
4677                 new_gtid));
4678   KMP_MB();
4679   return new_thr;
4680 }
4681
4682 /* Reinitialize team for reuse.
4683    The hot team code calls this case at every fork barrier, so EPCC barrier
4684    test are extremely sensitive to changes in it, esp. writes to the team
4685    struct, which cause a cache invalidation in all threads.
4686    IF YOU TOUCH THIS ROUTINE, RUN EPCC C SYNCBENCH ON A BIG-IRON MACHINE!!! */
4687 static void __kmp_reinitialize_team(kmp_team_t *team,
4688                                     kmp_internal_control_t *new_icvs,
4689                                     ident_t *loc) {
4690   KF_TRACE(10, ("__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
4691                 team->t.t_threads[0], team));
4692   KMP_DEBUG_ASSERT(team && new_icvs);
4693   KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
4694   KMP_CHECK_UPDATE(team->t.t_ident, loc);
4695
4696   KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
4697   // Copy ICVs to the primary thread's implicit taskdata
4698   __kmp_init_implicit_task(loc, team->t.t_threads[0], team, 0, FALSE);
4699   copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);
4700
4701   KF_TRACE(10, ("__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
4702                 team->t.t_threads[0], team));
4703 }
4704
4705 /* Initialize the team data structure.
4706    This assumes the t_threads and t_max_nproc are already set.
4707    Also, we don't touch the arguments */
4708 static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
4709                                   kmp_internal_control_t *new_icvs,
4710                                   ident_t *loc) {
4711   KF_TRACE(10, ("__kmp_initialize_team: enter: team=%p\n", team));
4712
4713   /* verify */
4714   KMP_DEBUG_ASSERT(team);
4715   KMP_DEBUG_ASSERT(new_nproc <= team->t.t_max_nproc);
4716   KMP_DEBUG_ASSERT(team->t.t_threads);
4717   KMP_MB();
4718
4719   team->t.t_master_tid = 0; /* not needed */
4720   /* team->t.t_master_bar;        not needed */
4721   team->t.t_serialized = new_nproc > 1 ? 0 : 1;
4722   team->t.t_nproc = new_nproc;
4723
4724   /* team->t.t_parent = NULL; TODO not needed & would mess up hot team */
4725   team->t.t_next_pool = NULL;
4726   /* memset( team->t.t_threads, 0, sizeof(kmp_info_t*)*new_nproc ); would mess
4727    * up hot team */
4728
4729   TCW_SYNC_PTR(team->t.t_pkfn, NULL); /* not needed */
4730   team->t.t_invoke = NULL; /* not needed */
4731
4732   // TODO???: team->t.t_max_active_levels = new_max_active_levels;
4733   team->t.t_sched.sched = new_icvs->sched.sched;
4734
4735 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
4736   team->t.t_fp_control_saved = FALSE; /* not needed */
4737   team->t.t_x87_fpu_control_word = 0; /* not needed */
4738   team->t.t_mxcsr = 0; /* not needed */
4739 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
4740
4741   team->t.t_construct = 0;
4742
4743   team->t.t_ordered.dt.t_value = 0;
4744   team->t.t_master_active = FALSE;
4745
4746 #ifdef KMP_DEBUG
4747   team->t.t_copypriv_data = NULL; /* not necessary, but nice for debugging */
4748 #endif
4749 #if KMP_OS_WINDOWS
4750   team->t.t_copyin_counter = 0; /* for barrier-free copyin implementation */
4751 #endif
4752
4753   team->t.t_control_stack_top = NULL;
4754
4755   __kmp_reinitialize_team(team, new_icvs, loc);
4756
4757   KMP_MB();
4758   KF_TRACE(10, ("__kmp_initialize_team: exit: team=%p\n", team));
4759 }
4760
4761 #if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
4762 /* Sets full mask for thread and returns old mask, no changes to structures. */
4763 static void
4764 __kmp_set_thread_affinity_mask_full_tmp(kmp_affin_mask_t *old_mask) {
4765   if (KMP_AFFINITY_CAPABLE()) {
4766     int status;
4767     if (old_mask != NULL) {
4768       status = __kmp_get_system_affinity(old_mask, TRUE);
4769       int error = errno;
4770       if (status != 0) {
4771         __kmp_fatal(KMP_MSG(ChangeThreadAffMaskError), KMP_ERR(error),
4772                     __kmp_msg_null);
4773       }
4774     }
4775     __kmp_set_system_affinity(__kmp_affin_fullMask, TRUE);
4776   }
4777 }
4778 #endif
4779
4780#if KMP_AFFINITY_SUPPORTED1
4781
4782// __kmp_partition_places() is the heart of the OpenMP 4.0 affinity mechanism.
4783// It calculates the worker + primary thread's partition based upon the parent
4784// thread's partition, and binds each worker to a thread in their partition.
4785// The primary thread's partition should already include its current binding.
4786static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
4787 // Do not partition places for the hidden helper team
4788 if (KMP_HIDDEN_HELPER_TEAM(team)(team->t.t_threads[0] == __kmp_hidden_helper_main_thread))
4789 return;
4790 // Copy the primary thread's place partition to the team struct
4791 kmp_info_t *master_th = team->t.t_threads[0];
4792 KMP_DEBUG_ASSERT(master_th != NULL)if (!(master_th != __null)) { __kmp_debug_assert("master_th != __null"
, "openmp/runtime/src/kmp_runtime.cpp", 4792); }
;
4793 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
4794 int first_place = master_th->th.th_first_place;
4795 int last_place = master_th->th.th_last_place;
4796 int masters_place = master_th->th.th_current_place;
4797 int num_masks = __kmp_affinity.num_masks;
4798 team->t.t_first_place = first_place;
4799 team->t.t_last_place = last_place;
4800
4801 KA_TRACE(20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) "
"bound to place %d partition = [%d,%d]\n", proc_bind, __kmp_gtid_from_thread
(team->t.t_threads[0]), team->t.t_id, masters_place, first_place
, last_place); }
4802 "bound to place %d partition = [%d,%d]\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) "
"bound to place %d partition = [%d,%d]\n", proc_bind, __kmp_gtid_from_thread
(team->t.t_threads[0]), team->t.t_id, masters_place, first_place
, last_place); }
4803 proc_bind, __kmp_gtid_from_thread(team->t.t_threads[0]),if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) "
"bound to place %d partition = [%d,%d]\n", proc_bind, __kmp_gtid_from_thread
(team->t.t_threads[0]), team->t.t_id, masters_place, first_place
, last_place); }
4804 team->t.t_id, masters_place, first_place, last_place))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) "
"bound to place %d partition = [%d,%d]\n", proc_bind, __kmp_gtid_from_thread
(team->t.t_threads[0]), team->t.t_id, masters_place, first_place
, last_place); }
;
4805
4806 switch (proc_bind) {
4807
4808 case proc_bind_default:
4809 // Serial teams might have the proc_bind policy set to proc_bind_default.
4810 // Not an issue -- we don't rebind primary thread for any proc_bind policy.
4811 KMP_DEBUG_ASSERT(team->t.t_nproc == 1)if (!(team->t.t_nproc == 1)) { __kmp_debug_assert("team->t.t_nproc == 1"
, "openmp/runtime/src/kmp_runtime.cpp", 4811); }
;
4812 break;
4813
4814 case proc_bind_primary: {
4815 int f;
4816 int n_th = team->t.t_nproc;
4817 for (f = 1; f < n_th; f++) {
4818 kmp_info_t *th = team->t.t_threads[f];
4819 KMP_DEBUG_ASSERT(th != NULL)if (!(th != __null)) { __kmp_debug_assert("th != __null", "openmp/runtime/src/kmp_runtime.cpp"
, 4819); }
;
4820 th->th.th_first_place = first_place;
4821 th->th.th_last_place = last_place;
4822 th->th.th_new_place = masters_place;
4823 if (__kmp_display_affinity && masters_place != th->th.th_current_place &&
4824 team->t.t_display_affinity != 1) {
4825 team->t.t_display_affinity = 1;
4826 }
4827
4828 KA_TRACE(100, ("__kmp_partition_places: primary: T#%d(%d:%d) place %d "if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: primary: T#%d(%d:%d) place %d "
"partition = [%d,%d]\n", __kmp_gtid_from_thread(team->t.t_threads
[f]), team->t.t_id, f, masters_place, first_place, last_place
); }
4829 "partition = [%d,%d]\n",if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: primary: T#%d(%d:%d) place %d "
"partition = [%d,%d]\n", __kmp_gtid_from_thread(team->t.t_threads
[f]), team->t.t_id, f, masters_place, first_place, last_place
); }
4830 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: primary: T#%d(%d:%d) place %d "
"partition = [%d,%d]\n", __kmp_gtid_from_thread(team->t.t_threads
[f]), team->t.t_id, f, masters_place, first_place, last_place
); }
4831 f, masters_place, first_place, last_place))if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: primary: T#%d(%d:%d) place %d "
"partition = [%d,%d]\n", __kmp_gtid_from_thread(team->t.t_threads
[f]), team->t.t_id, f, masters_place, first_place, last_place
); }
;
4832 }
4833 } break;
4834
4835 case proc_bind_close: {
4836 int f;
4837 int n_th = team->t.t_nproc;
4838 int n_places;
4839 if (first_place <= last_place) {
4840 n_places = last_place - first_place + 1;
4841 } else {
4842 n_places = num_masks - first_place + last_place + 1;
4843 }
4844 if (n_th <= n_places) {
4845 int place = masters_place;
4846 for (f = 1; f < n_th; f++) {
4847 kmp_info_t *th = team->t.t_threads[f];
4848 KMP_DEBUG_ASSERT(th != NULL)if (!(th != __null)) { __kmp_debug_assert("th != __null", "openmp/runtime/src/kmp_runtime.cpp"
, 4848); }
;
4849
4850 if (place == last_place) {
4851 place = first_place;
4852 } else if (place == (num_masks - 1)) {
4853 place = 0;
4854 } else {
4855 place++;
4856 }
4857 th->th.th_first_place = first_place;
4858 th->th.th_last_place = last_place;
4859 th->th.th_new_place = place;
4860 if (__kmp_display_affinity && place != th->th.th_current_place &&
4861 team->t.t_display_affinity != 1) {
4862 team->t.t_display_affinity = 1;
4863 }
4864
4865 KA_TRACE(100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d "if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
"partition = [%d,%d]\n", __kmp_gtid_from_thread(team->t.t_threads
[f]), team->t.t_id, f, place, first_place, last_place); }
4866 "partition = [%d,%d]\n",if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
"partition = [%d,%d]\n", __kmp_gtid_from_thread(team->t.t_threads
[f]), team->t.t_id, f, place, first_place, last_place); }
4867 __kmp_gtid_from_thread(team->t.t_threads[f]),if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
"partition = [%d,%d]\n", __kmp_gtid_from_thread(team->t.t_threads
[f]), team->t.t_id, f, place, first_place, last_place); }
4868 team->t.t_id, f, place, first_place, last_place))if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
"partition = [%d,%d]\n", __kmp_gtid_from_thread(team->t.t_threads
[f]), team->t.t_id, f, place, first_place, last_place); }
;
4869 }
4870 } else {
4871 int S, rem, gap, s_count;
4872 S = n_th / n_places;
4873 s_count = 0;
4874 rem = n_th - (S * n_places);
4875 gap = rem > 0 ? n_places / rem : n_places;
4876 int place = masters_place;
4877 int gap_ct = gap;
4878 for (f = 0; f < n_th; f++) {
4879 kmp_info_t *th = team->t.t_threads[f];
4880 KMP_DEBUG_ASSERT(th != NULL)if (!(th != __null)) { __kmp_debug_assert("th != __null", "openmp/runtime/src/kmp_runtime.cpp"
, 4880); }
;
4881
4882 th->th.th_first_place = first_place;
4883 th->th.th_last_place = last_place;
4884 th->th.th_new_place = place;
4885 if (__kmp_display_affinity && place != th->th.th_current_place &&
4886 team->t.t_display_affinity != 1) {
4887 team->t.t_display_affinity = 1;
4888 }
4889 s_count++;
4890
4891 if ((s_count == S) && rem && (gap_ct == gap)) {
4892 // do nothing, add an extra thread to place on next iteration
4893 } else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
4894 // we added an extra thread to this place; move to next place
4895 if (place == last_place) {
4896 place = first_place;
4897 } else if (place == (num_masks - 1)) {
4898 place = 0;
4899 } else {
4900 place++;
4901 }
4902 s_count = 0;
4903 gap_ct = 1;
4904 rem--;
4905 } else if (s_count == S) { // place full; don't add extra
4906 if (place == last_place) {
4907 place = first_place;
4908 } else if (place == (num_masks - 1)) {
4909 place = 0;
4910 } else {
4911 place++;
4912 }
4913 gap_ct++;
4914 s_count = 0;
4915 }
4916
4917 KA_TRACE(100,if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
"partition = [%d,%d]\n", __kmp_gtid_from_thread(team->t.t_threads
[f]), team->t.t_id, f, th->th.th_new_place, first_place
, last_place); }
4918 ("__kmp_partition_places: close: T#%d(%d:%d) place %d "if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
"partition = [%d,%d]\n", __kmp_gtid_from_thread(team->t.t_threads
[f]), team->t.t_id, f, th->th.th_new_place, first_place
, last_place); }
4919 "partition = [%d,%d]\n",if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
"partition = [%d,%d]\n", __kmp_gtid_from_thread(team->t.t_threads
[f]), team->t.t_id, f, th->th.th_new_place, first_place
, last_place); }
4920 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f,if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
"partition = [%d,%d]\n", __kmp_gtid_from_thread(team->t.t_threads
[f]), team->t.t_id, f, th->th.th_new_place, first_place
, last_place); }
4921 th->th.th_new_place, first_place, last_place))if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
"partition = [%d,%d]\n", __kmp_gtid_from_thread(team->t.t_threads
[f]), team->t.t_id, f, th->th.th_new_place, first_place
, last_place); }
;
4922 }
4923 KMP_DEBUG_ASSERT(place == masters_place)if (!(place == masters_place)) { __kmp_debug_assert("place == masters_place"
, "openmp/runtime/src/kmp_runtime.cpp", 4923); }
;
4924 }
4925 } break;
4926
4927 case proc_bind_spread: {
4928 int f;
4929 int n_th = team->t.t_nproc;
4930 int n_places;
4931 int thidx;
4932 if (first_place <= last_place) {
4933 n_places = last_place - first_place + 1;
4934 } else {
4935 n_places = num_masks - first_place + last_place + 1;
4936 }
4937 if (n_th <= n_places) {
4938 int place = -1;
4939
4940 if (n_places != num_masks) {
4941 int S = n_places / n_th;
4942 int s_count, rem, gap, gap_ct;
4943
4944 place = masters_place;
4945 rem = n_places - n_th * S;
4946 gap = rem ? n_th / rem : 1;
4947 gap_ct = gap;
4948 thidx = n_th;
4949 if (update_master_only == 1)
4950 thidx = 1;
4951 for (f = 0; f < thidx; f++) {
4952 kmp_info_t *th = team->t.t_threads[f];
4953 KMP_DEBUG_ASSERT(th != NULL)if (!(th != __null)) { __kmp_debug_assert("th != __null", "openmp/runtime/src/kmp_runtime.cpp"
, 4953); }
;
4954
4955 th->th.th_first_place = place;
4956 th->th.th_new_place = place;
4957 if (__kmp_display_affinity && place != th->th.th_current_place &&
4958 team->t.t_display_affinity != 1) {
4959 team->t.t_display_affinity = 1;
4960 }
4961 s_count = 1;
4962 while (s_count < S) {
4963 if (place == last_place) {
4964 place = first_place;
4965 } else if (place == (num_masks - 1)) {
4966 place = 0;
4967 } else {
4968 place++;
4969 }
4970 s_count++;
4971 }
4972 if (rem && (gap_ct == gap)) {
4973 if (place == last_place) {
4974 place = first_place;
4975 } else if (place == (num_masks - 1)) {
4976 place = 0;
4977 } else {
4978 place++;
4979 }
4980 rem--;
4981 gap_ct = 0;
4982 }
4983 th->th.th_last_place = place;
4984 gap_ct++;
4985
4986 if (place == last_place) {
4987 place = first_place;
4988 } else if (place == (num_masks - 1)) {
4989 place = 0;
4990 } else {
4991 place++;
4992 }
4993
4994 KA_TRACE(100,if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
"partition = [%d,%d], num_masks: %u\n", __kmp_gtid_from_thread
(team->t.t_threads[f]), team->t.t_id, f, th->th.th_new_place
, th->th.th_first_place, th->th.th_last_place, num_masks
); }
4995 ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
"partition = [%d,%d], num_masks: %u\n", __kmp_gtid_from_thread
(team->t.t_threads[f]), team->t.t_id, f, th->th.th_new_place
, th->th.th_first_place, th->th.th_last_place, num_masks
); }
4996 "partition = [%d,%d], num_masks: %u\n",if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
"partition = [%d,%d], num_masks: %u\n", __kmp_gtid_from_thread
(team->t.t_threads[f]), team->t.t_id, f, th->th.th_new_place
, th->th.th_first_place, th->th.th_last_place, num_masks
); }
4997 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
"partition = [%d,%d], num_masks: %u\n", __kmp_gtid_from_thread
(team->t.t_threads[f]), team->t.t_id, f, th->th.th_new_place
, th->th.th_first_place, th->th.th_last_place, num_masks
); }
4998 f, th->th.th_new_place, th->th.th_first_place,if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
"partition = [%d,%d], num_masks: %u\n", __kmp_gtid_from_thread
(team->t.t_threads[f]), team->t.t_id, f, th->th.th_new_place
, th->th.th_first_place, th->th.th_last_place, num_masks
); }
4999 th->th.th_last_place, num_masks))if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
"partition = [%d,%d], num_masks: %u\n", __kmp_gtid_from_thread
(team->t.t_threads[f]), team->t.t_id, f, th->th.th_new_place
, th->th.th_first_place, th->th.th_last_place, num_masks
); }
;
5000 }
5001 } else {
5002 /* Having uniform space of available computation places I can create
5003 T partitions of round(P/T) size and put threads into the first
5004 place of each partition. */
5005 double current = static_cast<double>(masters_place);
5006 double spacing =
5007 (static_cast<double>(n_places + 1) / static_cast<double>(n_th));
5008 int first, last;
5009 kmp_info_t *th;
5010
5011 thidx = n_th + 1;
5012 if (update_master_only == 1)
5013 thidx = 1;
5014 for (f = 0; f < thidx; f++) {
5015 first = static_cast<int>(current);
5016 last = static_cast<int>(current + spacing) - 1;
5017 KMP_DEBUG_ASSERT(last >= first)if (!(last >= first)) { __kmp_debug_assert("last >= first"
, "openmp/runtime/src/kmp_runtime.cpp", 5017); }
;
5018 if (first >= n_places) {
5019 if (masters_place) {
5020 first -= n_places;
5021 last -= n_places;
5022 if (first == (masters_place + 1)) {
5023 KMP_DEBUG_ASSERT(f == n_th)if (!(f == n_th)) { __kmp_debug_assert("f == n_th", "openmp/runtime/src/kmp_runtime.cpp"
, 5023); }
;
5024 first--;
5025 }
5026 if (last == masters_place) {
5027 KMP_DEBUG_ASSERT(f == (n_th - 1))if (!(f == (n_th - 1))) { __kmp_debug_assert("f == (n_th - 1)"
, "openmp/runtime/src/kmp_runtime.cpp", 5027); }
;
5028 last--;
5029 }
5030 } else {
5031 KMP_DEBUG_ASSERT(f == n_th)if (!(f == n_th)) { __kmp_debug_assert("f == n_th", "openmp/runtime/src/kmp_runtime.cpp"
, 5031); }
;
5032 first = 0;
5033 last = 0;
5034 }
5035 }
5036 if (last >= n_places) {
5037 last = (n_places - 1);
5038 }
5039 place = first;
5040 current += spacing;
5041 if (f < n_th) {
5042 KMP_DEBUG_ASSERT(0 <= first)if (!(0 <= first)) { __kmp_debug_assert("0 <= first", "openmp/runtime/src/kmp_runtime.cpp"
, 5042); }
;
5043 KMP_DEBUG_ASSERT(n_places > first)if (!(n_places > first)) { __kmp_debug_assert("n_places > first"
, "openmp/runtime/src/kmp_runtime.cpp", 5043); }
;
5044 KMP_DEBUG_ASSERT(0 <= last)if (!(0 <= last)) { __kmp_debug_assert("0 <= last", "openmp/runtime/src/kmp_runtime.cpp"
, 5044); }
;
5045 KMP_DEBUG_ASSERT(n_places > last)if (!(n_places > last)) { __kmp_debug_assert("n_places > last"
, "openmp/runtime/src/kmp_runtime.cpp", 5045); }
;
5046 KMP_DEBUG_ASSERT(last_place >= first_place)if (!(last_place >= first_place)) { __kmp_debug_assert("last_place >= first_place"
, "openmp/runtime/src/kmp_runtime.cpp", 5046); }
;
5047 th = team->t.t_threads[f];
5048 KMP_DEBUG_ASSERT(th)if (!(th)) { __kmp_debug_assert("th", "openmp/runtime/src/kmp_runtime.cpp"
, 5048); }
;
5049 th->th.th_first_place = first;
5050 th->th.th_new_place = place;
5051 th->th.th_last_place = last;
5052 if (__kmp_display_affinity && place != th->th.th_current_place &&
5053 team->t.t_display_affinity != 1) {
5054 team->t.t_display_affinity = 1;
5055 }
5056 KA_TRACE(100,if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
"partition = [%d,%d], spacing = %.4f\n", __kmp_gtid_from_thread
(team->t.t_threads[f]), team->t.t_id, f, th->th.th_new_place
, th->th.th_first_place, th->th.th_last_place, spacing)
; }
5057 ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
"partition = [%d,%d], spacing = %.4f\n", __kmp_gtid_from_thread
(team->t.t_threads[f]), team->t.t_id, f, th->th.th_new_place
, th->th.th_first_place, th->th.th_last_place, spacing)
; }
5058 "partition = [%d,%d], spacing = %.4f\n",if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
"partition = [%d,%d], spacing = %.4f\n", __kmp_gtid_from_thread
(team->t.t_threads[f]), team->t.t_id, f, th->th.th_new_place
, th->th.th_first_place, th->th.th_last_place, spacing)
; }
5059 __kmp_gtid_from_thread(team->t.t_threads[f]),if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
"partition = [%d,%d], spacing = %.4f\n", __kmp_gtid_from_thread
(team->t.t_threads[f]), team->t.t_id, f, th->th.th_new_place
, th->th.th_first_place, th->th.th_last_place, spacing)
; }
5060 team->t.t_id, f, th->th.th_new_place,if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
"partition = [%d,%d], spacing = %.4f\n", __kmp_gtid_from_thread
(team->t.t_threads[f]), team->t.t_id, f, th->th.th_new_place
, th->th.th_first_place, th->th.th_last_place, spacing)
; }
5061 th->th.th_first_place, th->th.th_last_place, spacing))if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
"partition = [%d,%d], spacing = %.4f\n", __kmp_gtid_from_thread
(team->t.t_threads[f]), team->t.t_id, f, th->th.th_new_place
, th->th.th_first_place, th->th.th_last_place, spacing)
; }
;
5062 }
5063 }
5064 }
5065 KMP_DEBUG_ASSERT(update_master_only || place == masters_place)if (!(update_master_only || place == masters_place)) { __kmp_debug_assert
("update_master_only || place == masters_place", "openmp/runtime/src/kmp_runtime.cpp"
, 5065); }
;
5066 } else {
5067 int S, rem, gap, s_count;
5068 S = n_th / n_places;
5069 s_count = 0;
5070 rem = n_th - (S * n_places);
5071 gap = rem > 0 ? n_places / rem : n_places;
5072 int place = masters_place;
5073 int gap_ct = gap;
5074 thidx = n_th;
5075 if (update_master_only == 1)
5076 thidx = 1;
5077 for (f = 0; f < thidx; f++) {
5078 kmp_info_t *th = team->t.t_threads[f];
5079 KMP_DEBUG_ASSERT(th != NULL)if (!(th != __null)) { __kmp_debug_assert("th != __null", "openmp/runtime/src/kmp_runtime.cpp"
, 5079); }
;
5080
5081 th->th.th_first_place = place;
5082 th->th.th_last_place = place;
5083 th->th.th_new_place = place;
5084 if (__kmp_display_affinity && place != th->th.th_current_place &&
5085 team->t.t_display_affinity != 1) {
5086 team->t.t_display_affinity = 1;
5087 }
5088 s_count++;
5089
5090 if ((s_count == S) && rem && (gap_ct == gap)) {
5091 // do nothing, add an extra thread to place on next iteration
5092 } else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
5093 // we added an extra thread to this place; move on to next place
5094 if (place == last_place) {
5095 place = first_place;
5096 } else if (place == (num_masks - 1)) {
5097 place = 0;
5098 } else {
5099 place++;
5100 }
5101 s_count = 0;
5102 gap_ct = 1;
5103 rem--;
5104 } else if (s_count == S) { // place is full; don't add extra thread
5105 if (place == last_place) {
5106 place = first_place;
5107 } else if (place == (num_masks - 1)) {
5108 place = 0;
5109 } else {
5110 place++;
5111 }
5112 gap_ct++;
5113 s_count = 0;
5114 }
5115
5116 KA_TRACE(100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
"partition = [%d,%d]\n", __kmp_gtid_from_thread(team->t.t_threads
[f]), team->t.t_id, f, th->th.th_new_place, th->th.th_first_place
, th->th.th_last_place); }
5117 "partition = [%d,%d]\n",if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
"partition = [%d,%d]\n", __kmp_gtid_from_thread(team->t.t_threads
[f]), team->t.t_id, f, th->th.th_new_place, th->th.th_first_place
, th->th.th_last_place); }
5118 __kmp_gtid_from_thread(team->t.t_threads[f]),if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
"partition = [%d,%d]\n", __kmp_gtid_from_thread(team->t.t_threads
[f]), team->t.t_id, f, th->th.th_new_place, th->th.th_first_place
, th->th.th_last_place); }
5119 team->t.t_id, f, th->th.th_new_place,if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
"partition = [%d,%d]\n", __kmp_gtid_from_thread(team->t.t_threads
[f]), team->t.t_id, f, th->th.th_new_place, th->th.th_first_place
, th->th.th_last_place); }
5120 th->th.th_first_place, th->th.th_last_place))if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
"partition = [%d,%d]\n", __kmp_gtid_from_thread(team->t.t_threads
[f]), team->t.t_id, f, th->th.th_new_place, th->th.th_first_place
, th->th.th_last_place); }
;
5121 }
5122 KMP_DEBUG_ASSERT(update_master_only || place == masters_place)if (!(update_master_only || place == masters_place)) { __kmp_debug_assert
("update_master_only || place == masters_place", "openmp/runtime/src/kmp_runtime.cpp"
, 5122); }
;
5123 }
5124 } break;
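The spread branch above distributes n_th threads over n_places in blocks: every place receives S = n_th / n_places threads, and the rem = n_th - S * n_places leftover threads are handed out one extra at a time, roughly every gap = n_places / rem places (the s_count/gap_ct bookkeeping tracks this per thread). Below is a minimal standalone sketch of that arithmetic only, using hypothetical sizes n_th and n_places and a simple per-place count instead of the runtime's per-thread loop.

  #include <cstdio>
  #include <vector>

  int main() {
    // Hypothetical sizes for illustration only; not taken from the runtime.
    const int n_th = 10, n_places = 4;
    const int S = n_th / n_places;           // base threads per place
    int rem = n_th - S * n_places;           // number of places that get one extra thread
    const int gap = rem > 0 ? n_places / rem : n_places;

    std::vector<int> count(n_places, S);     // every place starts with S threads
    for (int p = 0; p < n_places && rem > 0; p += gap) {
      ++count[p];                            // extra thread on (roughly) every gap-th place
      --rem;
    }
    for (int p = 0; p < n_places; ++p)
      std::printf("place %d -> %d thread(s)\n", p, count[p]);
    return 0;
  }

For n_th = 10 and n_places = 4 this prints 3, 2, 3, 2 threads per place, which is the intent of the s_count == S and s_count == S + 1 cases above; the real loop additionally starts from masters_place and handles the first_place/last_place and num_masks wrap-around.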
5125
5126 default:
5127 break;
5128 }
5129
5130 KA_TRACE(20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_partition_places: exit T#%d\n"
, team->t.t_id); }
;
5131}
5132
5133#endif // KMP_AFFINITY_SUPPORTED
5134
5135/* allocate a new team data structure to use. take one off of the free pool if
5136 available */
5137kmp_team_t *
5138__kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
5139#if OMPT_SUPPORT1
5140 ompt_data_t ompt_parallel_data,
5141#endif
5142 kmp_proc_bind_t new_proc_bind,
5143 kmp_internal_control_t *new_icvs,
5144 int argc USE_NESTED_HOT_ARG(kmp_info_t *master), kmp_info_t *master) {
5145 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team)((void)0);
5146 int f;
5147 kmp_team_t *team;
5148 int use_hot_team = !root->r.r_active;
5149 int level = 0;
5150 int do_place_partition = 1;
5151
5152 KA_TRACE(20, ("__kmp_allocate_team: called\n"))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: called\n"
); }
;
5153 KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0)if (!(new_nproc >= 1 && argc >= 0)) { __kmp_debug_assert
("new_nproc >= 1 && argc >= 0", "openmp/runtime/src/kmp_runtime.cpp"
, 5153); }
;
5154 KMP_DEBUG_ASSERT(max_nproc >= new_nproc)if (!(max_nproc >= new_nproc)) { __kmp_debug_assert("max_nproc >= new_nproc"
, "openmp/runtime/src/kmp_runtime.cpp", 5154); }
;
5155 KMP_MB();
5156
5157#if KMP_NESTED_HOT_TEAMS1
5158 kmp_hot_team_ptr_t *hot_teams;
5159 if (master) {
5160 team = master->th.th_team;
5161 level = team->t.t_active_level;
5162 if (master->th.th_teams_microtask) { // in teams construct?
5163 if (master->th.th_teams_size.nteams > 1 &&
5164 ( // #teams > 1
5165 team->t.t_pkfn ==
5166 (microtask_t)__kmp_teams_master || // inner fork of the teams
5167 master->th.th_teams_level <
5168 team->t.t_level)) { // or nested parallel inside the teams
5169 ++level; // not increment if #teams==1, or for outer fork of the teams;
5170 // increment otherwise
5171 }
5172 // Do not perform the place partition if inner fork of the teams
5173 // Wait until nested parallel region encountered inside teams construct
5174 if ((master->th.th_teams_size.nteams == 1 &&
5175 master->th.th_teams_level >= team->t.t_level) ||
5176 (team->t.t_pkfn == (microtask_t)__kmp_teams_master))
5177 do_place_partition = 0;
5178 }
5179 hot_teams = master->th.th_hot_teams;
5180 if (level < __kmp_hot_teams_max_level && hot_teams &&
5181 hot_teams[level].hot_team) {
5182 // hot team has already been allocated for given level
5183 use_hot_team = 1;
5184 } else {
5185 use_hot_team = 0;
5186 }
5187 } else {
5188 // check we won't access uninitialized hot_teams, just in case
5189 KMP_DEBUG_ASSERT(new_nproc == 1)if (!(new_nproc == 1)) { __kmp_debug_assert("new_nproc == 1",
"openmp/runtime/src/kmp_runtime.cpp", 5189); }
;
5190 }
5191#endif
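The nested-hot-teams block above keys cached ("hot") teams by nesting level: if master->th.th_hot_teams has an entry for this level and the level is below __kmp_hot_teams_max_level, the existing team is reused instead of allocating a fresh one. A minimal sketch of that per-level caching idea follows, with hypothetical names (Team, HotTeamCache) standing in for kmp_team_t and kmp_hot_team_ptr_t; it mirrors the use_hot_team decision in spirit, not in detail.

  #include <cstddef>
  #include <vector>

  struct Team { int nproc = 0; };                    // stand-in for kmp_team_t

  class HotTeamCache {
    std::vector<Team *> slots_;                      // one cached team per nesting level
  public:
    explicit HotTeamCache(std::size_t max_level) : slots_(max_level, nullptr) {}

    // Return the cached team for this level if one exists (the "hot" path);
    // otherwise allocate a fresh team and remember it for next time.
    Team *acquire(std::size_t level, int nproc) {
      if (level < slots_.size() && slots_[level] != nullptr)
        return slots_[level];
      Team *t = new Team{nproc};
      if (level < slots_.size())
        slots_[level] = t;
      return t;
    }
  };

The runtime also reconciles the cached team's size with new_nproc further down, and falls back to the single root->r.r_hot_team when nested hot teams are disabled.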
5192 // Optimization to use a "hot" team
5193 if (use_hot_team && new_nproc > 1) {
5194 KMP_DEBUG_ASSERT(new_nproc <= max_nproc)if (!(new_nproc <= max_nproc)) { __kmp_debug_assert("new_nproc <= max_nproc"
, "openmp/runtime/src/kmp_runtime.cpp", 5194); }
;
5195#if KMP_NESTED_HOT_TEAMS1
5196 team = hot_teams[level].hot_team;
5197#else
5198 team = root->r.r_hot_team;
5199#endif
5200#if KMP_DEBUG1
5201 if (__kmp_tasking_mode != tskm_immediate_exec) {
5202 KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: hot team task_team[0] = %p "
"task_team[1] = %p before reinit\n", team->t.t_task_team[
0], team->t.t_task_team[1]); }
5203 "task_team[1] = %p before reinit\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: hot team task_team[0] = %p "
"task_team[1] = %p before reinit\n", team->t.t_task_team[
0], team->t.t_task_team[1]); }
5204 team->t.t_task_team[0], team->t.t_task_team[1]))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: hot team task_team[0] = %p "
"task_team[1] = %p before reinit\n", team->t.t_task_team[
0], team->t.t_task_team[1]); }
;
5205 }
5206#endif
5207
5208 if (team->t.t_nproc != new_nproc &&
5209 __kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5210 // Distributed barrier may need a resize
5211 int old_nthr = team->t.t_nproc;
5212 __kmp_resize_dist_barrier(team, old_nthr, new_nproc);
5213 }
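The resize above only runs when the fork/join release pattern is the distributed barrier (bp_dist_bar), whose per-team state is sized to the thread count. Below is a generic, hypothetical sketch of that resize-on-size-change pattern; it is not the actual __kmp_resize_dist_barrier, whose internal layout is runtime-specific.

  #include <cstddef>
  #include <vector>

  // Hypothetical per-team barrier state sized to the number of team members.
  struct DistBarrierState {
    std::vector<int> per_thread_flags;               // one slot per thread in the team
  };

  // Grow or shrink the barrier state when the team changes size, keeping the
  // slots that the old and new sizes have in common.
  static void resize_dist_barrier(DistBarrierState &b, int old_nthr, int new_nthr) {
    if (old_nthr != new_nthr)
      b.per_thread_flags.resize(static_cast<std::size_t>(new_nthr), 0);
  }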
5214
5215 // If not doing the place partition, then reset the team's proc bind
5216 // to indicate that partitioning of all threads still needs to take place
5217 if (do_place_partition == 0)
5218 team->t.t_proc_bind = proc_bind_default;
5219 // Has the number of threads changed?
5220 /* Let's assume the most common case is that the number of threads is
5221 unchanged, and put that case first. */
5222 if (team->t.t_nproc == new_nproc) { // Check changes in number of threads
5223 KA_TRACE(20, ("__kmp_allocate_team: reusing hot team\n"))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_team: reusing hot team\n"
); }
;
5224 // This case can mean that omp_set_num_threads() was called and the hot
5225 // team size was already reduced, so we check the special flag
5226 if (team->t.t_size_changed == -1) {
5227 team->t.t_size_changed = 1;
5228 } else {
5229 KMP_CHECK_UPDATE(team->t.t_size_changed, 0)if ((team->t.t_size_changed) != (0)) (team->t.t_size_changed
) = (0)
;
5230 }
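KMP_CHECK_UPDATE, used just above and again below for t_sched.sched, expands (as the annotations show) to a guarded assignment: the store happens only when the value actually changes, which avoids needlessly dirtying a cache line that other threads may be reading. A minimal sketch of the same idiom, with a hypothetical helper name:

  // Hypothetical stand-in for the KMP_CHECK_UPDATE macro shown in the expansions.
  template <class T>
  static inline void check_update(T &dst, const T &val) {
    if (dst != val)       // only write when the value really differs
      dst = val;
  }

  // e.g. check_update(size_changed, 0); stores 0 only if size_changed isn't already 0.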
5231
5232 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
5233 kmp_r_sched_t new_sched = new_icvs->sched;
5234 // set primary thread's schedule as new run-time schedule
5235 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched)if ((team->t.t_sched.sched) != (new_sched.sched)) (team->
t.t_sched.sched) = (new_sched.sched)
;
5236
5237 __kmp_reinitialize_team(team, new_icvs,
5238 root->r.r_uber_thread->th.th_ident);
5239
5240 KF_TRACE(10, ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", 0,if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n"
, 0, team->t.t_threads[0], team); }
5241 team->t.t_threads[0], team))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n"
, 0, team->t.t_threads[0], team); }
;
5242 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
5243
5244#if KMP_AFFINITY_SUPPORTED1
5245 if ((team->t.t_size_changed == 0) &&
5246 (team->t.t_proc_bind == new_proc_bind)) {
5247 if (new_proc_bind == proc_bind_spread) {
5248 if (do_place_partition) {
5249 // add flag to update only master for spread
5250 __kmp_partition_places(team, 1);
5251 }
5252 }
5253 KA_TRACE(200, ("__kmp_allocate_team: reusing hot team #%d bindings: "if (kmp_a_debug >= 200) { __kmp_debug_printf ("__kmp_allocate_team: reusing hot team #%d bindings: "
"proc_bind = %d, partition = [%d,%d]\n", team->t.t_id, new_proc_bind
, team->t.t_first_place, team->t.t_last_place); }
5254 "proc_bind = %d, partition = [%d,%d]\n",if (kmp_a_debug >= 200) { __kmp_debug_printf ("__kmp_allocate_team: reusing hot team #%d bindings: "
"proc_bind = %d, partition = [%d,%d]\n", team->t.t_id, new_proc_bind
, team->t.t_first_place, team->t.t_last_place); }
5255 team->t.t_id, new_proc_bind, team->t.t_first_place,if (kmp_a_debug >= 200) { __kmp_debug_printf ("__kmp_allocate_team: reusing hot team #%d bindings: "
"proc_bind = %d, partition = [%d,%d]\n", team->t.t_id, new_proc_bind
, team->t.t_first_place, team->t.t_last_place); }
5256 team->t.t_last_place))if (kmp_a_debug >= 200) { __kmp_debug_printf ("__kmp_allocate_team: reusing hot team #%d bindings: "
"proc_bind = %d, partition = [%d,%d]\n", team->t.t_id, new_proc_bind
, team->t.t_first_place, team->t.t_last_place); }
;