Bug Summary

File: build/source/openmp/runtime/src/kmp_dispatch.cpp
Warning: line 335, column 26
Division by zero

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name kmp_dispatch.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -resource-dir /usr/lib/llvm-16/lib/clang/16.0.0 -I projects/openmp/runtime/src -I /build/source/openmp/runtime/src -I include -I /build/source/llvm/include -I /build/source/openmp/runtime/src/i18n -I /build/source/openmp/runtime/src/include -I /build/source/openmp/runtime/src/thirdparty/ittnotify -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -D omp_EXPORTS -D _FORTIFY_SOURCE=2 -D NDEBUG -D _GNU_SOURCE -D _REENTRANT -D _FORTIFY_SOURCE=2 -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-16/lib/clang/16.0.0/include -internal-isystem /usr/local/include -internal-isystem 
/usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fmacro-prefix-map=/build/source/= -fcoverage-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fcoverage-prefix-map=/build/source/= -source-date-epoch 1670066131 -O2 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -Wno-misleading-indentation -Wno-enum-constexpr-conversion -Wno-extra -Wno-pedantic -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-covered-switch-default -Wno-frame-address -Wno-strict-aliasing -Wno-stringop-truncation -Wno-switch -Wno-uninitialized -Wno-return-type-c-linkage -Wno-cast-qual -Wno-int-to-void-pointer-cast -std=c++17 -fdeprecated-macro -fdebug-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/= -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fno-rtti -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2022-12-03-132955-15984-1 -x c++ /build/source/openmp/runtime/src/kmp_dispatch.cpp
1/*
2 * kmp_dispatch.cpp: dynamic scheduling - iteration initialization and dispatch.
3 */
4
5//===----------------------------------------------------------------------===//
6//
7// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8// See https://llvm.org/LICENSE.txt for license information.
9// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10//
11//===----------------------------------------------------------------------===//
12
13/* Dynamic scheduling initialization and dispatch.
14 *
15 * NOTE: __kmp_nth is a constant inside of any dispatch loop, however
16 * it may change values between parallel regions. __kmp_max_nth
17 * is the largest value __kmp_nth may take, 1 is the smallest.
18 */
19
20#include "kmp.h"
21#include "kmp_error.h"
22#include "kmp_i18n.h"
23#include "kmp_itt.h"
24#include "kmp_stats.h"
25#include "kmp_str.h"
26#if KMP_USE_X87CONTROL0
27#include <float.h>
28#endif
29#include "kmp_lock.h"
30#include "kmp_dispatch.h"
31#if KMP_USE_HIER_SCHED0
32#include "kmp_dispatch_hier.h"
33#endif
34
35#if OMPT_SUPPORT1
36#include "ompt-specific.h"
37#endif
38
39/* ------------------------------------------------------------------------ */
40/* ------------------------------------------------------------------------ */
41
// Consistency-check helper: when __kmp_env_consistency_check is set, pushes a
// ct_ordered_in_pdo sync construct for the calling thread via __kmp_push_sync.
// Presumably the "enter ordered" hook for dispatched loops -- confirm against
// callers.
// (Analyzer rendering: source line numbers are fused to the start of each line
// and every macro use is immediately followed by its expansion.)
//   gtid_ref - pointer to the global thread id; asserted non-null in debug
//              builds and dereferenced to index __kmp_threads
//   cid_ref  - not used by this function
//   loc_ref  - source location, forwarded unchanged to __kmp_push_sync
42void __kmp_dispatch_deo_error(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
43 kmp_info_t *th;
44
45 KMP_DEBUG_ASSERT(gtid_ref)if (!(gtid_ref)) { __kmp_debug_assert("gtid_ref", "openmp/runtime/src/kmp_dispatch.cpp"
, 45); }
;
46
 // Nothing is done unless consistency checking is enabled.
47 if (__kmp_env_consistency_check) {
48 th = __kmp_threads[*gtid_ref];
 // Push only when the thread's root is marked active (r_active) and the
 // current dispatch buffer's pushed_ws is something other than ct_none.
49 if (th->th.th_root->r.r_active &&
50 (th->th.th_dispatch->th_dispatch_pr_current->pushed_ws != ct_none)) {
 // The dynamic-lock variant of the call passes one extra trailing argument (0).
51#if KMP_USE_DYNAMIC_LOCK1
52 __kmp_push_sync(*gtid_ref, ct_ordered_in_pdo, loc_ref, NULL__null, 0);
53#else
54 __kmp_push_sync(*gtid_ref, ct_ordered_in_pdo, loc_ref, NULL__null);
55#endif
56 }
57 }
58}
59
// Consistency-check helper: when __kmp_env_consistency_check is set, pops the
// ct_ordered_in_pdo sync construct for the calling thread via __kmp_pop_sync.
// Presumably the "exit ordered" counterpart of __kmp_dispatch_deo_error --
// confirm against callers.
//   gtid_ref - pointer to the global thread id; dereferenced to index
//              __kmp_threads
//   cid_ref  - not used by this function
//   loc_ref  - source location, forwarded unchanged to __kmp_pop_sync
// NOTE(review): unlike __kmp_dispatch_deo_error, this function neither asserts
// that gtid_ref is non-null nor tests th_root->r.r_active before acting --
// confirm the asymmetry is intentional.
60void __kmp_dispatch_dxo_error(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
61 kmp_info_t *th;
62
 // Nothing is done unless consistency checking is enabled.
63 if (__kmp_env_consistency_check) {
64 th = __kmp_threads[*gtid_ref];
 // Pop only when the current dispatch buffer's pushed_ws is not ct_none.
65 if (th->th.th_dispatch->th_dispatch_pr_current->pushed_ws != ct_none) {
66 __kmp_pop_sync(*gtid_ref, ct_ordered_in_pdo, loc_ref);
67 }
68 }
69}
70
71// Returns either SCHEDULE_MONOTONIC or SCHEDULE_NONMONOTONIC
// (rendered below with their expansions appended: SCHEDULE_MONOTONIC -> 1,
// SCHEDULE_NONMONOTONIC -> 0).
//   loc      - source location; may be null. When non-null its
//              get_openmp_version() is consulted.
//   schedule - scheduling type whose monotonic/nonmonotonic modifier bits are
//              inspected
//   use_hier - when true the result is always monotonic (defaults to false)
// Decision order, later steps overriding earlier ones:
//   1. start as nonmonotonic;
//   2. switch to monotonic if loc reports an OpenMP version below 50;
//   3. use_hier or __kmp_force_monotonic forces monotonic; otherwise an
//      explicit nonmonotonic modifier wins over an explicit monotonic one;
//      with neither modifier set the value from steps 1-2 is kept.
72static inline int __kmp_get_monotonicity(ident_t *loc, enum sched_type schedule,
73 bool use_hier = false) {
74 // Pick up the nonmonotonic/monotonic bits from the scheduling type
75 // Nonmonotonic as default for dynamic schedule when no modifier is specified
76 int monotonicity = SCHEDULE_NONMONOTONIC0;
77
78 // Let default be monotonic for executables
79 // compiled with OpenMP* 4.5 or less compilers
80 if (loc != NULL__null && loc->get_openmp_version() < 50)
81 monotonicity = SCHEDULE_MONOTONIC1;
82
 // Global/hierarchical overrides take precedence over the schedule's own
 // modifier bits.
83 if (use_hier || __kmp_force_monotonic)
84 monotonicity = SCHEDULE_MONOTONIC1;
85 else if (SCHEDULE_HAS_NONMONOTONIC(schedule)(((schedule)&kmp_sch_modifier_nonmonotonic) != 0))
86 monotonicity = SCHEDULE_NONMONOTONIC0;
87 else if (SCHEDULE_HAS_MONOTONIC(schedule)(((schedule)&kmp_sch_modifier_monotonic) != 0))
88 monotonicity = SCHEDULE_MONOTONIC1;
89
90 return monotonicity;
91}
92
// State values stored in a dispatch buffer's steal_flag. Only compiled when
// KMP_STATIC_STEAL_ENABLED is non-zero (rendered below with its expansion, 1,
// appended). The trailing comments list the permitted transitions, which side
// (owner or thief) performs each, and whether it is marked sync or async.
93#if KMP_STATIC_STEAL_ENABLED1
94enum { // values for steal_flag (possible states of private per-loop buffer)
95 UNUSED = 0,
96 CLAIMED = 1, // owner thread started initialization
97 READY = 2, // available for stealing
98 THIEF = 3 // finished by owner, or claimed by thief
99 // possible state changes:
100 // 0 -> 1 owner only, sync
101 // 0 -> 3 thief only, sync
102 // 1 -> 2 owner only, async
103 // 2 -> 3 owner only, async
104 // 3 -> 2 owner only, async
105 // 3 -> 0 last thread finishing the loop, async
106};
107#endif
108
109// Initialize a dispatch_private_info_template<T> buffer for a particular
110// type of schedule,chunk. The loop description is found in lb (lower bound),
111// ub (upper bound), and st (stride). nproc is the number of threads relevant
112// to the scheduling (often the number of threads in a team, but not always if
113// hierarchical scheduling is used). tid is the id of the thread calling
114// the function within the group of nproc threads. It will have a value
115// between 0 and nproc - 1. This is often just the thread id within a team, but
116// is not necessarily the case when using hierarchical scheduling.
117// loc is the source file location of the corresponding loop
118// gtid is the global thread id
119template <typename T>
120void __kmp_dispatch_init_algorithm(ident_t *loc, int gtid,
121 dispatch_private_info_template<T> *pr,
122 enum sched_type schedule, T lb, T ub,
123 typename traits_t<T>::signed_t st,
124#if USE_ITT_BUILD1
125 kmp_uint64 *cur_chunk,
126#endif
127 typename traits_t<T>::signed_t chunk,
128 T nproc, T tid) {
129 typedef typename traits_t<T>::unsigned_t UT;
130 typedef typename traits_t<T>::floating_t DBL;
131
132 int active;
133 T tc;
134 kmp_info_t *th;
135 kmp_team_t *team;
136 int monotonicity;
137 bool use_hier;
138
139#ifdef KMP_DEBUG1
140 typedef typename traits_t<T>::signed_t ST;
141 {
142 char *buff;
143 // create format specifiers before the debug output
144 buff = __kmp_str_format("__kmp_dispatch_init_algorithm: T#%%d called "
145 "pr:%%p lb:%%%s ub:%%%s st:%%%s "
146 "schedule:%%d chunk:%%%s nproc:%%%s tid:%%%s\n",
147 traits_t<T>::spec, traits_t<T>::spec,
148 traits_t<ST>::spec, traits_t<ST>::spec,
149 traits_t<T>::spec, traits_t<T>::spec);
150 KD_TRACE(10, (buff, gtid, pr, lb, ub, st, schedule, chunk, nproc, tid))if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, pr
, lb, ub, st, schedule, chunk, nproc, tid); }
;
1
Assuming 'kmp_d_debug' is < 10
2
Taking false branch
151 __kmp_str_free(&buff);
152 }
153#endif
154 /* setup data */
155 th = __kmp_threads[gtid];
156 team = th->th.th_team;
157 active = !team->t.t_serialized;
3
Assuming field 't_serialized' is not equal to 0
158
159#if USE_ITT_BUILD1
160 int itt_need_metadata_reporting =
161 __itt_metadata_add_ptr__kmp_itt_metadata_add_ptr__3_0 && __kmp_forkjoin_frames_mode == 3 &&
4
Assuming '__kmp_itt_metadata_add_ptr__3_0' is null
162 KMP_MASTER_GTID(gtid)(0 == __kmp_tid_from_gtid((gtid))) && th->th.th_teams_microtask == NULL__null &&
163 team->t.t_active_level == 1;
164#endif
165
166#if KMP_USE_HIER_SCHED0
167 use_hier = pr->flags.use_hier;
168#else
169 use_hier = false;
170#endif
171
172 /* Pick up the nonmonotonic/monotonic bits from the scheduling type */
173 monotonicity = __kmp_get_monotonicity(loc, schedule, use_hier);
174 schedule = SCHEDULE_WITHOUT_MODIFIERS(schedule)(enum sched_type)( (schedule) & ~(kmp_sch_modifier_nonmonotonic
| kmp_sch_modifier_monotonic))
;
175
176 /* Pick up the nomerge/ordered bits from the scheduling type */
177 if ((schedule >= kmp_nm_lower) && (schedule < kmp_nm_upper)) {
5
Assuming 'schedule' is < kmp_nm_lower
178 pr->flags.nomerge = TRUE(!0);
179 schedule =
180 (enum sched_type)(((int)schedule) - (kmp_nm_lower - kmp_sch_lower));
181 } else {
182 pr->flags.nomerge = FALSE0;
183 }
184 pr->type_size = traits_t<T>::type_size; // remember the size of variables
185 if (kmp_ord_lower & schedule) {
6
Assuming the condition is false
7
Taking false branch
186 pr->flags.ordered = TRUE(!0);
187 schedule =
188 (enum sched_type)(((int)schedule) - (kmp_ord_lower - kmp_sch_lower));
189 } else {
190 pr->flags.ordered = FALSE0;
191 }
192 // Ordered overrides nonmonotonic
193 if (pr->flags.ordered
7.1
Field 'ordered' is 0
) {
8
Taking false branch
194 monotonicity = SCHEDULE_MONOTONIC1;
195 }
196
197 if (schedule == kmp_sch_static) {
9
Assuming 'schedule' is equal to kmp_sch_static
10
Taking true branch
198 schedule = __kmp_static;
199 } else {
200 if (schedule == kmp_sch_runtime) {
201 // Use the scheduling specified by OMP_SCHEDULE (or __kmp_sch_default if
202 // not specified)
203 schedule = team->t.t_sched.r_sched_type;
204 monotonicity = __kmp_get_monotonicity(loc, schedule, use_hier);
205 schedule = SCHEDULE_WITHOUT_MODIFIERS(schedule)(enum sched_type)( (schedule) & ~(kmp_sch_modifier_nonmonotonic
| kmp_sch_modifier_monotonic))
;
206 if (pr->flags.ordered) // correct monotonicity for ordered loop if needed
207 monotonicity = SCHEDULE_MONOTONIC1;
208 // Detail the schedule if needed (global controls are differentiated
209 // appropriately)
210 if (schedule == kmp_sch_guided_chunked) {
211 schedule = __kmp_guided;
212 } else if (schedule == kmp_sch_static) {
213 schedule = __kmp_static;
214 }
215 // Use the chunk size specified by OMP_SCHEDULE (or default if not
216 // specified)
217 chunk = team->t.t_sched.chunk;
218#if USE_ITT_BUILD1
219 if (cur_chunk)
220 *cur_chunk = chunk;
221#endif
222#ifdef KMP_DEBUG1
223 {
224 char *buff;
225 // create format specifiers before the debug output
226 buff = __kmp_str_format("__kmp_dispatch_init_algorithm: T#%%d new: "
227 "schedule:%%d chunk:%%%s\n",
228 traits_t<ST>::spec);
229 KD_TRACE(10, (buff, gtid, schedule, chunk))if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, schedule
, chunk); }
;
230 __kmp_str_free(&buff);
231 }
232#endif
233 } else {
234 if (schedule == kmp_sch_guided_chunked) {
235 schedule = __kmp_guided;
236 }
237 if (chunk <= 0) {
238 chunk = KMP_DEFAULT_CHUNK1;
239 }
240 }
241
242 if (schedule == kmp_sch_auto) {
243 // mapping and differentiation: in the __kmp_do_serial_initialize()
244 schedule = __kmp_auto;
245#ifdef KMP_DEBUG1
246 {
247 char *buff;
248 // create format specifiers before the debug output
249 buff = __kmp_str_format(
250 "__kmp_dispatch_init_algorithm: kmp_sch_auto: T#%%d new: "
251 "schedule:%%d chunk:%%%s\n",
252 traits_t<ST>::spec);
253 KD_TRACE(10, (buff, gtid, schedule, chunk))if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, schedule
, chunk); }
;
254 __kmp_str_free(&buff);
255 }
256#endif
257 }
258#if KMP_STATIC_STEAL_ENABLED1
259 // map nonmonotonic:dynamic to static steal
260 if (schedule == kmp_sch_dynamic_chunked) {
261 if (monotonicity == SCHEDULE_NONMONOTONIC0)
262 schedule = kmp_sch_static_steal;
263 }
264#endif
265 /* guided analytical not safe for too many threads */
266 if (schedule == kmp_sch_guided_analytical_chunked && nproc > 1 << 20) {
267 schedule = kmp_sch_guided_iterative_chunked;
268 KMP_WARNING(DispatchManyThreads)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_DispatchManyThreads
), __kmp_msg_null)
;
269 }
270 if (schedule == kmp_sch_runtime_simd) {
271 // compiler provides simd_width in the chunk parameter
272 schedule = team->t.t_sched.r_sched_type;
273 monotonicity = __kmp_get_monotonicity(loc, schedule, use_hier);
274 schedule = SCHEDULE_WITHOUT_MODIFIERS(schedule)(enum sched_type)( (schedule) & ~(kmp_sch_modifier_nonmonotonic
| kmp_sch_modifier_monotonic))
;
275 // Detail the schedule if needed (global controls are differentiated
276 // appropriately)
277 if (schedule == kmp_sch_static || schedule == kmp_sch_auto ||
278 schedule == __kmp_static) {
279 schedule = kmp_sch_static_balanced_chunked;
280 } else {
281 if (schedule == kmp_sch_guided_chunked || schedule == __kmp_guided) {
282 schedule = kmp_sch_guided_simd;
283 }
284 chunk = team->t.t_sched.chunk * chunk;
285 }
286#if USE_ITT_BUILD1
287 if (cur_chunk)
288 *cur_chunk = chunk;
289#endif
290#ifdef KMP_DEBUG1
291 {
292 char *buff;
293 // create format specifiers before the debug output
294 buff = __kmp_str_format(
295 "__kmp_dispatch_init_algorithm: T#%%d new: schedule:%%d"
296 " chunk:%%%s\n",
297 traits_t<ST>::spec);
298 KD_TRACE(10, (buff, gtid, schedule, chunk))if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, schedule
, chunk); }
;
299 __kmp_str_free(&buff);
300 }
301#endif
302 }
303 pr->u.p.parm1 = chunk;
304 }
305 KMP_ASSERT2((kmp_sch_lower < schedule && schedule < kmp_sch_upper),if (!((kmp_sch_lower < schedule && schedule < kmp_sch_upper
))) { __kmp_debug_assert(("unknown scheduling type"), "openmp/runtime/src/kmp_dispatch.cpp"
, 306); }
11
Assuming 'schedule' is > kmp_sch_lower
12
Assuming 'schedule' is < kmp_sch_upper
13
Taking false branch
306 "unknown scheduling type")if (!((kmp_sch_lower < schedule && schedule < kmp_sch_upper
))) { __kmp_debug_assert(("unknown scheduling type"), "openmp/runtime/src/kmp_dispatch.cpp"
, 306); }
;
307
308 pr->u.p.count = 0;
309
310 if (__kmp_env_consistency_check) {
14
Assuming '__kmp_env_consistency_check' is not equal to 0
15
Taking true branch
311 if (st == 0) {
16
Assuming 'st' is equal to 0
17
Taking true branch
312 __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited,
313 (pr->flags.ordered
17.1
Field 'ordered' is 0
? ct_pdo_ordered : ct_pdo), loc);
18
'?' condition is false
314 }
315 }
316 // compute trip count
317 if (st
18.1
'st' is not equal to 1
== 1) { // most common case
19
Taking false branch
318 if (ub >= lb) {
319 tc = ub - lb + 1;
320 } else { // ub < lb
321 tc = 0; // zero-trip
322 }
323 } else if (st
19.1
'st' is >= 0
< 0) {
20
Taking false branch
324 if (lb >= ub) {
325 // AC: cast to unsigned is needed for loops like (i=2B; i>-2B; i-=1B),
326 // where the division needs to be unsigned regardless of the result type
327 tc = (UT)(lb - ub) / (-st) + 1;
328 } else { // lb < ub
329 tc = 0; // zero-trip
330 }
331 } else { // st > 0
332 if (ub >= lb) {
21
Assuming 'ub' is >= 'lb'
22
Taking true branch
333 // AC: cast to unsigned is needed for loops like (i=-2B; i<2B; i+=1B),
334 // where the division needs to be unsigned regardless of the result type
335 tc = (UT)(ub - lb) / st + 1;
23
Division by zero
336 } else { // ub < lb
337 tc = 0; // zero-trip
338 }
339 }
340
341#if KMP_STATS_ENABLED0
342 if (KMP_MASTER_GTID(gtid)(0 == __kmp_tid_from_gtid((gtid)))) {
343 KMP_COUNT_VALUE(OMP_loop_dynamic_total_iterations, tc)((void)0);
344 }
345#endif
346
347 pr->u.p.lb = lb;
348 pr->u.p.ub = ub;
349 pr->u.p.st = st;
350 pr->u.p.tc = tc;
351
352#if KMP_OS_WINDOWS0
353 pr->u.p.last_upper = ub + st;
354#endif /* KMP_OS_WINDOWS */
355
356 /* NOTE: only the active parallel region(s) has active ordered sections */
357
358 if (active) {
359 if (pr->flags.ordered) {
360 pr->ordered_bumped = 0;
361 pr->u.p.ordered_lower = 1;
362 pr->u.p.ordered_upper = 0;
363 }
364 }
365
366 switch (schedule) {
367#if KMP_STATIC_STEAL_ENABLED1
368 case kmp_sch_static_steal: {
369 T ntc, init;
370
371 KD_TRACE(100,if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_steal case\n"
, gtid); }
372 ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_steal case\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_steal case\n"
, gtid); }
373 gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_steal case\n"
, gtid); }
;
374
375 ntc = (tc % chunk ? 1 : 0) + tc / chunk;
376 if (nproc > 1 && ntc >= nproc) {
377 KMP_COUNT_BLOCK(OMP_LOOP_STATIC_STEAL)((void)0);
378 T id = tid;
379 T small_chunk, extras;
380 kmp_uint32 old = UNUSED;
381 int claimed = pr->steal_flag.compare_exchange_strong(old, CLAIMED);
382 if (traits_t<T>::type_size > 4) {
383 // AC: TODO: check if 16-byte CAS available and use it to
384 // improve performance (probably wait for explicit request
385 // before spending time on this).
386 // For now use dynamically allocated per-private-buffer lock,
387 // free memory in __kmp_dispatch_next when status==0.
388 pr->u.p.steal_lock = (kmp_lock_t *)__kmp_allocate(sizeof(kmp_lock_t))___kmp_allocate((sizeof(kmp_lock_t)), "openmp/runtime/src/kmp_dispatch.cpp"
, 388)
;
389 __kmp_init_lock(pr->u.p.steal_lock);
390 }
391 small_chunk = ntc / nproc;
392 extras = ntc % nproc;
393
394 init = id * small_chunk + (id < extras ? id : extras);
395 pr->u.p.count = init;
396 if (claimed) { // are we succeeded in claiming own buffer?
397 pr->u.p.ub = init + small_chunk + (id < extras ? 1 : 0);
398 // Other threads will inspect steal_flag when searching for a victim.
399 // READY means other threads may steal from this thread from now on.
400 KMP_ATOMIC_ST_REL(&pr->steal_flag, READY)(&pr->steal_flag)->store(READY, std::memory_order_release
)
;
401 } else {
402 // other thread has stolen whole our range
403 KMP_DEBUG_ASSERT(pr->steal_flag == THIEF)if (!(pr->steal_flag == THIEF)) { __kmp_debug_assert("pr->steal_flag == THIEF"
, "openmp/runtime/src/kmp_dispatch.cpp", 403); }
;
404 pr->u.p.ub = init; // mark there is no iterations to work on
405 }
406 pr->u.p.parm2 = ntc; // save number of chunks
407 // parm3 is the number of times to attempt stealing which is
408 // nproc (just a heuristics, could be optimized later on).
409 pr->u.p.parm3 = nproc;
410 pr->u.p.parm4 = (id + 1) % nproc; // remember neighbour tid
411 break;
412 } else {
413 /* too few chunks: switching to kmp_sch_dynamic_chunked */
414 schedule = kmp_sch_dynamic_chunked;
415 KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d switching to "if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d switching to "
"kmp_sch_dynamic_chunked\n", gtid); }
416 "kmp_sch_dynamic_chunked\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d switching to "
"kmp_sch_dynamic_chunked\n", gtid); }
417 gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d switching to "
"kmp_sch_dynamic_chunked\n", gtid); }
;
418 goto dynamic_init;
419 break;
420 } // if
421 } // case
422#endif
423 case kmp_sch_static_balanced: {
424 T init, limit;
425
426 KD_TRACE(if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_balanced case\n"
, gtid); }
427 100,if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_balanced case\n"
, gtid); }
428 ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_balanced case\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_balanced case\n"
, gtid); }
429 gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_balanced case\n"
, gtid); }
;
430
431 if (nproc > 1) {
432 T id = tid;
433
434 if (tc < nproc) {
435 if (id < tc) {
436 init = id;
437 limit = id;
438 pr->u.p.parm1 = (id == tc - 1); /* parm1 stores *plastiter */
439 } else {
440 pr->u.p.count = 1; /* means no more chunks to execute */
441 pr->u.p.parm1 = FALSE0;
442 break;
443 }
444 } else {
445 T small_chunk = tc / nproc;
446 T extras = tc % nproc;
447 init = id * small_chunk + (id < extras ? id : extras);
448 limit = init + small_chunk - (id < extras ? 0 : 1);
449 pr->u.p.parm1 = (id == nproc - 1);
450 }
451 } else {
452 if (tc > 0) {
453 init = 0;
454 limit = tc - 1;
455 pr->u.p.parm1 = TRUE(!0);
456 } else {
457 // zero trip count
458 pr->u.p.count = 1; /* means no more chunks to execute */
459 pr->u.p.parm1 = FALSE0;
460 break;
461 }
462 }
463#if USE_ITT_BUILD1
464 // Calculate chunk for metadata report
465 if (itt_need_metadata_reporting)
466 if (cur_chunk)
467 *cur_chunk = limit - init + 1;
468#endif
469 if (st == 1) {
470 pr->u.p.lb = lb + init;
471 pr->u.p.ub = lb + limit;
472 } else {
473 // calculated upper bound, "ub" is user-defined upper bound
474 T ub_tmp = lb + limit * st;
475 pr->u.p.lb = lb + init * st;
476 // adjust upper bound to "ub" if needed, so that MS lastprivate will match
477 // it exactly
478 if (st > 0) {
479 pr->u.p.ub = (ub_tmp + st > ub ? ub : ub_tmp);
480 } else {
481 pr->u.p.ub = (ub_tmp + st < ub ? ub : ub_tmp);
482 }
483 }
484 if (pr->flags.ordered) {
485 pr->u.p.ordered_lower = init;
486 pr->u.p.ordered_upper = limit;
487 }
488 break;
489 } // case
490 case kmp_sch_static_balanced_chunked: {
491 // similar to balanced, but chunk adjusted to multiple of simd width
492 T nth = nproc;
493 KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d runtime(simd:static)"if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d runtime(simd:static)"
" -> falling-through to static_greedy\n", gtid); }
494 " -> falling-through to static_greedy\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d runtime(simd:static)"
" -> falling-through to static_greedy\n", gtid); }
495 gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d runtime(simd:static)"
" -> falling-through to static_greedy\n", gtid); }
;
496 schedule = kmp_sch_static_greedy;
497 if (nth > 1)
498 pr->u.p.parm1 = ((tc + nth - 1) / nth + chunk - 1) & ~(chunk - 1);
499 else
500 pr->u.p.parm1 = tc;
501 break;
502 } // case
503 case kmp_sch_guided_simd:
504 case kmp_sch_guided_iterative_chunked: {
505 KD_TRACE(if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_guided_iterative_chunked"
" case\n", gtid); }
506 100,if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_guided_iterative_chunked"
" case\n", gtid); }
507 ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_guided_iterative_chunked"if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_guided_iterative_chunked"
" case\n", gtid); }
508 " case\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_guided_iterative_chunked"
" case\n", gtid); }
509 gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_guided_iterative_chunked"
" case\n", gtid); }
;
510
511 if (nproc > 1) {
512 if ((2L * chunk + 1) * nproc >= tc) {
513 /* chunk size too large, switch to dynamic */
514 schedule = kmp_sch_dynamic_chunked;
515 goto dynamic_init;
516 } else {
517 // when remaining iters become less than parm2 - switch to dynamic
518 pr->u.p.parm2 = guided_int_param * nproc * (chunk + 1);
519 *(double *)&pr->u.p.parm3 =
520 guided_flt_param / (double)nproc; // may occupy parm3 and parm4
521 }
522 } else {
523 KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d falling-through to "if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d falling-through to "
"kmp_sch_static_greedy\n", gtid); }
524 "kmp_sch_static_greedy\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d falling-through to "
"kmp_sch_static_greedy\n", gtid); }
525 gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d falling-through to "
"kmp_sch_static_greedy\n", gtid); }
;
526 schedule = kmp_sch_static_greedy;
527 /* team->t.t_nproc == 1: fall-through to kmp_sch_static_greedy */
528 KD_TRACE(if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_greedy case\n"
, gtid); }
529 100,if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_greedy case\n"
, gtid); }
530 ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_greedy case\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_greedy case\n"
, gtid); }
531 gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_greedy case\n"
, gtid); }
;
532 pr->u.p.parm1 = tc;
533 } // if
534 } // case
535 break;
536 case kmp_sch_guided_analytical_chunked: {
537 KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d "if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d "
"kmp_sch_guided_analytical_chunked case\n", gtid); }
538 "kmp_sch_guided_analytical_chunked case\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d "
"kmp_sch_guided_analytical_chunked case\n", gtid); }
539 gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d "
"kmp_sch_guided_analytical_chunked case\n", gtid); }
;
540
541 if (nproc > 1) {
542 if ((2L * chunk + 1) * nproc >= tc) {
543 /* chunk size too large, switch to dynamic */
544 schedule = kmp_sch_dynamic_chunked;
545 goto dynamic_init;
546 } else {
547 /* commonly used term: (2 nproc - 1)/(2 nproc) */
548 DBL x;
549
550#if KMP_USE_X87CONTROL0
551 /* Linux* OS already has 64-bit computation by default for long double,
552 and on Windows* OS on Intel(R) 64, /Qlong_double doesn't work. On
553 Windows* OS on IA-32 architecture, we need to set precision to 64-bit
554 instead of the default 53-bit. Even though long double doesn't work
555 on Windows* OS on Intel(R) 64, the resulting lack of precision is not
556 expected to impact the correctness of the algorithm, but this has not
557 been mathematically proven. */
558 // save original FPCW and set precision to 64-bit, as
559 // Windows* OS on IA-32 architecture defaults to 53-bit
560 unsigned int oldFpcw = _control87(0, 0);
561 _control87(_PC_64, _MCW_PC); // 0,0x30000
562#endif
563 /* value used for comparison in solver for cross-over point */
564 KMP_ASSERT(tc > 0)if (!(tc > 0)) { __kmp_debug_assert("tc > 0", "openmp/runtime/src/kmp_dispatch.cpp"
, 564); }
;
565 long double target = ((long double)chunk * 2 + 1) * nproc / tc;
566
567 /* crossover point--chunk indexes equal to or greater than
568 this point switch to dynamic-style scheduling */
569 UT cross;
570
571 /* commonly used term: (2 nproc - 1)/(2 nproc) */
572 x = 1.0 - 0.5 / (double)nproc;
573
574#ifdef KMP_DEBUG1
575 { // test natural alignment
576 struct _test_a {
577 char a;
578 union {
579 char b;
580 DBL d;
581 };
582 } t;
583 ptrdiff_t natural_alignment =
584 (ptrdiff_t)&t.b - (ptrdiff_t)&t - (ptrdiff_t)1;
585 //__kmp_warn( " %llx %llx %lld", (long long)&t.d, (long long)&t, (long
586 // long)natural_alignment );
587 KMP_DEBUG_ASSERT(if (!((((ptrdiff_t)&pr->u.p.parm3) & (natural_alignment
)) == 0)) { __kmp_debug_assert("(((ptrdiff_t)&pr->u.p.parm3) & (natural_alignment)) == 0"
, "openmp/runtime/src/kmp_dispatch.cpp", 588); }
588 (((ptrdiff_t)&pr->u.p.parm3) & (natural_alignment)) == 0)if (!((((ptrdiff_t)&pr->u.p.parm3) & (natural_alignment
)) == 0)) { __kmp_debug_assert("(((ptrdiff_t)&pr->u.p.parm3) & (natural_alignment)) == 0"
, "openmp/runtime/src/kmp_dispatch.cpp", 588); }
;
589 }
590#endif // KMP_DEBUG
591
592 /* save the term in thread private dispatch structure */
593 *(DBL *)&pr->u.p.parm3 = x;
594
595 /* solve for the crossover point to the nearest integer i for which C_i
596 <= chunk */
597 {
598 UT left, right, mid;
599 long double p;
600
601 /* estimate initial upper and lower bound */
602
603 /* doesn't matter what value right is as long as it is positive, but
604 it affects performance of the solver */
605 right = 229;
606 p = __kmp_pow<UT>(x, right);
607 if (p > target) {
608 do {
609 p *= p;
610 right <<= 1;
611 } while (p > target && right < (1 << 27));
612 /* lower bound is previous (failed) estimate of upper bound */
613 left = right >> 1;
614 } else {
615 left = 0;
616 }
617
618 /* bisection root-finding method */
619 while (left + 1 < right) {
620 mid = (left + right) / 2;
621 if (__kmp_pow<UT>(x, mid) > target) {
622 left = mid;
623 } else {
624 right = mid;
625 }
626 } // while
627 cross = right;
628 }
629 /* assert sanity of computed crossover point */
630 KMP_ASSERT(cross && __kmp_pow<UT>(x, cross - 1) > target &&if (!(cross && __kmp_pow<UT>(x, cross - 1) >
target && __kmp_pow<UT>(x, cross) <= target
)) { __kmp_debug_assert("cross && __kmp_pow<UT>(x, cross - 1) > target && __kmp_pow<UT>(x, cross) <= target"
, "openmp/runtime/src/kmp_dispatch.cpp", 631); }
631 __kmp_pow<UT>(x, cross) <= target)if (!(cross && __kmp_pow<UT>(x, cross - 1) >
target && __kmp_pow<UT>(x, cross) <= target
)) { __kmp_debug_assert("cross && __kmp_pow<UT>(x, cross - 1) > target && __kmp_pow<UT>(x, cross) <= target"
, "openmp/runtime/src/kmp_dispatch.cpp", 631); }
;
632
633 /* save the crossover point in thread private dispatch structure */
634 pr->u.p.parm2 = cross;
635
636// C75803
637#if ((KMP_OS_LINUX1 || KMP_OS_WINDOWS0) && KMP_ARCH_X860) && (!defined(KMP_I8))
638#define GUIDED_ANALYTICAL_WORKAROUND(x) (*(DBL *)&pr->u.p.parm3)
639#else
640#define GUIDED_ANALYTICAL_WORKAROUND(x) (x)
641#endif
642 /* dynamic-style scheduling offset */
643 pr->u.p.count = tc -
644 __kmp_dispatch_guided_remaining(
645 tc, GUIDED_ANALYTICAL_WORKAROUND(x), cross) -
646 cross * chunk;
647#if KMP_USE_X87CONTROL0
648 // restore FPCW
649 _control87(oldFpcw, _MCW_PC);
650#endif
651 } // if
652 } else {
653 KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d falling-through to "if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d falling-through to "
"kmp_sch_static_greedy\n", gtid); }
654 "kmp_sch_static_greedy\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d falling-through to "
"kmp_sch_static_greedy\n", gtid); }
655 gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d falling-through to "
"kmp_sch_static_greedy\n", gtid); }
;
656 schedule = kmp_sch_static_greedy;
657 /* team->t.t_nproc == 1: fall-through to kmp_sch_static_greedy */
658 pr->u.p.parm1 = tc;
659 } // if
660 } // case
661 break;
662 case kmp_sch_static_greedy:
663 KD_TRACE(if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_greedy case\n"
, gtid); }
664 100,if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_greedy case\n"
, gtid); }
665 ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_greedy case\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_greedy case\n"
, gtid); }
666 gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_greedy case\n"
, gtid); }
;
667 pr->u.p.parm1 = (nproc > 1) ? (tc + nproc - 1) / nproc : tc;
668 break;
669 case kmp_sch_static_chunked:
670 case kmp_sch_dynamic_chunked:
671 dynamic_init:
672 if (tc == 0)
673 break;
674 if (pr->u.p.parm1 <= 0)
675 pr->u.p.parm1 = KMP_DEFAULT_CHUNK1;
676 else if (pr->u.p.parm1 > tc)
677 pr->u.p.parm1 = tc;
678 // Store the total number of chunks to prevent integer overflow during
679 // bounds calculations in the get next chunk routine.
680 pr->u.p.parm2 = (tc / pr->u.p.parm1) + (tc % pr->u.p.parm1 ? 1 : 0);
681 KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d "if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d "
"kmp_sch_static_chunked/kmp_sch_dynamic_chunked cases\n", gtid
); }
682 "kmp_sch_static_chunked/kmp_sch_dynamic_chunked cases\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d "
"kmp_sch_static_chunked/kmp_sch_dynamic_chunked cases\n", gtid
); }
683 gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d "
"kmp_sch_static_chunked/kmp_sch_dynamic_chunked cases\n", gtid
); }
;
684 break;
685 case kmp_sch_trapezoidal: {
686 /* TSS: trapezoid self-scheduling, minimum chunk_size = parm1 */
687
688 T parm1, parm2, parm3, parm4;
689 KD_TRACE(100,if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_trapezoidal case\n"
, gtid); }
690 ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_trapezoidal case\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_trapezoidal case\n"
, gtid); }
691 gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_trapezoidal case\n"
, gtid); }
;
692
693 parm1 = chunk;
694
695 /* F : size of the first cycle */
696 parm2 = (tc / (2 * nproc));
697
698 if (parm2 < 1) {
699 parm2 = 1;
700 }
701
702 /* L : size of the last cycle. Make sure the last cycle is not larger
703 than the first cycle. */
704 if (parm1 < 1) {
705 parm1 = 1;
706 } else if (parm1 > parm2) {
707 parm1 = parm2;
708 }
709
710 /* N : number of cycles */
711 parm3 = (parm2 + parm1);
712 parm3 = (2 * tc + parm3 - 1) / parm3;
713
714 if (parm3 < 2) {
715 parm3 = 2;
716 }
717
718 /* sigma : decreasing incr of the trapezoid */
719 parm4 = (parm3 - 1);
720 parm4 = (parm2 - parm1) / parm4;
721
722 // pointless check, because parm4 >= 0 always
723 // if ( parm4 < 0 ) {
724 // parm4 = 0;
725 //}
726
727 pr->u.p.parm1 = parm1;
728 pr->u.p.parm2 = parm2;
729 pr->u.p.parm3 = parm3;
730 pr->u.p.parm4 = parm4;
731 } // case
732 break;
733
734 default: {
735 __kmp_fatal(KMP_MSG(UnknownSchedTypeDetected)__kmp_msg_format(kmp_i18n_msg_UnknownSchedTypeDetected), // Primary message
736 KMP_HNT(GetNewerLibrary)__kmp_msg_format(kmp_i18n_hnt_GetNewerLibrary), // Hint
737 __kmp_msg_null // Variadic argument list terminator
738 );
739 } break;
740 } // switch
741 pr->schedule = schedule;
742}
743
744#if KMP_USE_HIER_SCHED0
745template <typename T>
746inline void __kmp_dispatch_init_hier_runtime(ident_t *loc, T lb, T ub,
747 typename traits_t<T>::signed_t st);
748template <>
749inline void
750__kmp_dispatch_init_hier_runtime<kmp_int32>(ident_t *loc, kmp_int32 lb,
751 kmp_int32 ub, kmp_int32 st) {
752 __kmp_dispatch_init_hierarchy<kmp_int32>(
753 loc, __kmp_hier_scheds.size, __kmp_hier_scheds.layers,
754 __kmp_hier_scheds.scheds, __kmp_hier_scheds.small_chunks, lb, ub, st);
755}
756template <>
757inline void
758__kmp_dispatch_init_hier_runtime<kmp_uint32>(ident_t *loc, kmp_uint32 lb,
759 kmp_uint32 ub, kmp_int32 st) {
760 __kmp_dispatch_init_hierarchy<kmp_uint32>(
761 loc, __kmp_hier_scheds.size, __kmp_hier_scheds.layers,
762 __kmp_hier_scheds.scheds, __kmp_hier_scheds.small_chunks, lb, ub, st);
763}
764template <>
765inline void
766__kmp_dispatch_init_hier_runtime<kmp_int64>(ident_t *loc, kmp_int64 lb,
767 kmp_int64 ub, kmp_int64 st) {
768 __kmp_dispatch_init_hierarchy<kmp_int64>(
769 loc, __kmp_hier_scheds.size, __kmp_hier_scheds.layers,
770 __kmp_hier_scheds.scheds, __kmp_hier_scheds.large_chunks, lb, ub, st);
771}
772template <>
773inline void
774__kmp_dispatch_init_hier_runtime<kmp_uint64>(ident_t *loc, kmp_uint64 lb,
775 kmp_uint64 ub, kmp_int64 st) {
776 __kmp_dispatch_init_hierarchy<kmp_uint64>(
777 loc, __kmp_hier_scheds.size, __kmp_hier_scheds.layers,
778 __kmp_hier_scheds.scheds, __kmp_hier_scheds.large_chunks, lb, ub, st);
779}
780
781// free all the hierarchy scheduling memory associated with the team
782void __kmp_dispatch_free_hierarchies(kmp_team_t *team) {
783 int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
784 for (int i = 0; i < num_disp_buff; ++i) {
785 // type does not matter here so use kmp_int32
786 auto sh =
787 reinterpret_cast<dispatch_shared_info_template<kmp_int32> volatile *>(
788 &team->t.t_disp_buffer[i]);
789 if (sh->hier) {
790 sh->hier->deallocate();
791 __kmp_free(sh->hier)___kmp_free((sh->hier), "openmp/runtime/src/kmp_dispatch.cpp"
, 791)
;
792 }
793 }
794}
795#endif
796
797// UT - unsigned flavor of T, ST - signed flavor of T,
798// DBL - double if sizeof(T)==4, or long double if sizeof(T)==8
799template <typename T>
800static void
801__kmp_dispatch_init(ident_t *loc, int gtid, enum sched_type schedule, T lb,
802 T ub, typename traits_t<T>::signed_t st,
803 typename traits_t<T>::signed_t chunk, int push_ws) {
804 typedef typename traits_t<T>::unsigned_t UT;
805
806 int active;
807 kmp_info_t *th;
808 kmp_team_t *team;
809 kmp_uint32 my_buffer_index;
810 dispatch_private_info_template<T> *pr;
811 dispatch_shared_info_template<T> volatile *sh;
812
813 KMP_BUILD_ASSERT(sizeof(dispatch_private_info_template<T>) ==static_assert(sizeof(dispatch_private_info_template<T>)
== sizeof(dispatch_private_info), "Build condition error")
814 sizeof(dispatch_private_info))static_assert(sizeof(dispatch_private_info_template<T>)
== sizeof(dispatch_private_info), "Build condition error")
;
815 KMP_BUILD_ASSERT(sizeof(dispatch_shared_info_template<UT>) ==static_assert(sizeof(dispatch_shared_info_template<UT>)
== sizeof(dispatch_shared_info), "Build condition error")
816 sizeof(dispatch_shared_info))static_assert(sizeof(dispatch_shared_info_template<UT>)
== sizeof(dispatch_shared_info), "Build condition error")
;
817 __kmp_assert_valid_gtid(gtid);
818
819 if (!TCR_4(__kmp_init_parallel)(__kmp_init_parallel))
820 __kmp_parallel_initialize();
821
822 __kmp_resume_if_soft_paused();
823
824#if INCLUDE_SSC_MARKS(1 && 1)
825 SSC_MARK_DISPATCH_INIT()__asm__ __volatile__("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 "
::"i"(0xd696) : "%ebx")
;
826#endif
827#ifdef KMP_DEBUG1
828 typedef typename traits_t<T>::signed_t ST;
829 {
830 char *buff;
831 // create format specifiers before the debug output
832 buff = __kmp_str_format("__kmp_dispatch_init: T#%%d called: schedule:%%d "
833 "chunk:%%%s lb:%%%s ub:%%%s st:%%%s\n",
834 traits_t<ST>::spec, traits_t<T>::spec,
835 traits_t<T>::spec, traits_t<ST>::spec);
836 KD_TRACE(10, (buff, gtid, schedule, chunk, lb, ub, st))if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, schedule
, chunk, lb, ub, st); }
;
837 __kmp_str_free(&buff);
838 }
839#endif
840 /* setup data */
841 th = __kmp_threads[gtid];
842 team = th->th.th_team;
843 active = !team->t.t_serialized;
844 th->th.th_ident = loc;
845
846 // Any half-decent optimizer will remove this test when the blocks are empty
847 // since the macros expand to nothing
848 // when statistics are disabled.
849 if (schedule == __kmp_static) {
850 KMP_COUNT_BLOCK(OMP_LOOP_STATIC)((void)0);
851 } else {
852 KMP_COUNT_BLOCK(OMP_LOOP_DYNAMIC)((void)0);
853 }
854
855#if KMP_USE_HIER_SCHED0
856 // Initialize the scheduling hierarchy if requested in OMP_SCHEDULE envirable
857 // Hierarchical scheduling does not work with ordered, so if ordered is
858 // detected, then revert back to threaded scheduling.
859 bool ordered;
860 enum sched_type my_sched = schedule;
861 my_buffer_index = th->th.th_dispatch->th_disp_index;
862 pr = reinterpret_cast<dispatch_private_info_template<T> *>(
863 &th->th.th_dispatch
864 ->th_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]);
865 my_sched = SCHEDULE_WITHOUT_MODIFIERS(my_sched)(enum sched_type)( (my_sched) & ~(kmp_sch_modifier_nonmonotonic
| kmp_sch_modifier_monotonic))
;
866 if ((my_sched >= kmp_nm_lower) && (my_sched < kmp_nm_upper))
867 my_sched =
868 (enum sched_type)(((int)my_sched) - (kmp_nm_lower - kmp_sch_lower));
869 ordered = (kmp_ord_lower & my_sched);
870 if (pr->flags.use_hier) {
871 if (ordered) {
872 KD_TRACE(100, ("__kmp_dispatch_init: T#%d ordered loop detected. "if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init: T#%d ordered loop detected. "
"Disabling hierarchical scheduling.\n", gtid); }
873 "Disabling hierarchical scheduling.\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init: T#%d ordered loop detected. "
"Disabling hierarchical scheduling.\n", gtid); }
874 gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init: T#%d ordered loop detected. "
"Disabling hierarchical scheduling.\n", gtid); }
;
875 pr->flags.use_hier = FALSE0;
876 }
877 }
878 if (schedule == kmp_sch_runtime && __kmp_hier_scheds.size > 0) {
879 // Don't use hierarchical for ordered parallel loops and don't
880 // use the runtime hierarchy if one was specified in the program
881 if (!ordered && !pr->flags.use_hier)
882 __kmp_dispatch_init_hier_runtime<T>(loc, lb, ub, st);
883 }
884#endif // KMP_USE_HIER_SCHED
885
886#if USE_ITT_BUILD1
887 kmp_uint64 cur_chunk = chunk;
888 int itt_need_metadata_reporting =
889 __itt_metadata_add_ptr__kmp_itt_metadata_add_ptr__3_0 && __kmp_forkjoin_frames_mode == 3 &&
890 KMP_MASTER_GTID(gtid)(0 == __kmp_tid_from_gtid((gtid))) && th->th.th_teams_microtask == NULL__null &&
891 team->t.t_active_level == 1;
892#endif
893 if (!active) {
894 pr = reinterpret_cast<dispatch_private_info_template<T> *>(
895 th->th.th_dispatch->th_disp_buffer); /* top of the stack */
896 } else {
897 KMP_DEBUG_ASSERT(th->th.th_dispatch ==if (!(th->th.th_dispatch == &th->th.th_team->t.t_dispatch
[th->th.th_info.ds.ds_tid])) { __kmp_debug_assert("th->th.th_dispatch == &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]"
, "openmp/runtime/src/kmp_dispatch.cpp", 898); }
898 &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid])if (!(th->th.th_dispatch == &th->th.th_team->t.t_dispatch
[th->th.th_info.ds.ds_tid])) { __kmp_debug_assert("th->th.th_dispatch == &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]"
, "openmp/runtime/src/kmp_dispatch.cpp", 898); }
;
899
900 my_buffer_index = th->th.th_dispatch->th_disp_index++;
901
902 /* What happens when number of threads changes, need to resize buffer? */
903 pr = reinterpret_cast<dispatch_private_info_template<T> *>(
904 &th->th.th_dispatch
905 ->th_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]);
906 sh = reinterpret_cast<dispatch_shared_info_template<T> volatile *>(
907 &team->t.t_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]);
908 KD_TRACE(10, ("__kmp_dispatch_init: T#%d my_buffer_index:%d\n", gtid,if (kmp_d_debug >= 10) { __kmp_debug_printf ("__kmp_dispatch_init: T#%d my_buffer_index:%d\n"
, gtid, my_buffer_index); }
909 my_buffer_index))if (kmp_d_debug >= 10) { __kmp_debug_printf ("__kmp_dispatch_init: T#%d my_buffer_index:%d\n"
, gtid, my_buffer_index); }
;
910 if (sh->buffer_index != my_buffer_index) { // too many loops in progress?
911 KD_TRACE(100, ("__kmp_dispatch_init: T#%d before wait: my_buffer_index:%d"if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init: T#%d before wait: my_buffer_index:%d"
" sh->buffer_index:%d\n", gtid, my_buffer_index, sh->buffer_index
); }
912 " sh->buffer_index:%d\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init: T#%d before wait: my_buffer_index:%d"
" sh->buffer_index:%d\n", gtid, my_buffer_index, sh->buffer_index
); }
913 gtid, my_buffer_index, sh->buffer_index))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init: T#%d before wait: my_buffer_index:%d"
" sh->buffer_index:%d\n", gtid, my_buffer_index, sh->buffer_index
); }
;
914 __kmp_wait<kmp_uint32>(&sh->buffer_index, my_buffer_index,
915 __kmp_eq<kmp_uint32> USE_ITT_BUILD_ARG(NULL), __null);
916 // Note: KMP_WAIT() cannot be used there: buffer index and
917 // my_buffer_index are *always* 32-bit integers.
918 KD_TRACE(100, ("__kmp_dispatch_init: T#%d after wait: my_buffer_index:%d "if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init: T#%d after wait: my_buffer_index:%d "
"sh->buffer_index:%d\n", gtid, my_buffer_index, sh->buffer_index
); }
919 "sh->buffer_index:%d\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init: T#%d after wait: my_buffer_index:%d "
"sh->buffer_index:%d\n", gtid, my_buffer_index, sh->buffer_index
); }
920 gtid, my_buffer_index, sh->buffer_index))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init: T#%d after wait: my_buffer_index:%d "
"sh->buffer_index:%d\n", gtid, my_buffer_index, sh->buffer_index
); }
;
921 }
922 }
923
924 __kmp_dispatch_init_algorithm(loc, gtid, pr, schedule, lb, ub, st,
925#if USE_ITT_BUILD1
926 &cur_chunk,
927#endif
928 chunk, (T)th->th.th_team_nproc,
929 (T)th->th.th_info.ds.ds_tid);
930 if (active) {
931 if (pr->flags.ordered == 0) {
932 th->th.th_dispatch->th_deo_fcn = __kmp_dispatch_deo_error;
933 th->th.th_dispatch->th_dxo_fcn = __kmp_dispatch_dxo_error;
934 } else {
935 th->th.th_dispatch->th_deo_fcn = __kmp_dispatch_deo<UT>;
936 th->th.th_dispatch->th_dxo_fcn = __kmp_dispatch_dxo<UT>;
937 }
938 th->th.th_dispatch->th_dispatch_pr_current = (dispatch_private_info_t *)pr;
939 th->th.th_dispatch->th_dispatch_sh_current =
940 CCAST(dispatch_shared_info_t *, (volatile dispatch_shared_info_t *)sh)const_cast<dispatch_shared_info_t *>((volatile dispatch_shared_info_t
*)sh)
;
941#if USE_ITT_BUILD1
942 if (pr->flags.ordered) {
943 __kmp_itt_ordered_init(gtid);
944 }
945 // Report loop metadata
946 if (itt_need_metadata_reporting) {
947 // Only report metadata by primary thread of active team at level 1
948 kmp_uint64 schedtype = 0;
949 switch (schedule) {
950 case kmp_sch_static_chunked:
951 case kmp_sch_static_balanced: // Chunk is calculated in the switch above
952 break;
953 case kmp_sch_static_greedy:
954 cur_chunk = pr->u.p.parm1;
955 break;
956 case kmp_sch_dynamic_chunked:
957 schedtype = 1;
958 break;
959 case kmp_sch_guided_iterative_chunked:
960 case kmp_sch_guided_analytical_chunked:
961 case kmp_sch_guided_simd:
962 schedtype = 2;
963 break;
964 default:
965 // Should we put this case under "static"?
966 // case kmp_sch_static_steal:
967 schedtype = 3;
968 break;
969 }
970 __kmp_itt_metadata_loop(loc, schedtype, pr->u.p.tc, cur_chunk);
971 }
972#if KMP_USE_HIER_SCHED0
973 if (pr->flags.use_hier) {
974 pr->u.p.count = 0;
975 pr->u.p.ub = pr->u.p.lb = pr->u.p.st = pr->u.p.tc = 0;
976 }
977#endif // KMP_USER_HIER_SCHED
978#endif /* USE_ITT_BUILD */
979 }
980
981#ifdef KMP_DEBUG1
982 {
983 char *buff;
984 // create format specifiers before the debug output
985 buff = __kmp_str_format(
986 "__kmp_dispatch_init: T#%%d returning: schedule:%%d ordered:%%%s "
987 "lb:%%%s ub:%%%s"
988 " st:%%%s tc:%%%s count:%%%s\n\tordered_lower:%%%s ordered_upper:%%%s"
989 " parm1:%%%s parm2:%%%s parm3:%%%s parm4:%%%s\n",
990 traits_t<UT>::spec, traits_t<T>::spec, traits_t<T>::spec,
991 traits_t<ST>::spec, traits_t<UT>::spec, traits_t<UT>::spec,
992 traits_t<UT>::spec, traits_t<UT>::spec, traits_t<T>::spec,
993 traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec);
994 KD_TRACE(10, (buff, gtid, pr->schedule, pr->flags.ordered, pr->u.p.lb,if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, pr
->schedule, pr->flags.ordered, pr->u.p.lb, pr->u.
p.ub, pr->u.p.st, pr->u.p.tc, pr->u.p.count, pr->
u.p.ordered_lower, pr->u.p.ordered_upper, pr->u.p.parm1
, pr->u.p.parm2, pr->u.p.parm3, pr->u.p.parm4); }
995 pr->u.p.ub, pr->u.p.st, pr->u.p.tc, pr->u.p.count,if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, pr
->schedule, pr->flags.ordered, pr->u.p.lb, pr->u.
p.ub, pr->u.p.st, pr->u.p.tc, pr->u.p.count, pr->
u.p.ordered_lower, pr->u.p.ordered_upper, pr->u.p.parm1
, pr->u.p.parm2, pr->u.p.parm3, pr->u.p.parm4); }
996 pr->u.p.ordered_lower, pr->u.p.ordered_upper, pr->u.p.parm1,if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, pr
->schedule, pr->flags.ordered, pr->u.p.lb, pr->u.
p.ub, pr->u.p.st, pr->u.p.tc, pr->u.p.count, pr->
u.p.ordered_lower, pr->u.p.ordered_upper, pr->u.p.parm1
, pr->u.p.parm2, pr->u.p.parm3, pr->u.p.parm4); }
997 pr->u.p.parm2, pr->u.p.parm3, pr->u.p.parm4))if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, pr
->schedule, pr->flags.ordered, pr->u.p.lb, pr->u.
p.ub, pr->u.p.st, pr->u.p.tc, pr->u.p.count, pr->
u.p.ordered_lower, pr->u.p.ordered_upper, pr->u.p.parm1
, pr->u.p.parm2, pr->u.p.parm3, pr->u.p.parm4); }
;
998 __kmp_str_free(&buff);
999 }
1000#endif
1001#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
1002 if (ompt_enabled.ompt_callback_work) {
1003 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL__null);
1004 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
1005 ompt_callbacks.ompt_callback(ompt_callback_work)ompt_callback_work_callback(
1006 ompt_work_loop, ompt_scope_begin, &(team_info->parallel_data),
1007 &(task_info->task_data), pr->u.p.tc, OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid));
1008 }
1009#endif
1010 KMP_PUSH_PARTITIONED_TIMER(OMP_loop_dynamic)((void)0);
1011}
1012
1013/* For ordered loops, either __kmp_dispatch_finish() should be called after
1014 * every iteration, or __kmp_dispatch_finish_chunk() should be called after
1015 * every chunk of iterations. If the ordered section(s) were not executed
1016 * for this iteration (or every iteration in this chunk), we need to set the
1017 * ordered iteration counters so that the next thread can proceed. */
1018template <typename UT>
1019static void __kmp_dispatch_finish(int gtid, ident_t *loc) {
1020 typedef typename traits_t<UT>::signed_t ST;
1021 __kmp_assert_valid_gtid(gtid);
1022 kmp_info_t *th = __kmp_threads[gtid];
1023
1024 KD_TRACE(100, ("__kmp_dispatch_finish: T#%d called\n", gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_finish: T#%d called\n"
, gtid); }
;
1025 if (!th->th.th_team->t.t_serialized) {
1026
1027 dispatch_private_info_template<UT> *pr =
1028 reinterpret_cast<dispatch_private_info_template<UT> *>(
1029 th->th.th_dispatch->th_dispatch_pr_current);
1030 dispatch_shared_info_template<UT> volatile *sh =
1031 reinterpret_cast<dispatch_shared_info_template<UT> volatile *>(
1032 th->th.th_dispatch->th_dispatch_sh_current);
1033 KMP_DEBUG_ASSERT(pr)if (!(pr)) { __kmp_debug_assert("pr", "openmp/runtime/src/kmp_dispatch.cpp"
, 1033); }
;
1034 KMP_DEBUG_ASSERT(sh)if (!(sh)) { __kmp_debug_assert("sh", "openmp/runtime/src/kmp_dispatch.cpp"
, 1034); }
;
1035 KMP_DEBUG_ASSERT(th->th.th_dispatch ==if (!(th->th.th_dispatch == &th->th.th_team->t.t_dispatch
[th->th.th_info.ds.ds_tid])) { __kmp_debug_assert("th->th.th_dispatch == &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]"
, "openmp/runtime/src/kmp_dispatch.cpp", 1036); }
1036 &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid])if (!(th->th.th_dispatch == &th->th.th_team->t.t_dispatch
[th->th.th_info.ds.ds_tid])) { __kmp_debug_assert("th->th.th_dispatch == &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]"
, "openmp/runtime/src/kmp_dispatch.cpp", 1036); }
;
1037
1038 if (pr->ordered_bumped) {
1039 KD_TRACE(if (kmp_d_debug >= 1000) { __kmp_debug_printf ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n"
, gtid); }
1040 1000,if (kmp_d_debug >= 1000) { __kmp_debug_printf ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n"
, gtid); }
1041 ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n",if (kmp_d_debug >= 1000) { __kmp_debug_printf ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n"
, gtid); }
1042 gtid))if (kmp_d_debug >= 1000) { __kmp_debug_printf ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n"
, gtid); }
;
1043 pr->ordered_bumped = 0;
1044 } else {
1045 UT lower = pr->u.p.ordered_lower;
1046
1047#ifdef KMP_DEBUG1
1048 {
1049 char *buff;
1050 // create format specifiers before the debug output
1051 buff = __kmp_str_format("__kmp_dispatch_finish: T#%%d before wait: "
1052 "ordered_iteration:%%%s lower:%%%s\n",
1053 traits_t<UT>::spec, traits_t<UT>::spec);
1054 KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower))if (kmp_d_debug >= 1000) { __kmp_debug_printf (buff, gtid,
sh->u.s.ordered_iteration, lower); }
;
1055 __kmp_str_free(&buff);
1056 }
1057#endif
1058
1059 __kmp_wait<UT>(&sh->u.s.ordered_iteration, lower,
1060 __kmp_ge<UT> USE_ITT_BUILD_ARG(NULL), __null);
1061 KMP_MB(); /* is this necessary? */
1062#ifdef KMP_DEBUG1
1063 {
1064 char *buff;
1065 // create format specifiers before the debug output
1066 buff = __kmp_str_format("__kmp_dispatch_finish: T#%%d after wait: "
1067 "ordered_iteration:%%%s lower:%%%s\n",
1068 traits_t<UT>::spec, traits_t<UT>::spec);
1069 KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower))if (kmp_d_debug >= 1000) { __kmp_debug_printf (buff, gtid,
sh->u.s.ordered_iteration, lower); }
;
1070 __kmp_str_free(&buff);
1071 }
1072#endif
1073
1074 test_then_inc<ST>((volatile ST *)&sh->u.s.ordered_iteration);
1075 } // if
1076 } // if
1077 KD_TRACE(100, ("__kmp_dispatch_finish: T#%d returned\n", gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_finish: T#%d returned\n"
, gtid); }
;
1078}
1079
1080#ifdef KMP_GOMP_COMPAT
1081
1082template <typename UT>
1083static void __kmp_dispatch_finish_chunk(int gtid, ident_t *loc) {
1084 typedef typename traits_t<UT>::signed_t ST;
1085 __kmp_assert_valid_gtid(gtid);
1086 kmp_info_t *th = __kmp_threads[gtid];
1087
1088 KD_TRACE(100, ("__kmp_dispatch_finish_chunk: T#%d called\n", gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_finish_chunk: T#%d called\n"
, gtid); }
;
1089 if (!th->th.th_team->t.t_serialized) {
1090 dispatch_private_info_template<UT> *pr =
1091 reinterpret_cast<dispatch_private_info_template<UT> *>(
1092 th->th.th_dispatch->th_dispatch_pr_current);
1093 dispatch_shared_info_template<UT> volatile *sh =
1094 reinterpret_cast<dispatch_shared_info_template<UT> volatile *>(
1095 th->th.th_dispatch->th_dispatch_sh_current);
1096 KMP_DEBUG_ASSERT(pr)if (!(pr)) { __kmp_debug_assert("pr", "openmp/runtime/src/kmp_dispatch.cpp"
, 1096); }
;
1097 KMP_DEBUG_ASSERT(sh)if (!(sh)) { __kmp_debug_assert("sh", "openmp/runtime/src/kmp_dispatch.cpp"
, 1097); }
;
1098 KMP_DEBUG_ASSERT(th->th.th_dispatch ==if (!(th->th.th_dispatch == &th->th.th_team->t.t_dispatch
[th->th.th_info.ds.ds_tid])) { __kmp_debug_assert("th->th.th_dispatch == &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]"
, "openmp/runtime/src/kmp_dispatch.cpp", 1099); }
1099 &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid])if (!(th->th.th_dispatch == &th->th.th_team->t.t_dispatch
[th->th.th_info.ds.ds_tid])) { __kmp_debug_assert("th->th.th_dispatch == &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]"
, "openmp/runtime/src/kmp_dispatch.cpp", 1099); }
;
1100
1101 UT lower = pr->u.p.ordered_lower;
1102 UT upper = pr->u.p.ordered_upper;
1103 UT inc = upper - lower + 1;
1104
1105 if (pr->ordered_bumped == inc) {
1106 KD_TRACE(if (kmp_d_debug >= 1000) { __kmp_debug_printf ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n"
, gtid); }
1107 1000,if (kmp_d_debug >= 1000) { __kmp_debug_printf ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n"
, gtid); }
1108 ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n",if (kmp_d_debug >= 1000) { __kmp_debug_printf ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n"
, gtid); }
1109 gtid))if (kmp_d_debug >= 1000) { __kmp_debug_printf ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n"
, gtid); }
;
1110 pr->ordered_bumped = 0;
1111 } else {
1112 inc -= pr->ordered_bumped;
1113
1114#ifdef KMP_DEBUG1
1115 {
1116 char *buff;
1117 // create format specifiers before the debug output
1118 buff = __kmp_str_format(
1119 "__kmp_dispatch_finish_chunk: T#%%d before wait: "
1120 "ordered_iteration:%%%s lower:%%%s upper:%%%s\n",
1121 traits_t<UT>::spec, traits_t<UT>::spec, traits_t<UT>::spec);
1122 KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower, upper))if (kmp_d_debug >= 1000) { __kmp_debug_printf (buff, gtid,
sh->u.s.ordered_iteration, lower, upper); }
;
1123 __kmp_str_free(&buff);
1124 }
1125#endif
1126
1127 __kmp_wait<UT>(&sh->u.s.ordered_iteration, lower,
1128 __kmp_ge<UT> USE_ITT_BUILD_ARG(NULL), __null);
1129
1130 KMP_MB(); /* is this necessary? */
1131 KD_TRACE(1000, ("__kmp_dispatch_finish_chunk: T#%d resetting "if (kmp_d_debug >= 1000) { __kmp_debug_printf ("__kmp_dispatch_finish_chunk: T#%d resetting "
"ordered_bumped to zero\n", gtid); }
1132 "ordered_bumped to zero\n",if (kmp_d_debug >= 1000) { __kmp_debug_printf ("__kmp_dispatch_finish_chunk: T#%d resetting "
"ordered_bumped to zero\n", gtid); }
1133 gtid))if (kmp_d_debug >= 1000) { __kmp_debug_printf ("__kmp_dispatch_finish_chunk: T#%d resetting "
"ordered_bumped to zero\n", gtid); }
;
1134 pr->ordered_bumped = 0;
1135//!!!!! TODO check if the inc should be unsigned, or signed???
1136#ifdef KMP_DEBUG1
1137 {
1138 char *buff;
1139 // create format specifiers before the debug output
1140 buff = __kmp_str_format(
1141 "__kmp_dispatch_finish_chunk: T#%%d after wait: "
1142 "ordered_iteration:%%%s inc:%%%s lower:%%%s upper:%%%s\n",
1143 traits_t<UT>::spec, traits_t<UT>::spec, traits_t<UT>::spec,
1144 traits_t<UT>::spec);
1145 KD_TRACE(1000,if (kmp_d_debug >= 1000) { __kmp_debug_printf (buff, gtid,
sh->u.s.ordered_iteration, inc, lower, upper); }
1146 (buff, gtid, sh->u.s.ordered_iteration, inc, lower, upper))if (kmp_d_debug >= 1000) { __kmp_debug_printf (buff, gtid,
sh->u.s.ordered_iteration, inc, lower, upper); }
;
1147 __kmp_str_free(&buff);
1148 }
1149#endif
1150
1151 test_then_add<ST>((volatile ST *)&sh->u.s.ordered_iteration, inc);
1152 }
1153 // }
1154 }
1155 KD_TRACE(100, ("__kmp_dispatch_finish_chunk: T#%d returned\n", gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_finish_chunk: T#%d returned\n"
, gtid); }
;
1156}
1157
1158#endif /* KMP_GOMP_COMPAT */
1159
1160template <typename T>
1161int __kmp_dispatch_next_algorithm(int gtid,
1162 dispatch_private_info_template<T> *pr,
1163 dispatch_shared_info_template<T> volatile *sh,
1164 kmp_int32 *p_last, T *p_lb, T *p_ub,
1165 typename traits_t<T>::signed_t *p_st, T nproc,
1166 T tid) {
1167 typedef typename traits_t<T>::unsigned_t UT;
1168 typedef typename traits_t<T>::signed_t ST;
1169 typedef typename traits_t<T>::floating_t DBL;
1170 int status = 0;
1171 bool last = false;
1172 T start;
1173 ST incr;
1174 UT limit, trip, init;
1175 kmp_info_t *th = __kmp_threads[gtid];
1176 kmp_team_t *team = th->th.th_team;
1177
1178 KMP_DEBUG_ASSERT(th->th.th_dispatch ==if (!(th->th.th_dispatch == &th->th.th_team->t.t_dispatch
[th->th.th_info.ds.ds_tid])) { __kmp_debug_assert("th->th.th_dispatch == &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]"
, "openmp/runtime/src/kmp_dispatch.cpp", 1179); }
1179 &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid])if (!(th->th.th_dispatch == &th->th.th_team->t.t_dispatch
[th->th.th_info.ds.ds_tid])) { __kmp_debug_assert("th->th.th_dispatch == &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]"
, "openmp/runtime/src/kmp_dispatch.cpp", 1179); }
;
1180 KMP_DEBUG_ASSERT(pr)if (!(pr)) { __kmp_debug_assert("pr", "openmp/runtime/src/kmp_dispatch.cpp"
, 1180); }
;
1181 KMP_DEBUG_ASSERT(sh)if (!(sh)) { __kmp_debug_assert("sh", "openmp/runtime/src/kmp_dispatch.cpp"
, 1181); }
;
1182 KMP_DEBUG_ASSERT(tid >= 0 && tid < nproc)if (!(tid >= 0 && tid < nproc)) { __kmp_debug_assert
("tid >= 0 && tid < nproc", "openmp/runtime/src/kmp_dispatch.cpp"
, 1182); }
;
1183#ifdef KMP_DEBUG1
1184 {
1185 char *buff;
1186 // create format specifiers before the debug output
1187 buff =
1188 __kmp_str_format("__kmp_dispatch_next_algorithm: T#%%d called pr:%%p "
1189 "sh:%%p nproc:%%%s tid:%%%s\n",
1190 traits_t<T>::spec, traits_t<T>::spec);
1191 KD_TRACE(10, (buff, gtid, pr, sh, nproc, tid))if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, pr
, sh, nproc, tid); }
;
1192 __kmp_str_free(&buff);
1193 }
1194#endif
1195
1196 // zero trip count
1197 if (pr->u.p.tc == 0) {
1198 KD_TRACE(10,if (kmp_d_debug >= 10) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d early exit trip count is "
"zero status:%d\n", gtid, status); }
1199 ("__kmp_dispatch_next_algorithm: T#%d early exit trip count is "if (kmp_d_debug >= 10) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d early exit trip count is "
"zero status:%d\n", gtid, status); }
1200 "zero status:%d\n",if (kmp_d_debug >= 10) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d early exit trip count is "
"zero status:%d\n", gtid, status); }
1201 gtid, status))if (kmp_d_debug >= 10) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d early exit trip count is "
"zero status:%d\n", gtid, status); }
;
1202 return 0;
1203 }
1204
1205 switch (pr->schedule) {
1206#if KMP_STATIC_STEAL_ENABLED1
1207 case kmp_sch_static_steal: {
1208 T chunk = pr->u.p.parm1;
1209 UT nchunks = pr->u.p.parm2;
1210 KD_TRACE(100,if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_static_steal case\n"
, gtid); }
1211 ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_static_steal case\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_static_steal case\n"
, gtid); }
1212 gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_static_steal case\n"
, gtid); }
;
1213
1214 trip = pr->u.p.tc - 1;
1215
1216 if (traits_t<T>::type_size > 4) {
1217 // use lock for 8-byte induction variable.
1218 // TODO (optional): check presence and use 16-byte CAS
1219 kmp_lock_t *lck = pr->u.p.steal_lock;
1220 KMP_DEBUG_ASSERT(lck != NULL)if (!(lck != __null)) { __kmp_debug_assert("lck != __null", "openmp/runtime/src/kmp_dispatch.cpp"
, 1220); }
;
1221 if (pr->u.p.count < (UT)pr->u.p.ub) {
1222 KMP_DEBUG_ASSERT(pr->steal_flag == READY)if (!(pr->steal_flag == READY)) { __kmp_debug_assert("pr->steal_flag == READY"
, "openmp/runtime/src/kmp_dispatch.cpp", 1222); }
;
1223 __kmp_acquire_lock(lck, gtid);
1224 // try to get own chunk of iterations
1225 init = (pr->u.p.count)++;
1226 status = (init < (UT)pr->u.p.ub);
1227 __kmp_release_lock(lck, gtid);
1228 } else {
1229 status = 0; // no own chunks
1230 }
1231 if (!status) { // try to steal
1232 kmp_lock_t *lckv; // victim buffer's lock
1233 T while_limit = pr->u.p.parm3;
1234 T while_index = 0;
1235 int idx = (th->th.th_dispatch->th_disp_index - 1) %
1236 __kmp_dispatch_num_buffers; // current loop index
1237 // note: victim thread can potentially execute another loop
1238 KMP_ATOMIC_ST_REL(&pr->steal_flag, THIEF)(&pr->steal_flag)->store(THIEF, std::memory_order_release
)
; // mark self buffer inactive
1239 while ((!status) && (while_limit != ++while_index)) {
1240 dispatch_private_info_template<T> *v;
1241 T remaining;
1242 T victimId = pr->u.p.parm4;
1243 T oldVictimId = victimId ? victimId - 1 : nproc - 1;
1244 v = reinterpret_cast<dispatch_private_info_template<T> *>(
1245 &team->t.t_dispatch[victimId].th_disp_buffer[idx]);
1246 KMP_DEBUG_ASSERT(v)if (!(v)) { __kmp_debug_assert("v", "openmp/runtime/src/kmp_dispatch.cpp"
, 1246); }
;
1247 while ((v == pr || KMP_ATOMIC_LD_RLX(&v->steal_flag)(&v->steal_flag)->load(std::memory_order_relaxed) == THIEF) &&
1248 oldVictimId != victimId) {
1249 victimId = (victimId + 1) % nproc;
1250 v = reinterpret_cast<dispatch_private_info_template<T> *>(
1251 &team->t.t_dispatch[victimId].th_disp_buffer[idx]);
1252 KMP_DEBUG_ASSERT(v)if (!(v)) { __kmp_debug_assert("v", "openmp/runtime/src/kmp_dispatch.cpp"
, 1252); }
;
1253 }
1254 if (v == pr || KMP_ATOMIC_LD_RLX(&v->steal_flag)(&v->steal_flag)->load(std::memory_order_relaxed) == THIEF) {
1255 continue; // try once more (nproc attempts in total)
1256 }
1257 if (KMP_ATOMIC_LD_RLX(&v->steal_flag)(&v->steal_flag)->load(std::memory_order_relaxed) == UNUSED) {
1258 kmp_uint32 old = UNUSED;
1259 // try to steal whole range from inactive victim
1260 status = v->steal_flag.compare_exchange_strong(old, THIEF);
1261 if (status) {
1262 // initialize self buffer with victim's whole range of chunks
1263 T id = victimId;
1264 T small_chunk, extras;
1265 small_chunk = nchunks / nproc; // chunks per thread
1266 extras = nchunks % nproc;
1267 init = id * small_chunk + (id < extras ? id : extras);
1268 __kmp_acquire_lock(lck, gtid);
1269 pr->u.p.count = init + 1; // exclude one we execute immediately
1270 pr->u.p.ub = init + small_chunk + (id < extras ? 1 : 0);
1271 __kmp_release_lock(lck, gtid);
1272 pr->u.p.parm4 = (id + 1) % nproc; // remember neighbour tid
1273 // no need to reinitialize other thread invariants: lb, st, etc.
1274#ifdef KMP_DEBUG1
1275 {
1276 char *buff;
1277 // create format specifiers before the debug output
1278 buff = __kmp_str_format(
1279 "__kmp_dispatch_next: T#%%d stolen chunks from T#%%d, "
1280 "count:%%%s ub:%%%s\n",
1281 traits_t<UT>::spec, traits_t<T>::spec);
1282 KD_TRACE(10, (buff, gtid, id, pr->u.p.count, pr->u.p.ub))if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, id
, pr->u.p.count, pr->u.p.ub); }
;
1283 __kmp_str_free(&buff);
1284 }
1285#endif
1286 // activate non-empty buffer and let others steal from us
1287 if (pr->u.p.count < (UT)pr->u.p.ub)
1288 KMP_ATOMIC_ST_REL(&pr->steal_flag, READY)(&pr->steal_flag)->store(READY, std::memory_order_release
)
;
1289 break;
1290 }
1291 }
1292 if (KMP_ATOMIC_LD_RLX(&v->steal_flag)(&v->steal_flag)->load(std::memory_order_relaxed) != READY ||
1293 v->u.p.count >= (UT)v->u.p.ub) {
1294 pr->u.p.parm4 = (victimId + 1) % nproc; // shift start victim tid
1295 continue; // no chunks to steal, try next victim
1296 }
1297 lckv = v->u.p.steal_lock;
1298 KMP_ASSERT(lckv != NULL)if (!(lckv != __null)) { __kmp_debug_assert("lckv != NULL", "openmp/runtime/src/kmp_dispatch.cpp"
, 1298); }
;
1299 __kmp_acquire_lock(lckv, gtid);
1300 limit = v->u.p.ub; // keep initial ub
1301 if (v->u.p.count >= limit) {
1302 __kmp_release_lock(lckv, gtid);
1303 pr->u.p.parm4 = (victimId + 1) % nproc; // shift start victim tid
1304 continue; // no chunks to steal, try next victim
1305 }
1306
1307 // stealing succeeded, reduce victim's ub by 1/4 of undone chunks
1308 // TODO: is this heuristics good enough??
1309 remaining = limit - v->u.p.count;
1310 if (remaining > 7) {
1311 // steal 1/4 of remaining
1312 KMP_COUNT_DEVELOPER_VALUE(FOR_static_steal_stolen, remaining >> 2)((void)0);
1313 init = (v->u.p.ub -= (remaining >> 2));
1314 } else {
1315 // steal 1 chunk of 1..7 remaining
1316 KMP_COUNT_DEVELOPER_VALUE(FOR_static_steal_stolen, 1)((void)0);
1317 init = (v->u.p.ub -= 1);
1318 }
1319 __kmp_release_lock(lckv, gtid);
1320#ifdef KMP_DEBUG1
1321 {
1322 char *buff;
1323 // create format specifiers before the debug output
1324 buff = __kmp_str_format(
1325 "__kmp_dispatch_next: T#%%d stolen chunks from T#%%d, "
1326 "count:%%%s ub:%%%s\n",
1327 traits_t<UT>::spec, traits_t<UT>::spec);
1328 KD_TRACE(10, (buff, gtid, victimId, init, limit))if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, victimId
, init, limit); }
;
1329 __kmp_str_free(&buff);
1330 }
1331#endif
1332 KMP_DEBUG_ASSERT(init + 1 <= limit)if (!(init + 1 <= limit)) { __kmp_debug_assert("init + 1 <= limit"
, "openmp/runtime/src/kmp_dispatch.cpp", 1332); }
;
1333 pr->u.p.parm4 = victimId; // remember victim to steal from
1334 status = 1;
1335 // now update own count and ub with stolen range excluding init chunk
1336 __kmp_acquire_lock(lck, gtid);
1337 pr->u.p.count = init + 1;
1338 pr->u.p.ub = limit;
1339 __kmp_release_lock(lck, gtid);
1340 // activate non-empty buffer and let others steal from us
1341 if (init + 1 < limit)
1342 KMP_ATOMIC_ST_REL(&pr->steal_flag, READY)(&pr->steal_flag)->store(READY, std::memory_order_release
)
;
1343 } // while (search for victim)
1344 } // if (try to find victim and steal)
1345 } else {
1346 // 4-byte induction variable, use 8-byte CAS for pair (count, ub)
1347 // as all operations on pair (count, ub) must be done atomically
1348 typedef union {
1349 struct {
1350 UT count;
1351 T ub;
1352 } p;
1353 kmp_int64 b;
1354 } union_i4;
1355 union_i4 vold, vnew;
1356 if (pr->u.p.count < (UT)pr->u.p.ub) {
1357 KMP_DEBUG_ASSERT(pr->steal_flag == READY)if (!(pr->steal_flag == READY)) { __kmp_debug_assert("pr->steal_flag == READY"
, "openmp/runtime/src/kmp_dispatch.cpp", 1357); }
;
1358 vold.b = *(volatile kmp_int64 *)(&pr->u.p.count);
1359 vnew.b = vold.b;
1360 vnew.p.count++; // get chunk from head of self range
1361 while (!KMP_COMPARE_AND_STORE_REL64(__sync_bool_compare_and_swap((volatile kmp_uint64 *)((volatile
kmp_int64 *)&pr->u.p.count), (kmp_uint64)(*(kmp_int64
*) & vold.b), (kmp_uint64)(*(kmp_int64 *) & vnew.b))
1362 (volatile kmp_int64 *)&pr->u.p.count,__sync_bool_compare_and_swap((volatile kmp_uint64 *)((volatile
kmp_int64 *)&pr->u.p.count), (kmp_uint64)(*(kmp_int64
*) & vold.b), (kmp_uint64)(*(kmp_int64 *) & vnew.b))
1363 *VOLATILE_CAST(kmp_int64 *) & vold.b,__sync_bool_compare_and_swap((volatile kmp_uint64 *)((volatile
kmp_int64 *)&pr->u.p.count), (kmp_uint64)(*(kmp_int64
*) & vold.b), (kmp_uint64)(*(kmp_int64 *) & vnew.b))
1364 *VOLATILE_CAST(kmp_int64 *) & vnew.b)__sync_bool_compare_and_swap((volatile kmp_uint64 *)((volatile
kmp_int64 *)&pr->u.p.count), (kmp_uint64)(*(kmp_int64
*) & vold.b), (kmp_uint64)(*(kmp_int64 *) & vnew.b))
) {
1365 KMP_CPU_PAUSE()__kmp_x86_pause();
1366 vold.b = *(volatile kmp_int64 *)(&pr->u.p.count);
1367 vnew.b = vold.b;
1368 vnew.p.count++;
1369 }
1370 init = vold.p.count;
1371 status = (init < (UT)vold.p.ub);
1372 } else {
1373 status = 0; // no own chunks
1374 }
1375 if (!status) { // try to steal
1376 T while_limit = pr->u.p.parm3;
1377 T while_index = 0;
1378 int idx = (th->th.th_dispatch->th_disp_index - 1) %
1379 __kmp_dispatch_num_buffers; // current loop index
1380 // note: victim thread can potentially execute another loop
1381 KMP_ATOMIC_ST_REL(&pr->steal_flag, THIEF)(&pr->steal_flag)->store(THIEF, std::memory_order_release
)
; // mark self buffer inactive
1382 while ((!status) && (while_limit != ++while_index)) {
1383 dispatch_private_info_template<T> *v;
1384 T remaining;
1385 T victimId = pr->u.p.parm4;
1386 T oldVictimId = victimId ? victimId - 1 : nproc - 1;
1387 v = reinterpret_cast<dispatch_private_info_template<T> *>(
1388 &team->t.t_dispatch[victimId].th_disp_buffer[idx]);
1389 KMP_DEBUG_ASSERT(v)if (!(v)) { __kmp_debug_assert("v", "openmp/runtime/src/kmp_dispatch.cpp"
, 1389); }
;
1390 while ((v == pr || KMP_ATOMIC_LD_RLX(&v->steal_flag)(&v->steal_flag)->load(std::memory_order_relaxed) == THIEF) &&
1391 oldVictimId != victimId) {
1392 victimId = (victimId + 1) % nproc;
1393 v = reinterpret_cast<dispatch_private_info_template<T> *>(
1394 &team->t.t_dispatch[victimId].th_disp_buffer[idx]);
1395 KMP_DEBUG_ASSERT(v)if (!(v)) { __kmp_debug_assert("v", "openmp/runtime/src/kmp_dispatch.cpp"
, 1395); }
;
1396 }
1397 if (v == pr || KMP_ATOMIC_LD_RLX(&v->steal_flag)(&v->steal_flag)->load(std::memory_order_relaxed) == THIEF) {
1398 continue; // try once more (nproc attempts in total)
1399 }
1400 if (KMP_ATOMIC_LD_RLX(&v->steal_flag)(&v->steal_flag)->load(std::memory_order_relaxed) == UNUSED) {
1401 kmp_uint32 old = UNUSED;
1402 // try to steal whole range from inactive victim
1403 status = v->steal_flag.compare_exchange_strong(old, THIEF);
1404 if (status) {
1405 // initialize self buffer with victim's whole range of chunks
1406 T id = victimId;
1407 T small_chunk, extras;
1408 small_chunk = nchunks / nproc; // chunks per thread
1409 extras = nchunks % nproc;
1410 init = id * small_chunk + (id < extras ? id : extras);
1411 vnew.p.count = init + 1;
1412 vnew.p.ub = init + small_chunk + (id < extras ? 1 : 0);
1413 // write pair (count, ub) at once atomically
1414#if KMP_ARCH_X860
1415 KMP_XCHG_FIXED64((volatile kmp_int64 *)(&pr->u.p.count), vnew.b)__sync_lock_test_and_set((volatile kmp_uint64 *)((volatile kmp_int64
*)(&pr->u.p.count)), (kmp_uint64)(vnew.b))
;
1416#else
1417 *(volatile kmp_int64 *)(&pr->u.p.count) = vnew.b;
1418#endif
1419 pr->u.p.parm4 = (id + 1) % nproc; // remember neighbour tid
1420 // no need to initialize other thread invariants: lb, st, etc.
1421#ifdef KMP_DEBUG1
1422 {
1423 char *buff;
1424 // create format specifiers before the debug output
1425 buff = __kmp_str_format(
1426 "__kmp_dispatch_next: T#%%d stolen chunks from T#%%d, "
1427 "count:%%%s ub:%%%s\n",
1428 traits_t<UT>::spec, traits_t<T>::spec);
1429 KD_TRACE(10, (buff, gtid, id, pr->u.p.count, pr->u.p.ub))if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, id
, pr->u.p.count, pr->u.p.ub); }
;
1430 __kmp_str_free(&buff);
1431 }
1432#endif
1433 // activate non-empty buffer and let others steal from us
1434 if (pr->u.p.count < (UT)pr->u.p.ub)
1435 KMP_ATOMIC_ST_REL(&pr->steal_flag, READY)(&pr->steal_flag)->store(READY, std::memory_order_release
)
;
1436 break;
1437 }
1438 }
1439 while (1) { // CAS loop with check if victim still has enough chunks
1440 // many threads may be stealing concurrently from same victim
1441 vold.b = *(volatile kmp_int64 *)(&v->u.p.count);
1442 if (KMP_ATOMIC_LD_ACQ(&v->steal_flag)(&v->steal_flag)->load(std::memory_order_acquire) != READY ||
1443 vold.p.count >= (UT)vold.p.ub) {
1444 pr->u.p.parm4 = (victimId + 1) % nproc; // shift start victim id
1445 break; // no chunks to steal, try next victim
1446 }
1447 vnew.b = vold.b;
1448 remaining = vold.p.ub - vold.p.count;
1449 // try to steal 1/4 of remaining
1450 // TODO: is this heuristics good enough??
1451 if (remaining > 7) {
1452 vnew.p.ub -= remaining >> 2; // steal from tail of victim's range
1453 } else {
1454 vnew.p.ub -= 1; // steal 1 chunk of 1..7 remaining
1455 }
1456 KMP_DEBUG_ASSERT(vnew.p.ub * (UT)chunk <= trip)if (!(vnew.p.ub * (UT)chunk <= trip)) { __kmp_debug_assert
("vnew.p.ub * (UT)chunk <= trip", "openmp/runtime/src/kmp_dispatch.cpp"
, 1456); }
;
1457 if (KMP_COMPARE_AND_STORE_REL64(__sync_bool_compare_and_swap((volatile kmp_uint64 *)((volatile
kmp_int64 *)&v->u.p.count), (kmp_uint64)(*(kmp_int64 *
) & vold.b), (kmp_uint64)(*(kmp_int64 *) & vnew.b))
1458 (volatile kmp_int64 *)&v->u.p.count,__sync_bool_compare_and_swap((volatile kmp_uint64 *)((volatile
kmp_int64 *)&v->u.p.count), (kmp_uint64)(*(kmp_int64 *
) & vold.b), (kmp_uint64)(*(kmp_int64 *) & vnew.b))
1459 *VOLATILE_CAST(kmp_int64 *) & vold.b,__sync_bool_compare_and_swap((volatile kmp_uint64 *)((volatile
kmp_int64 *)&v->u.p.count), (kmp_uint64)(*(kmp_int64 *
) & vold.b), (kmp_uint64)(*(kmp_int64 *) & vnew.b))
1460 *VOLATILE_CAST(kmp_int64 *) & vnew.b)__sync_bool_compare_and_swap((volatile kmp_uint64 *)((volatile
kmp_int64 *)&v->u.p.count), (kmp_uint64)(*(kmp_int64 *
) & vold.b), (kmp_uint64)(*(kmp_int64 *) & vnew.b))
) {
1461 // stealing succeeded
1462#ifdef KMP_DEBUG1
1463 {
1464 char *buff;
1465 // create format specifiers before the debug output
1466 buff = __kmp_str_format(
1467 "__kmp_dispatch_next: T#%%d stolen chunks from T#%%d, "
1468 "count:%%%s ub:%%%s\n",
1469 traits_t<T>::spec, traits_t<T>::spec);
1470 KD_TRACE(10, (buff, gtid, victimId, vnew.p.ub, vold.p.ub))if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, victimId
, vnew.p.ub, vold.p.ub); }
;
1471 __kmp_str_free(&buff);
1472 }
1473#endif
1474 KMP_COUNT_DEVELOPER_VALUE(FOR_static_steal_stolen,((void)0)
1475 vold.p.ub - vnew.p.ub)((void)0);
1476 status = 1;
1477 pr->u.p.parm4 = victimId; // keep victim id
1478 // now update own count and ub
1479 init = vnew.p.ub;
1480 vold.p.count = init + 1;
1481#if KMP_ARCH_X860
1482 KMP_XCHG_FIXED64((volatile kmp_int64 *)(&pr->u.p.count), vold.b)__sync_lock_test_and_set((volatile kmp_uint64 *)((volatile kmp_int64
*)(&pr->u.p.count)), (kmp_uint64)(vold.b))
;
1483#else
1484 *(volatile kmp_int64 *)(&pr->u.p.count) = vold.b;
1485#endif
1486 // activate non-empty buffer and let others steal from us
1487 if (vold.p.count < (UT)vold.p.ub)
1488 KMP_ATOMIC_ST_REL(&pr->steal_flag, READY)(&pr->steal_flag)->store(READY, std::memory_order_release
)
;
1489 break;
1490 } // if (check CAS result)
1491 KMP_CPU_PAUSE()__kmp_x86_pause(); // CAS failed, repeatedly attempt
1492 } // while (try to steal from particular victim)
1493 } // while (search for victim)
1494 } // if (try to find victim and steal)
1495 } // if (4-byte induction variable)
1496 if (!status) {
1497 *p_lb = 0;
1498 *p_ub = 0;
1499 if (p_st != NULL__null)
1500 *p_st = 0;
1501 } else {
1502 start = pr->u.p.lb;
1503 init *= chunk;
1504 limit = chunk + init - 1;
1505 incr = pr->u.p.st;
1506 KMP_COUNT_DEVELOPER_VALUE(FOR_static_steal_chunks, 1)((void)0);
1507
1508 KMP_DEBUG_ASSERT(init <= trip)if (!(init <= trip)) { __kmp_debug_assert("init <= trip"
, "openmp/runtime/src/kmp_dispatch.cpp", 1508); }
;
1509 // keep track of done chunks for possible early exit from stealing
1510 // TODO: count executed chunks locally with rare update of shared location
1511 // test_then_inc<ST>((volatile ST *)&sh->u.s.iteration);
1512 if ((last = (limit >= trip)) != 0)
1513 limit = trip;
1514 if (p_st != NULL__null)
1515 *p_st = incr;
1516
1517 if (incr == 1) {
1518 *p_lb = start + init;
1519 *p_ub = start + limit;
1520 } else {
1521 *p_lb = start + init * incr;
1522 *p_ub = start + limit * incr;
1523 }
1524 } // if
1525 break;
1526 } // case
1527#endif // KMP_STATIC_STEAL_ENABLED
1528 case kmp_sch_static_balanced: {
1529 KD_TRACE(if (kmp_d_debug >= 10) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_static_balanced case\n"
, gtid); }
1530 10,if (kmp_d_debug >= 10) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_static_balanced case\n"
, gtid); }
1531 ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_static_balanced case\n",if (kmp_d_debug >= 10) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_static_balanced case\n"
, gtid); }
1532 gtid))if (kmp_d_debug >= 10) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_static_balanced case\n"
, gtid); }
;
1533 /* check if thread has any iteration to do */
1534 if ((status = !pr->u.p.count) != 0) {
1535 pr->u.p.count = 1;
1536 *p_lb = pr->u.p.lb;
1537 *p_ub = pr->u.p.ub;
1538 last = (pr->u.p.parm1 != 0);
1539 if (p_st != NULL__null)
1540 *p_st = pr->u.p.st;
1541 } else { /* no iterations to do */
1542 pr->u.p.lb = pr->u.p.ub + pr->u.p.st;
1543 }
1544 } // case
1545 break;
1546 case kmp_sch_static_greedy: /* original code for kmp_sch_static_greedy was
1547 merged here */
1548 case kmp_sch_static_chunked: {
1549 T parm1;
1550
1551 KD_TRACE(100, ("__kmp_dispatch_next_algorithm: T#%d "if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d "
"kmp_sch_static_[affinity|chunked] case\n", gtid); }
1552 "kmp_sch_static_[affinity|chunked] case\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d "
"kmp_sch_static_[affinity|chunked] case\n", gtid); }
1553 gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d "
"kmp_sch_static_[affinity|chunked] case\n", gtid); }
;
1554 parm1 = pr->u.p.parm1;
1555
1556 trip = pr->u.p.tc - 1;
1557 init = parm1 * (pr->u.p.count + tid);
1558
1559 if ((status = (init <= trip)) != 0) {
1560 start = pr->u.p.lb;
1561 incr = pr->u.p.st;
1562 limit = parm1 + init - 1;
1563
1564 if ((last = (limit >= trip)) != 0)
1565 limit = trip;
1566
1567 if (p_st != NULL__null)
1568 *p_st = incr;
1569
1570 pr->u.p.count += nproc;
1571
1572 if (incr == 1) {
1573 *p_lb = start + init;
1574 *p_ub = start + limit;
1575 } else {
1576 *p_lb = start + init * incr;
1577 *p_ub = start + limit * incr;
1578 }
1579
1580 if (pr->flags.ordered) {
1581 pr->u.p.ordered_lower = init;
1582 pr->u.p.ordered_upper = limit;
1583 } // if
1584 } // if
1585 } // case
1586 break;
1587
1588 case kmp_sch_dynamic_chunked: {
1589 UT chunk_number;
1590 UT chunk_size = pr->u.p.parm1;
1591 UT nchunks = pr->u.p.parm2;
1592
1593 KD_TRACE(if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_dynamic_chunked case\n"
, gtid); }
1594 100,if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_dynamic_chunked case\n"
, gtid); }
1595 ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_dynamic_chunked case\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_dynamic_chunked case\n"
, gtid); }
1596 gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_dynamic_chunked case\n"
, gtid); }
;
1597
1598 chunk_number = test_then_inc_acq<ST>((volatile ST *)&sh->u.s.iteration);
1599 status = (chunk_number < nchunks);
1600 if (!status) {
1601 *p_lb = 0;
1602 *p_ub = 0;
1603 if (p_st != NULL__null)
1604 *p_st = 0;
1605 } else {
1606 init = chunk_size * chunk_number;
1607 trip = pr->u.p.tc - 1;
1608 start = pr->u.p.lb;
1609 incr = pr->u.p.st;
1610
1611 if ((last = (trip - init < (UT)chunk_size)))
1612 limit = trip;
1613 else
1614 limit = chunk_size + init - 1;
1615
1616 if (p_st != NULL__null)
1617 *p_st = incr;
1618
1619 if (incr == 1) {
1620 *p_lb = start + init;
1621 *p_ub = start + limit;
1622 } else {
1623 *p_lb = start + init * incr;
1624 *p_ub = start + limit * incr;
1625 }
1626
1627 if (pr->flags.ordered) {
1628 pr->u.p.ordered_lower = init;
1629 pr->u.p.ordered_upper = limit;
1630 } // if
1631 } // if
1632 } // case
1633 break;
1634
1635 case kmp_sch_guided_iterative_chunked: {
1636 T chunkspec = pr->u.p.parm1;
1637 KD_TRACE(100, ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_guided_chunked "if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_guided_chunked "
"iterative case\n", gtid); }
1638 "iterative case\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_guided_chunked "
"iterative case\n", gtid); }
1639 gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_guided_chunked "
"iterative case\n", gtid); }
;
1640 trip = pr->u.p.tc;
1641 // Start atomic part of calculations
1642 while (1) {
1643 ST remaining; // signed, because can be < 0
1644 init = sh->u.s.iteration; // shared value
1645 remaining = trip - init;
1646 if (remaining <= 0) { // AC: need to compare with 0 first
1647 // nothing to do, don't try atomic op
1648 status = 0;
1649 break;
1650 }
1651 if ((T)remaining <
1652 pr->u.p.parm2) { // compare with K*nproc*(chunk+1), K=2 by default
1653 // use dynamic-style schedule
1654 // atomically increment iterations, get old value
1655 init = test_then_add<ST>(RCAST(volatile ST *, &sh->u.s.iteration)reinterpret_cast<volatile ST *>(&sh->u.s.iteration
)
,
1656 (ST)chunkspec);
1657 remaining = trip - init;
1658 if (remaining <= 0) {
1659 status = 0; // all iterations got by other threads
1660 } else {
1661 // got some iterations to work on
1662 status = 1;
1663 if ((T)remaining > chunkspec) {
1664 limit = init + chunkspec - 1;
1665 } else {
1666 last = true; // the last chunk
1667 limit = init + remaining - 1;
1668 } // if
1669 } // if
1670 break;
1671 } // if
1672 limit = init + (UT)((double)remaining *
1673 *(double *)&pr->u.p.parm3); // divide by K*nproc
1674 if (compare_and_swap<ST>(RCAST(volatile ST *, &sh->u.s.iteration)reinterpret_cast<volatile ST *>(&sh->u.s.iteration
)
,
1675 (ST)init, (ST)limit)) {
1676 // CAS was successful, chunk obtained
1677 status = 1;
1678 --limit;
1679 break;
1680 } // if
1681 } // while
1682 if (status != 0) {
1683 start = pr->u.p.lb;
1684 incr = pr->u.p.st;
1685 if (p_st != NULL__null)
1686 *p_st = incr;
1687 *p_lb = start + init * incr;
1688 *p_ub = start + limit * incr;
1689 if (pr->flags.ordered) {
1690 pr->u.p.ordered_lower = init;
1691 pr->u.p.ordered_upper = limit;
1692 } // if
1693 } else {
1694 *p_lb = 0;
1695 *p_ub = 0;
1696 if (p_st != NULL__null)
1697 *p_st = 0;
1698 } // if
1699 } // case
1700 break;
1701
1702 case kmp_sch_guided_simd: {
1703 // same as iterative but curr-chunk adjusted to be multiple of given
1704 // chunk
1705 T chunk = pr->u.p.parm1;
1706 KD_TRACE(100,if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_guided_simd case\n"
, gtid); }
1707 ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_guided_simd case\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_guided_simd case\n"
, gtid); }
1708 gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_guided_simd case\n"
, gtid); }
;
1709 trip = pr->u.p.tc;
1710 // Start atomic part of calculations
1711 while (1) {
1712 ST remaining; // signed, because can be < 0
1713 init = sh->u.s.iteration; // shared value
1714 remaining = trip - init;
1715 if (remaining <= 0) { // AC: need to compare with 0 first
1716 status = 0; // nothing to do, don't try atomic op
1717 break;
1718 }
1719 KMP_DEBUG_ASSERT(chunk && init % chunk == 0)if (!(chunk && init % chunk == 0)) { __kmp_debug_assert
("chunk && init % chunk == 0", "openmp/runtime/src/kmp_dispatch.cpp"
, 1719); }
;
1720 // compare with K*nproc*(chunk+1), K=2 by default
1721 if ((T)remaining < pr->u.p.parm2) {
1722 // use dynamic-style schedule
1723 // atomically increment iterations, get old value
1724 init = test_then_add<ST>(RCAST(volatile ST *, &sh->u.s.iteration)reinterpret_cast<volatile ST *>(&sh->u.s.iteration
)
,
1725 (ST)chunk);
1726 remaining = trip - init;
1727 if (remaining <= 0) {
1728 status = 0; // all iterations got by other threads
1729 } else {
1730 // got some iterations to work on
1731 status = 1;
1732 if ((T)remaining > chunk) {
1733 limit = init + chunk - 1;
1734 } else {
1735 last = true; // the last chunk
1736 limit = init + remaining - 1;
1737 } // if
1738 } // if
1739 break;
1740 } // if
1741 // divide by K*nproc
1742 UT span;
1743 __kmp_type_convert((double)remaining * (*(double *)&pr->u.p.parm3),
1744 &span);
1745 UT rem = span % chunk;
1746 if (rem) // adjust so that span%chunk == 0
1747 span += chunk - rem;
1748 limit = init + span;
1749 if (compare_and_swap<ST>(RCAST(volatile ST *, &sh->u.s.iteration)reinterpret_cast<volatile ST *>(&sh->u.s.iteration
)
,
1750 (ST)init, (ST)limit)) {
1751 // CAS was successful, chunk obtained
1752 status = 1;
1753 --limit;
1754 break;
1755 } // if
1756 } // while
1757 if (status != 0) {
1758 start = pr->u.p.lb;
1759 incr = pr->u.p.st;
1760 if (p_st != NULL__null)
1761 *p_st = incr;
1762 *p_lb = start + init * incr;
1763 *p_ub = start + limit * incr;
1764 if (pr->flags.ordered) {
1765 pr->u.p.ordered_lower = init;
1766 pr->u.p.ordered_upper = limit;
1767 } // if
1768 } else {
1769 *p_lb = 0;
1770 *p_ub = 0;
1771 if (p_st != NULL__null)
1772 *p_st = 0;
1773 } // if
1774 } // case
1775 break;
1776
1777 case kmp_sch_guided_analytical_chunked: {
1778 T chunkspec = pr->u.p.parm1;
1779 UT chunkIdx;
1780#if KMP_USE_X87CONTROL0
1781 /* for storing original FPCW value for Windows* OS on
1782 IA-32 architecture 8-byte version */
1783 unsigned int oldFpcw;
1784 unsigned int fpcwSet = 0;
1785#endif
1786 KD_TRACE(100, ("__kmp_dispatch_next_algorithm: T#%d "if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d "
"kmp_sch_guided_analytical_chunked case\n", gtid); }
1787 "kmp_sch_guided_analytical_chunked case\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d "
"kmp_sch_guided_analytical_chunked case\n", gtid); }
1788 gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d "
"kmp_sch_guided_analytical_chunked case\n", gtid); }
;
1789
1790 trip = pr->u.p.tc;
1791
1792 KMP_DEBUG_ASSERT(nproc > 1)if (!(nproc > 1)) { __kmp_debug_assert("nproc > 1", "openmp/runtime/src/kmp_dispatch.cpp"
, 1792); }
;
1793 KMP_DEBUG_ASSERT((2UL * chunkspec + 1) * (UT)nproc < trip)if (!((2UL * chunkspec + 1) * (UT)nproc < trip)) { __kmp_debug_assert
("(2UL * chunkspec + 1) * (UT)nproc < trip", "openmp/runtime/src/kmp_dispatch.cpp"
, 1793); }
;
1794
1795 while (1) { /* this while loop is a safeguard against unexpected zero
1796 chunk sizes */
1797 chunkIdx = test_then_inc_acq<ST>((volatile ST *)&sh->u.s.iteration);
1798 if (chunkIdx >= (UT)pr->u.p.parm2) {
1799 --trip;
1800 /* use dynamic-style scheduling */
1801 init = chunkIdx * chunkspec + pr->u.p.count;
1802 /* need to verify init > 0 in case of overflow in the above
1803 * calculation */
1804 if ((status = (init > 0 && init <= trip)) != 0) {
1805 limit = init + chunkspec - 1;
1806
1807 if ((last = (limit >= trip)) != 0)
1808 limit = trip;
1809 }
1810 break;
1811 } else {
1812/* use exponential-style scheduling */
1813/* The following check is to workaround the lack of long double precision on
1814 Windows* OS.
1815 This check works around the possible effect that init != 0 for chunkIdx == 0.
1816 */
1817#if KMP_USE_X87CONTROL0
1818 /* If we haven't already done so, save original
1819 FPCW and set precision to 64-bit, as Windows* OS
1820 on IA-32 architecture defaults to 53-bit */
1821 if (!fpcwSet) {
1822 oldFpcw = _control87(0, 0);
1823 _control87(_PC_64, _MCW_PC);
1824 fpcwSet = 0x30000;
1825 }
1826#endif
1827 if (chunkIdx) {
1828 init = __kmp_dispatch_guided_remaining<T>(
1829 trip, *(DBL *)&pr->u.p.parm3, chunkIdx);
1830 KMP_DEBUG_ASSERT(init)if (!(init)) { __kmp_debug_assert("init", "openmp/runtime/src/kmp_dispatch.cpp"
, 1830); }
;
1831 init = trip - init;
1832 } else
1833 init = 0;
1834 limit = trip - __kmp_dispatch_guided_remaining<T>(
1835 trip, *(DBL *)&pr->u.p.parm3, chunkIdx + 1);
1836 KMP_ASSERT(init <= limit)if (!(init <= limit)) { __kmp_debug_assert("init <= limit"
, "openmp/runtime/src/kmp_dispatch.cpp", 1836); }
;
1837 if (init < limit) {
1838 KMP_DEBUG_ASSERT(limit <= trip)if (!(limit <= trip)) { __kmp_debug_assert("limit <= trip"
, "openmp/runtime/src/kmp_dispatch.cpp", 1838); }
;
1839 --limit;
1840 status = 1;
1841 break;
1842 } // if
1843 } // if
1844 } // while (1)
1845#if KMP_USE_X87CONTROL0
1846 /* restore FPCW if necessary
1847 AC: check fpcwSet flag first because oldFpcw can be uninitialized here
1848 */
1849 if (fpcwSet && (oldFpcw & fpcwSet))
1850 _control87(oldFpcw, _MCW_PC);
1851#endif
1852 if (status != 0) {
1853 start = pr->u.p.lb;
1854 incr = pr->u.p.st;
1855 if (p_st != NULL__null)
1856 *p_st = incr;
1857 *p_lb = start + init * incr;
1858 *p_ub = start + limit * incr;
1859 if (pr->flags.ordered) {
1860 pr->u.p.ordered_lower = init;
1861 pr->u.p.ordered_upper = limit;
1862 }
1863 } else {
1864 *p_lb = 0;
1865 *p_ub = 0;
1866 if (p_st != NULL__null)
1867 *p_st = 0;
1868 }
1869 } // case
1870 break;
1871
1872 case kmp_sch_trapezoidal: {
1873 UT index;
1874 T parm2 = pr->u.p.parm2;
1875 T parm3 = pr->u.p.parm3;
1876 T parm4 = pr->u.p.parm4;
1877 KD_TRACE(100,if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_trapezoidal case\n"
, gtid); }
1878 ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_trapezoidal case\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_trapezoidal case\n"
, gtid); }
1879 gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_trapezoidal case\n"
, gtid); }
;
1880
1881 index = test_then_inc<ST>((volatile ST *)&sh->u.s.iteration);
1882
1883 init = (index * ((2 * parm2) - (index - 1) * parm4)) / 2;
1884 trip = pr->u.p.tc - 1;
1885
1886 if ((status = ((T)index < parm3 && init <= trip)) == 0) {
1887 *p_lb = 0;
1888 *p_ub = 0;
1889 if (p_st != NULL__null)
1890 *p_st = 0;
1891 } else {
1892 start = pr->u.p.lb;
1893 limit = ((index + 1) * (2 * parm2 - index * parm4)) / 2 - 1;
1894 incr = pr->u.p.st;
1895
1896 if ((last = (limit >= trip)) != 0)
1897 limit = trip;
1898
1899 if (p_st != NULL__null)
1900 *p_st = incr;
1901
1902 if (incr == 1) {
1903 *p_lb = start + init;
1904 *p_ub = start + limit;
1905 } else {
1906 *p_lb = start + init * incr;
1907 *p_ub = start + limit * incr;
1908 }
1909
1910 if (pr->flags.ordered) {
1911 pr->u.p.ordered_lower = init;
1912 pr->u.p.ordered_upper = limit;
1913 } // if
1914 } // if
1915 } // case
1916 break;
1917 default: {
1918 status = 0; // to avoid complaints on uninitialized variable use
1919 __kmp_fatal(KMP_MSG(UnknownSchedTypeDetected)__kmp_msg_format(kmp_i18n_msg_UnknownSchedTypeDetected), // Primary message
1920 KMP_HNT(GetNewerLibrary)__kmp_msg_format(kmp_i18n_hnt_GetNewerLibrary), // Hint
1921 __kmp_msg_null // Variadic argument list terminator
1922 );
1923 } break;
1924 } // switch
1925 if (p_last)
1926 *p_last = last;
1927#ifdef KMP_DEBUG1
1928 if (pr->flags.ordered) {
1929 char *buff;
1930 // create format specifiers before the debug output
1931 buff = __kmp_str_format("__kmp_dispatch_next_algorithm: T#%%d "
1932 "ordered_lower:%%%s ordered_upper:%%%s\n",
1933 traits_t<UT>::spec, traits_t<UT>::spec);
1934 KD_TRACE(1000, (buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper))if (kmp_d_debug >= 1000) { __kmp_debug_printf (buff, gtid,
pr->u.p.ordered_lower, pr->u.p.ordered_upper); }
;
1935 __kmp_str_free(&buff);
1936 }
1937 {
1938 char *buff;
1939 // create format specifiers before the debug output
1940 buff = __kmp_str_format(
1941 "__kmp_dispatch_next_algorithm: T#%%d exit status:%%d p_last:%%d "
1942 "p_lb:%%%s p_ub:%%%s p_st:%%%s\n",
1943 traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec);
1944 KMP_DEBUG_ASSERT(p_last)if (!(p_last)) { __kmp_debug_assert("p_last", "openmp/runtime/src/kmp_dispatch.cpp"
, 1944); }
;
1945 KMP_DEBUG_ASSERT(p_st)if (!(p_st)) { __kmp_debug_assert("p_st", "openmp/runtime/src/kmp_dispatch.cpp"
, 1945); }
;
1946 KD_TRACE(10, (buff, gtid, status, *p_last, *p_lb, *p_ub, *p_st))if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, status
, *p_last, *p_lb, *p_ub, *p_st); }
;
1947 __kmp_str_free(&buff);
1948 }
1949#endif
1950 return status;
1951}
1952
1953/* Define a macro for exiting __kmp_dispatch_next(). If status is 0 (no more
1954 work), then tell OMPT the loop is over. In some cases kmp_dispatch_fini()
1955 is not called. */
1956#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
// OMPT_LOOP_END: when `status` is 0, and only if the work callback is enabled,
// calls the ompt_callback_work callback with ompt_work_loop / ompt_scope_end,
// passing the team's parallel_data and the task's task_data.
// The macro expands to a bare `if` statement and takes `status` and `codeptr`
// from the scope in which it is expanded (they are not macro parameters).
1957#define OMPT_LOOP_ENDif (status == 0) { if (ompt_enabled.ompt_callback_work) { ompt_team_info_t
*team_info = __ompt_get_teaminfo(0, __null); ompt_task_info_t
*task_info = __ompt_get_task_info_object(0); ompt_callbacks.
ompt_callback_work_callback( ompt_work_loop, ompt_scope_end, &
(team_info->parallel_data), &(task_info->task_data)
, 0, codeptr); } }
\
1958 if (status == 0) { \
1959 if (ompt_enabled.ompt_callback_work) { \
1960 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL__null); \
1961 ompt_task_info_t *task_info = __ompt_get_task_info_object(0); \
1962 ompt_callbacks.ompt_callback(ompt_callback_work)ompt_callback_work_callback( \
1963 ompt_work_loop, ompt_scope_end, &(team_info->parallel_data), \
1964 &(task_info->task_data), 0, codeptr); \
1965 } \
1966 }
// OMPT_LOOP_DISPATCH(lb, ub, st, status): when the dispatch callback is
// enabled and `status` is non-zero, fills a local ompt_dispatch_chunk_t from
// (lb, ub, st) via OMPT_GET_DISPATCH_CHUNK, stores its address in
// instance.ptr, and calls the ompt_callback_dispatch callback tagged
// ompt_dispatch_ws_loop_chunk.
// Per the expansion shown: st > 0 gives start = lb and
// iterations = (ub - lb) / st + 1; otherwise start = ub and
// iterations = (lb - ub) / -st + 1.
// The arguments appear several times in the expansion, so each is evaluated
// more than once; `status` is used unparenthesized.
// NOTE(review): st == 0 takes the else branch and divides by -(st);
// presumably callers never pass a zero stride - confirm.
1967#define OMPT_LOOP_DISPATCH(lb, ub, st, status)if (ompt_enabled.ompt_callback_dispatch && status) { ompt_team_info_t
*team_info = __ompt_get_teaminfo(0, __null); ompt_task_info_t
*task_info = __ompt_get_task_info_object(0); ompt_dispatch_chunk_t
chunk; ompt_data_t instance = {0}; do { if (st > 0) { chunk
.start = static_cast<uint64_t>(lb); chunk.iterations = static_cast
<uint64_t>(((ub) - (lb)) / (st) + 1); } else { chunk.start
= static_cast<uint64_t>(ub); chunk.iterations = static_cast
<uint64_t>(((lb) - (ub)) / -(st) + 1); } } while (0); instance
.ptr = &chunk; ompt_callbacks.ompt_callback_dispatch_callback
( &(team_info->parallel_data), &(task_info->task_data
), ompt_dispatch_ws_loop_chunk, instance); }
\
1968 if (ompt_enabled.ompt_callback_dispatch && status) { \
1969 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL__null); \
1970 ompt_task_info_t *task_info = __ompt_get_task_info_object(0); \
1971 ompt_dispatch_chunk_t chunk; \
1972 ompt_data_t instance = ompt_data_none{0}; \
1973 OMPT_GET_DISPATCH_CHUNK(chunk, lb, ub, st)do { if (st > 0) { chunk.start = static_cast<uint64_t>
(lb); chunk.iterations = static_cast<uint64_t>(((ub) - (
lb)) / (st) + 1); } else { chunk.start = static_cast<uint64_t
>(ub); chunk.iterations = static_cast<uint64_t>(((lb
) - (ub)) / -(st) + 1); } } while (0)
; \
1974 instance.ptr = &chunk; \
1975 ompt_callbacks.ompt_callback(ompt_callback_dispatch)ompt_callback_dispatch_callback( \
1976 &(team_info->parallel_data), &(task_info->task_data), \
1977 ompt_dispatch_ws_loop_chunk, instance); \
1978 }
1979// TODO: implement count
1980#else
// Fallback when `OMPT_SUPPORT && OMPT_OPTIONAL` is false: per the `// no-op`
// markers, both macros are meant to expand to nothing.
// NOTE(review): the statement text fused onto the two #define lines below
// matches the OMPT-enabled bodies above and looks like the analyzer report's
// macro-expansion annotation, not part of these definitions - confirm against
// the original source file.
1981#define OMPT_LOOP_ENDif (status == 0) { if (ompt_enabled.ompt_callback_work) { ompt_team_info_t
*team_info = __ompt_get_teaminfo(0, __null); ompt_task_info_t
*task_info = __ompt_get_task_info_object(0); ompt_callbacks.
ompt_callback_work_callback( ompt_work_loop, ompt_scope_end, &
(team_info->parallel_data), &(task_info->task_data)
, 0, codeptr); } }
// no-op
1982#define OMPT_LOOP_DISPATCH(lb, ub, st, status)if (ompt_enabled.ompt_callback_dispatch && status) { ompt_team_info_t
*team_info = __ompt_get_teaminfo(0, __null); ompt_task_info_t
*task_info = __ompt_get_task_info_object(0); ompt_dispatch_chunk_t
chunk; ompt_data_t instance = {0}; do { if (st > 0) { chunk
.start = static_cast<uint64_t>(lb); chunk.iterations = static_cast
<uint64_t>(((ub) - (lb)) / (st) + 1); } else { chunk.start
= static_cast<uint64_t>(ub); chunk.iterations = static_cast
<uint64_t>(((lb) - (ub)) / -(st) + 1); } } while (0); instance
.ptr = &chunk; ompt_callbacks.ompt_callback_dispatch_callback
( &(team_info->parallel_data), &(task_info->task_data
), ompt_dispatch_ws_loop_chunk, instance); }
// no-op
1983#endif
1984
#if KMP_STATS_ENABLED
/* Record in OMP_loop_dynamic_iterations how many iterations the chunk that
   was just handed out contains. Reads *p_lb, *p_ub, pr->u.p.st and status
   from the enclosing scope. When status == 0 (no chunk) the count is 0 and
   the partitioned timer is popped. A zero stride is counted as 0 iterations
   instead of being used as a divisor. */
#define KMP_STATS_LOOP_END                                                     \
  {                                                                            \
    kmp_int64 u, l, t, i;                                                      \
    l = (kmp_int64)(*p_lb);                                                    \
    u = (kmp_int64)(*p_ub);                                                    \
    i = (kmp_int64)(pr->u.p.st);                                               \
    if (status == 0) {                                                         \
      t = 0;                                                                   \
      KMP_POP_PARTITIONED_TIMER();                                             \
    } else if (i == 1) {                                                       \
      if (u >= l)                                                              \
        t = u - l + 1;                                                         \
      else                                                                     \
        t = 0;                                                                 \
    } else if (i < 0) {                                                        \
      if (l >= u)                                                              \
        t = (l - u) / (-i) + 1;                                                \
      else                                                                     \
        t = 0;                                                                 \
    } else if (i > 0) {                                                        \
      if (u >= l)                                                              \
        t = (u - l) / i + 1;                                                   \
      else                                                                     \
        t = 0;                                                                 \
    } else {                                                                   \
      /* i == 0: never divide by a zero stride */                              \
      t = 0;                                                                   \
    }                                                                          \
    KMP_COUNT_VALUE(OMP_loop_dynamic_iterations, t);                           \
  }
#else
#define KMP_STATS_LOOP_END /* Nothing */
#endif
2016
2017template <typename T>
2018static int __kmp_dispatch_next(ident_t *loc, int gtid, kmp_int32 *p_last,
2019 T *p_lb, T *p_ub,
2020 typename traits_t<T>::signed_t *p_st
2021#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2022 ,
2023 void *codeptr
2024#endif
2025) {
2026
2027 typedef typename traits_t<T>::unsigned_t UT;
2028 typedef typename traits_t<T>::signed_t ST;
2029 // This is potentially slightly misleading, schedule(runtime) will appear here
2030 // even if the actual runtime schedule is static. (Which points out a
2031 // disadvantage of schedule(runtime): even when static scheduling is used it
2032 // costs more than a compile time choice to use static scheduling would.)
2033 KMP_TIME_PARTITIONED_BLOCK(OMP_loop_dynamic_scheduling)((void)0);
2034
2035 int status;
2036 dispatch_private_info_template<T> *pr;
2037 __kmp_assert_valid_gtid(gtid);
2038 kmp_info_t *th = __kmp_threads[gtid];
2039 kmp_team_t *team = th->th.th_team;
2040
2041 KMP_DEBUG_ASSERT(p_lb && p_ub && p_st)if (!(p_lb && p_ub && p_st)) { __kmp_debug_assert
("p_lb && p_ub && p_st", "openmp/runtime/src/kmp_dispatch.cpp"
, 2041); }
; // AC: these cannot be NULL
2042 KD_TRACE(if (kmp_d_debug >= 1000) { __kmp_debug_printf ("__kmp_dispatch_next: T#%d called p_lb:%p p_ub:%p p_st:%p p_last: %p\n"
, gtid, p_lb, p_ub, p_st, p_last); }
2043 1000,if (kmp_d_debug >= 1000) { __kmp_debug_printf ("__kmp_dispatch_next: T#%d called p_lb:%p p_ub:%p p_st:%p p_last: %p\n"
, gtid, p_lb, p_ub, p_st, p_last); }
2044 ("__kmp_dispatch_next: T#%d called p_lb:%p p_ub:%p p_st:%p p_last: %p\n",if (kmp_d_debug >= 1000) { __kmp_debug_printf ("__kmp_dispatch_next: T#%d called p_lb:%p p_ub:%p p_st:%p p_last: %p\n"
, gtid, p_lb, p_ub, p_st, p_last); }
2045 gtid, p_lb, p_ub, p_st, p_last))if (kmp_d_debug >= 1000) { __kmp_debug_printf ("__kmp_dispatch_next: T#%d called p_lb:%p p_ub:%p p_st:%p p_last: %p\n"
, gtid, p_lb, p_ub, p_st, p_last); }
;
2046
2047 if (team->t.t_serialized) {
2048 /* NOTE: serialize this dispatch because we are not at the active level */
2049 pr = reinterpret_cast<dispatch_private_info_template<T> *>(
2050 th->th.th_dispatch->th_disp_buffer); /* top of the stack */
2051 KMP_DEBUG_ASSERT(pr)if (!(pr)) { __kmp_debug_assert("pr", "openmp/runtime/src/kmp_dispatch.cpp"
, 2051); }
;
2052
2053 if ((status = (pr->u.p.tc != 0)) == 0) {
2054 *p_lb = 0;
2055 *p_ub = 0;
2056 // if ( p_last != NULL )
2057 // *p_last = 0;
2058 if (p_st != NULL__null)
2059 *p_st = 0;
2060 if (__kmp_env_consistency_check) {
2061 if (pr->pushed_ws != ct_none) {
2062 pr->pushed_ws = __kmp_pop_workshare(gtid, pr->pushed_ws, loc);
2063 }
2064 }
2065 } else if (pr->flags.nomerge) {
2066 kmp_int32 last;
2067 T start;
2068 UT limit, trip, init;
2069 ST incr;
2070 T chunk = pr->u.p.parm1;
2071
2072 KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_dynamic_chunked case\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next: T#%d kmp_sch_dynamic_chunked case\n"
, gtid); }
2073 gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next: T#%d kmp_sch_dynamic_chunked case\n"
, gtid); }
;
2074
2075 init = chunk * pr->u.p.count++;
2076 trip = pr->u.p.tc - 1;
2077
2078 if ((status = (init <= trip)) == 0) {
2079 *p_lb = 0;
2080 *p_ub = 0;
2081 // if ( p_last != NULL )
2082 // *p_last = 0;
2083 if (p_st != NULL__null)
2084 *p_st = 0;
2085 if (__kmp_env_consistency_check) {
2086 if (pr->pushed_ws != ct_none) {
2087 pr->pushed_ws = __kmp_pop_workshare(gtid, pr->pushed_ws, loc);
2088 }
2089 }
2090 } else {
2091 start = pr->u.p.lb;
2092 limit = chunk + init - 1;
2093 incr = pr->u.p.st;
2094
2095 if ((last = (limit >= trip)) != 0) {
2096 limit = trip;
2097#if KMP_OS_WINDOWS0
2098 pr->u.p.last_upper = pr->u.p.ub;
2099#endif /* KMP_OS_WINDOWS */
2100 }
2101 if (p_last != NULL__null)
2102 *p_last = last;
2103 if (p_st != NULL__null)
2104 *p_st = incr;
2105 if (incr == 1) {
2106 *p_lb = start + init;
2107 *p_ub = start + limit;
2108 } else {
2109 *p_lb = start + init * incr;
2110 *p_ub = start + limit * incr;
2111 }
2112
2113 if (pr->flags.ordered) {
2114 pr->u.p.ordered_lower = init;
2115 pr->u.p.ordered_upper = limit;
2116#ifdef KMP_DEBUG1
2117 {
2118 char *buff;
2119 // create format specifiers before the debug output
2120 buff = __kmp_str_format("__kmp_dispatch_next: T#%%d "
2121 "ordered_lower:%%%s ordered_upper:%%%s\n",
2122 traits_t<UT>::spec, traits_t<UT>::spec);
2123 KD_TRACE(1000, (buff, gtid, pr->u.p.ordered_lower,if (kmp_d_debug >= 1000) { __kmp_debug_printf (buff, gtid,
pr->u.p.ordered_lower, pr->u.p.ordered_upper); }
2124 pr->u.p.ordered_upper))if (kmp_d_debug >= 1000) { __kmp_debug_printf (buff, gtid,
pr->u.p.ordered_lower, pr->u.p.ordered_upper); }
;
2125 __kmp_str_free(&buff);
2126 }
2127#endif
2128 } // if
2129 } // if
2130 } else {
2131 pr->u.p.tc = 0;
2132 *p_lb = pr->u.p.lb;
2133 *p_ub = pr->u.p.ub;
2134#if KMP_OS_WINDOWS0
2135 pr->u.p.last_upper = *p_ub;
2136#endif /* KMP_OS_WINDOWS */
2137 if (p_last != NULL__null)
2138 *p_last = TRUE(!0);
2139 if (p_st != NULL__null)
2140 *p_st = pr->u.p.st;
2141 } // if
2142#ifdef KMP_DEBUG1
2143 {
2144 char *buff;
2145 // create format specifiers before the debug output
2146 buff = __kmp_str_format(
2147 "__kmp_dispatch_next: T#%%d serialized case: p_lb:%%%s "
2148 "p_ub:%%%s p_st:%%%s p_last:%%p %%d returning:%%d\n",
2149 traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec);
2150 KD_TRACE(10, (buff, gtid, *p_lb, *p_ub, *p_st, p_last,if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, *
p_lb, *p_ub, *p_st, p_last, (p_last ? *p_last : 0), status); }
2151 (p_last ? *p_last : 0), status))if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, *
p_lb, *p_ub, *p_st, p_last, (p_last ? *p_last : 0), status); }
;
2152 __kmp_str_free(&buff);
2153 }
2154#endif
2155#if INCLUDE_SSC_MARKS(1 && 1)
2156 SSC_MARK_DISPATCH_NEXT()__asm__ __volatile__("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 "
::"i"(0xd697) : "%ebx")
;
2157#endif
2158 OMPT_LOOP_DISPATCH(*p_lb, *p_ub, pr->u.p.st, status)if (ompt_enabled.ompt_callback_dispatch && status) { ompt_team_info_t
*team_info = __ompt_get_teaminfo(0, __null); ompt_task_info_t
*task_info = __ompt_get_task_info_object(0); ompt_dispatch_chunk_t
chunk; ompt_data_t instance = {0}; do { if (pr->u.p.st >
0) { chunk.start = static_cast<uint64_t>(*p_lb); chunk
.iterations = static_cast<uint64_t>(((*p_ub) - (*p_lb))
/ (pr->u.p.st) + 1); } else { chunk.start = static_cast<
uint64_t>(*p_ub); chunk.iterations = static_cast<uint64_t
>(((*p_lb) - (*p_ub)) / -(pr->u.p.st) + 1); } } while (
0); instance.ptr = &chunk; ompt_callbacks.ompt_callback_dispatch_callback
( &(team_info->parallel_data), &(task_info->task_data
), ompt_dispatch_ws_loop_chunk, instance); }
;
2159 OMPT_LOOP_ENDif (status == 0) { if (ompt_enabled.ompt_callback_work) { ompt_team_info_t
*team_info = __ompt_get_teaminfo(0, __null); ompt_task_info_t
*task_info = __ompt_get_task_info_object(0); ompt_callbacks.
ompt_callback_work_callback( ompt_work_loop, ompt_scope_end, &
(team_info->parallel_data), &(task_info->task_data)
, 0, codeptr); } }
;
2160 KMP_STATS_LOOP_END;
2161 return status;
2162 } else {
2163 kmp_int32 last = 0;
2164 dispatch_shared_info_template<T> volatile *sh;
2165
2166 KMP_DEBUG_ASSERT(th->th.th_dispatch ==if (!(th->th.th_dispatch == &th->th.th_team->t.t_dispatch
[th->th.th_info.ds.ds_tid])) { __kmp_debug_assert("th->th.th_dispatch == &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]"
, "openmp/runtime/src/kmp_dispatch.cpp", 2167); }
2167 &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid])if (!(th->th.th_dispatch == &th->th.th_team->t.t_dispatch
[th->th.th_info.ds.ds_tid])) { __kmp_debug_assert("th->th.th_dispatch == &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]"
, "openmp/runtime/src/kmp_dispatch.cpp", 2167); }
;
2168
2169 pr = reinterpret_cast<dispatch_private_info_template<T> *>(
2170 th->th.th_dispatch->th_dispatch_pr_current);
2171 KMP_DEBUG_ASSERT(pr)if (!(pr)) { __kmp_debug_assert("pr", "openmp/runtime/src/kmp_dispatch.cpp"
, 2171); }
;
2172 sh = reinterpret_cast<dispatch_shared_info_template<T> volatile *>(
2173 th->th.th_dispatch->th_dispatch_sh_current);
2174 KMP_DEBUG_ASSERT(sh)if (!(sh)) { __kmp_debug_assert("sh", "openmp/runtime/src/kmp_dispatch.cpp"
, 2174); }
;
2175
2176#if KMP_USE_HIER_SCHED0
2177 if (pr->flags.use_hier)
2178 status = sh->hier->next(loc, gtid, pr, &last, p_lb, p_ub, p_st);
2179 else
2180#endif // KMP_USE_HIER_SCHED
2181 status = __kmp_dispatch_next_algorithm<T>(gtid, pr, sh, &last, p_lb, p_ub,
2182 p_st, th->th.th_team_nproc,
2183 th->th.th_info.ds.ds_tid);
2184 // status == 0: no more iterations to execute
2185 if (status == 0) {
2186 ST num_done;
2187 num_done = test_then_inc<ST>(&sh->u.s.num_done);
2188#ifdef KMP_DEBUG1
2189 {
2190 char *buff;
2191 // create format specifiers before the debug output
2192 buff = __kmp_str_format(
2193 "__kmp_dispatch_next: T#%%d increment num_done:%%%s\n",
2194 traits_t<ST>::spec);
2195 KD_TRACE(10, (buff, gtid, sh->u.s.num_done))if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, sh
->u.s.num_done); }
;
2196 __kmp_str_free(&buff);
2197 }
2198#endif
2199
2200#if KMP_USE_HIER_SCHED0
2201 pr->flags.use_hier = FALSE0;
2202#endif
2203 if (num_done == th->th.th_team_nproc - 1) {
2204#if KMP_STATIC_STEAL_ENABLED1
2205 if (pr->schedule == kmp_sch_static_steal) {
2206 int i;
2207 int idx = (th->th.th_dispatch->th_disp_index - 1) %
2208 __kmp_dispatch_num_buffers; // current loop index
2209 // loop complete, safe to destroy locks used for stealing
2210 for (i = 0; i < th->th.th_team_nproc; ++i) {
2211 dispatch_private_info_template<T> *buf =
2212 reinterpret_cast<dispatch_private_info_template<T> *>(
2213 &team->t.t_dispatch[i].th_disp_buffer[idx]);
2214 KMP_ASSERT(buf->steal_flag == THIEF)if (!(buf->steal_flag == THIEF)) { __kmp_debug_assert("buf->steal_flag == THIEF"
, "openmp/runtime/src/kmp_dispatch.cpp", 2214); }
; // buffer must be inactive
2215 KMP_ATOMIC_ST_RLX(&buf->steal_flag, UNUSED)(&buf->steal_flag)->store(UNUSED, std::memory_order_relaxed
)
;
2216 if (traits_t<T>::type_size > 4) {
2217 // destroy locks used for stealing
2218 kmp_lock_t *lck = buf->u.p.steal_lock;
2219 KMP_ASSERT(lck != NULL)if (!(lck != __null)) { __kmp_debug_assert("lck != NULL", "openmp/runtime/src/kmp_dispatch.cpp"
, 2219); }
;
2220 __kmp_destroy_lock(lck);
2221 __kmp_free(lck)___kmp_free((lck), "openmp/runtime/src/kmp_dispatch.cpp", 2221
)
;
2222 buf->u.p.steal_lock = NULL__null;
2223 }
2224 }
2225 }
2226#endif
2227 /* NOTE: release shared buffer to be reused */
2228
2229 KMP_MB(); /* Flush all pending memory write invalidates. */
2230
2231 sh->u.s.num_done = 0;
2232 sh->u.s.iteration = 0;
2233
2234 /* TODO replace with general release procedure? */
2235 if (pr->flags.ordered) {
2236 sh->u.s.ordered_iteration = 0;
2237 }
2238
2239 sh->buffer_index += __kmp_dispatch_num_buffers;
2240 KD_TRACE(100, ("__kmp_dispatch_next: T#%d change buffer_index:%d\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next: T#%d change buffer_index:%d\n"
, gtid, sh->buffer_index); }
2241 gtid, sh->buffer_index))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next: T#%d change buffer_index:%d\n"
, gtid, sh->buffer_index); }
;
2242
2243 KMP_MB(); /* Flush all pending memory write invalidates. */
2244
2245 } // if
2246 if (__kmp_env_consistency_check) {
2247 if (pr->pushed_ws != ct_none) {
2248 pr->pushed_ws = __kmp_pop_workshare(gtid, pr->pushed_ws, loc);
2249 }
2250 }
2251
2252 th->th.th_dispatch->th_deo_fcn = NULL__null;
2253 th->th.th_dispatch->th_dxo_fcn = NULL__null;
2254 th->th.th_dispatch->th_dispatch_sh_current = NULL__null;
2255 th->th.th_dispatch->th_dispatch_pr_current = NULL__null;
2256 } // if (status == 0)
2257#if KMP_OS_WINDOWS0
2258 else if (last) {
2259 pr->u.p.last_upper = pr->u.p.ub;
2260 }
2261#endif /* KMP_OS_WINDOWS */
2262 if (p_last != NULL__null && status != 0)
2263 *p_last = last;
2264 } // if
2265
2266#ifdef KMP_DEBUG1
2267 {
2268 char *buff;
2269 // create format specifiers before the debug output
2270 buff = __kmp_str_format(
2271 "__kmp_dispatch_next: T#%%d normal case: "
2272 "p_lb:%%%s p_ub:%%%s p_st:%%%s p_last:%%p (%%d) returning:%%d\n",
2273 traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec);
2274 KD_TRACE(10, (buff, gtid, *p_lb, *p_ub, p_st ? *p_st : 0, p_last,if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, *
p_lb, *p_ub, p_st ? *p_st : 0, p_last, (p_last ? *p_last : 0)
, status); }
2275 (p_last ? *p_last : 0), status))if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, *
p_lb, *p_ub, p_st ? *p_st : 0, p_last, (p_last ? *p_last : 0)
, status); }
;
2276 __kmp_str_free(&buff);
2277 }
2278#endif
2279#if INCLUDE_SSC_MARKS(1 && 1)
2280 SSC_MARK_DISPATCH_NEXT()__asm__ __volatile__("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 "
::"i"(0xd697) : "%ebx")
;
2281#endif
2282 OMPT_LOOP_DISPATCH(*p_lb, *p_ub, pr->u.p.st, status)if (ompt_enabled.ompt_callback_dispatch && status) { ompt_team_info_t
*team_info = __ompt_get_teaminfo(0, __null); ompt_task_info_t
*task_info = __ompt_get_task_info_object(0); ompt_dispatch_chunk_t
chunk; ompt_data_t instance = {0}; do { if (pr->u.p.st >
0) { chunk.start = static_cast<uint64_t>(*p_lb); chunk
.iterations = static_cast<uint64_t>(((*p_ub) - (*p_lb))
/ (pr->u.p.st) + 1); } else { chunk.start = static_cast<
uint64_t>(*p_ub); chunk.iterations = static_cast<uint64_t
>(((*p_lb) - (*p_ub)) / -(pr->u.p.st) + 1); } } while (
0); instance.ptr = &chunk; ompt_callbacks.ompt_callback_dispatch_callback
( &(team_info->parallel_data), &(task_info->task_data
), ompt_dispatch_ws_loop_chunk, instance); }
;
2283 OMPT_LOOP_ENDif (status == 0) { if (ompt_enabled.ompt_callback_work) { ompt_team_info_t
*team_info = __ompt_get_teaminfo(0, __null); ompt_task_info_t
*task_info = __ompt_get_task_info_object(0); ompt_callbacks.
ompt_callback_work_callback( ompt_work_loop, ompt_scope_end, &
(team_info->parallel_data), &(task_info->task_data)
, 0, codeptr); } }
;
2284 KMP_STATS_LOOP_END;
2285 return status;
2286}
2287
2288/*!
2289@ingroup WORK_SHARING
2290@param loc source location information
2291@param global_tid global thread number
2292@return Zero if the parallel region is not active and this thread should execute
2293all sections, non-zero otherwise.
2294
2295Beginning of sections construct.
2296There are no implicit barriers in the "sections" calls, rather the compiler
2297should introduce an explicit barrier if it is required.
2298
2299This implementation is based on __kmp_dispatch_init, using same constructs for
2300shared data (we can't have sections nested directly in omp for loop, there
2301should be a parallel region in between)
2302*/
2303kmp_int32 __kmpc_sections_init(ident_t *loc, kmp_int32 gtid) {
2304
2305 int active;
2306 kmp_info_t *th;
2307 kmp_team_t *team;
2308 kmp_uint32 my_buffer_index;
2309 dispatch_shared_info_template<kmp_int32> volatile *sh;
2310
2311 KMP_DEBUG_ASSERT(__kmp_init_serial)if (!(__kmp_init_serial)) { __kmp_debug_assert("__kmp_init_serial"
, "openmp/runtime/src/kmp_dispatch.cpp", 2311); }
;
2312
2313 if (!TCR_4(__kmp_init_parallel)(__kmp_init_parallel))
2314 __kmp_parallel_initialize();
2315 __kmp_resume_if_soft_paused();
2316
2317 /* setup data */
2318 th = __kmp_threads[gtid];
2319 team = th->th.th_team;
2320 active = !team->t.t_serialized;
2321 th->th.th_ident = loc;
2322
2323 KMP_COUNT_BLOCK(OMP_SECTIONS)((void)0);
2324 KD_TRACE(10, ("__kmpc_sections: called by T#%d\n", gtid))if (kmp_d_debug >= 10) { __kmp_debug_printf ("__kmpc_sections: called by T#%d\n"
, gtid); }
;
2325
2326 if (active) {
2327 // Setup sections in the same way as dynamic scheduled loops.
2328 // We need one shared data: which section is to execute next.
2329 // (in case parallel is not active, all sections will be executed on the
2330 // same thread)
2331 KMP_DEBUG_ASSERT(th->th.th_dispatch ==if (!(th->th.th_dispatch == &th->th.th_team->t.t_dispatch
[th->th.th_info.ds.ds_tid])) { __kmp_debug_assert("th->th.th_dispatch == &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]"
, "openmp/runtime/src/kmp_dispatch.cpp", 2332); }
2332 &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid])if (!(th->th.th_dispatch == &th->th.th_team->t.t_dispatch
[th->th.th_info.ds.ds_tid])) { __kmp_debug_assert("th->th.th_dispatch == &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]"
, "openmp/runtime/src/kmp_dispatch.cpp", 2332); }
;
2333
2334 my_buffer_index = th->th.th_dispatch->th_disp_index++;
2335
2336 // reuse shared data structures from dynamic sched loops:
2337 sh = reinterpret_cast<dispatch_shared_info_template<kmp_int32> volatile *>(
2338 &team->t.t_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]);
2339 KD_TRACE(10, ("__kmpc_sections_init: T#%d my_buffer_index:%d\n", gtid,if (kmp_d_debug >= 10) { __kmp_debug_printf ("__kmpc_sections_init: T#%d my_buffer_index:%d\n"
, gtid, my_buffer_index); }
2340 my_buffer_index))if (kmp_d_debug >= 10) { __kmp_debug_printf ("__kmpc_sections_init: T#%d my_buffer_index:%d\n"
, gtid, my_buffer_index); }
;
2341
2342 th->th.th_dispatch->th_deo_fcn = __kmp_dispatch_deo_error;
2343 th->th.th_dispatch->th_dxo_fcn = __kmp_dispatch_dxo_error;
2344
2345 KD_TRACE(100, ("__kmpc_sections_init: T#%d before wait: my_buffer_index:%d "if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmpc_sections_init: T#%d before wait: my_buffer_index:%d "
"sh->buffer_index:%d\n", gtid, my_buffer_index, sh->buffer_index
); }
2346 "sh->buffer_index:%d\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmpc_sections_init: T#%d before wait: my_buffer_index:%d "
"sh->buffer_index:%d\n", gtid, my_buffer_index, sh->buffer_index
); }
2347 gtid, my_buffer_index, sh->buffer_index))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmpc_sections_init: T#%d before wait: my_buffer_index:%d "
"sh->buffer_index:%d\n", gtid, my_buffer_index, sh->buffer_index
); }
;
2348 __kmp_wait<kmp_uint32>(&sh->buffer_index, my_buffer_index,
2349 __kmp_eq<kmp_uint32> USE_ITT_BUILD_ARG(NULL), __null);
2350 // Note: KMP_WAIT() cannot be used there: buffer index and
2351 // my_buffer_index are *always* 32-bit integers.
2352 KMP_MB();
2353 KD_TRACE(100, ("__kmpc_sections_init: T#%d after wait: my_buffer_index:%d "if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmpc_sections_init: T#%d after wait: my_buffer_index:%d "
"sh->buffer_index:%d\n", gtid, my_buffer_index, sh->buffer_index
); }
2354 "sh->buffer_index:%d\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmpc_sections_init: T#%d after wait: my_buffer_index:%d "
"sh->buffer_index:%d\n", gtid, my_buffer_index, sh->buffer_index
); }
2355 gtid, my_buffer_index, sh->buffer_index))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmpc_sections_init: T#%d after wait: my_buffer_index:%d "
"sh->buffer_index:%d\n", gtid, my_buffer_index, sh->buffer_index
); }
;
2356
2357 th->th.th_dispatch->th_dispatch_pr_current =
2358 nullptr; // sections construct doesn't need private data
2359 th->th.th_dispatch->th_dispatch_sh_current =
2360 CCAST(dispatch_shared_info_t *, (volatile dispatch_shared_info_t *)sh)const_cast<dispatch_shared_info_t *>((volatile dispatch_shared_info_t
*)sh)
;
2361 }
2362
2363#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2364 if (ompt_enabled.ompt_callback_work) {
2365 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL__null);
2366 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2367 ompt_callbacks.ompt_callback(ompt_callback_work)ompt_callback_work_callback(
2368 ompt_work_sections, ompt_scope_begin, &(team_info->parallel_data),
2369 &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0));
2370 }
2371#endif
2372 KMP_PUSH_PARTITIONED_TIMER(OMP_sections)((void)0);
2373
2374 return active;
2375}
2376
2377/*!
2378@ingroup WORK_SHARING
2379@param loc source location information
2380@param global_tid global thread number
2381@param numberOfSections number of sections in the 'sections' construct
2382@return unsigned [from 0 to n) - number (id) of the section to execute next on
2383this thread. n (or any other number not in range) - nothing to execute on this
2384thread
2385*/
2386
2387kmp_int32 __kmpc_next_section(ident_t *loc, kmp_int32 gtid,
2388 kmp_int32 numberOfSections) {
2389
2390 KMP_TIME_PARTITIONED_BLOCK(OMP_sections)((void)0);
2391
2392 kmp_info_t *th = __kmp_threads[gtid];
2393#ifdef KMP_DEBUG1
2394 kmp_team_t *team = th->th.th_team;
2395#endif
2396
2397 KD_TRACE(1000, ("__kmp_dispatch_next: T#%d; number of sections:%d\n", gtid,if (kmp_d_debug >= 1000) { __kmp_debug_printf ("__kmp_dispatch_next: T#%d; number of sections:%d\n"
, gtid, numberOfSections); }
2398 numberOfSections))if (kmp_d_debug >= 1000) { __kmp_debug_printf ("__kmp_dispatch_next: T#%d; number of sections:%d\n"
, gtid, numberOfSections); }
;
2399
2400 // For serialized case we should not call this function:
2401 KMP_DEBUG_ASSERT(!team->t.t_serialized)if (!(!team->t.t_serialized)) { __kmp_debug_assert("!team->t.t_serialized"
, "openmp/runtime/src/kmp_dispatch.cpp", 2401); }
;
2402
2403 dispatch_shared_info_template<kmp_int32> volatile *sh;
2404
2405 KMP_DEBUG_ASSERT(th->th.th_dispatch ==if (!(th->th.th_dispatch == &th->th.th_team->t.t_dispatch
[th->th.th_info.ds.ds_tid])) { __kmp_debug_assert("th->th.th_dispatch == &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]"
, "openmp/runtime/src/kmp_dispatch.cpp", 2406); }
2406 &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid])if (!(th->th.th_dispatch == &th->th.th_team->t.t_dispatch
[th->th.th_info.ds.ds_tid])) { __kmp_debug_assert("th->th.th_dispatch == &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]"
, "openmp/runtime/src/kmp_dispatch.cpp", 2406); }
;
2407
2408 KMP_DEBUG_ASSERT(!(th->th.th_dispatch->th_dispatch_pr_current))if (!(!(th->th.th_dispatch->th_dispatch_pr_current))) {
__kmp_debug_assert("!(th->th.th_dispatch->th_dispatch_pr_current)"
, "openmp/runtime/src/kmp_dispatch.cpp", 2408); }
;
2409 sh = reinterpret_cast<dispatch_shared_info_template<kmp_int32> volatile *>(
2410 th->th.th_dispatch->th_dispatch_sh_current);
2411 KMP_DEBUG_ASSERT(sh)if (!(sh)) { __kmp_debug_assert("sh", "openmp/runtime/src/kmp_dispatch.cpp"
, 2411); }
;
2412
2413 kmp_int32 sectionIndex = 0;
2414 bool moreSectionsToExecute = true;
2415
2416 // Find section to execute:
2417 sectionIndex = test_then_inc<kmp_int32>((kmp_int32 *)&sh->u.s.iteration);
2418 if (sectionIndex >= numberOfSections) {
2419 moreSectionsToExecute = false;
2420 }
2421
2422 // status == 0: no more sections to execute;
2423 // OMPTODO: __kmpc_end_sections could be bypassed?
2424 if (!moreSectionsToExecute) {
2425 kmp_int32 num_done;
2426
2427 num_done = test_then_inc<kmp_int32>((kmp_int32 *)(&sh->u.s.num_done));
2428
2429 if (num_done == th->th.th_team_nproc - 1) {
2430 /* NOTE: release this buffer to be reused */
2431
2432 KMP_MB(); /* Flush all pending memory write invalidates. */
2433
2434 sh->u.s.num_done = 0;
2435 sh->u.s.iteration = 0;
2436
2437 KMP_MB(); /* Flush all pending memory write invalidates. */
2438
2439 sh->buffer_index += __kmp_dispatch_num_buffers;
2440 KD_TRACE(100, ("__kmpc_next_section: T#%d change buffer_index:%d\n", gtid,if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmpc_next_section: T#%d change buffer_index:%d\n"
, gtid, sh->buffer_index); }
2441 sh->buffer_index))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmpc_next_section: T#%d change buffer_index:%d\n"
, gtid, sh->buffer_index); }
;
2442
2443 KMP_MB(); /* Flush all pending memory write invalidates. */
2444
2445 } // if
2446
2447 th->th.th_dispatch->th_deo_fcn = NULL__null;
2448 th->th.th_dispatch->th_dxo_fcn = NULL__null;
2449 th->th.th_dispatch->th_dispatch_sh_current = NULL__null;
2450 th->th.th_dispatch->th_dispatch_pr_current = NULL__null;
2451
2452#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2453 if (ompt_enabled.ompt_callback_dispatch) {
2454 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL__null);
2455 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2456 ompt_data_t instance = ompt_data_none{0};
2457 instance.ptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
2458 ompt_callbacks.ompt_callback(ompt_callback_dispatch)ompt_callback_dispatch_callback(
2459 &(team_info->parallel_data), &(task_info->task_data),
2460 ompt_dispatch_section, instance);
2461 }
2462#endif
2463 KMP_POP_PARTITIONED_TIMER()((void)0);
2464 }
2465
2466 return sectionIndex;
2467}
2468
2469/*!
2470@ingroup WORK_SHARING
2471@param loc source location information
2472@param global_tid global thread number
2473
2474End of "sections" construct.
2475Don't need to wait here: barrier is added separately when needed.
2476*/
2477void __kmpc_end_sections(ident_t *loc, kmp_int32 gtid) {
2478
2479 kmp_info_t *th = __kmp_threads[gtid];
2480 int active = !th->th.th_team->t.t_serialized;
2481
2482 KD_TRACE(100, ("__kmpc_end_sections: T#%d called\n", gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmpc_end_sections: T#%d called\n"
, gtid); }
;
2483
2484 if (!active) {
2485 // In active case call finalization is done in __kmpc_next_section
2486#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2487 if (ompt_enabled.ompt_callback_work) {
2488 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL__null);
2489 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2490 ompt_callbacks.ompt_callback(ompt_callback_work)ompt_callback_work_callback(
2491 ompt_work_sections, ompt_scope_end, &(team_info->parallel_data),
2492 &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0));
2493 }
2494#endif
2495 KMP_POP_PARTITIONED_TIMER()((void)0);
2496 }
2497
2498 KD_TRACE(100, ("__kmpc_end_sections: T#%d returned\n", gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmpc_end_sections: T#%d returned\n"
, gtid); }
;
2499}
2500
2501template <typename T>
2502static void __kmp_dist_get_bounds(ident_t *loc, kmp_int32 gtid,
2503 kmp_int32 *plastiter, T *plower, T *pupper,
2504 typename traits_t<T>::signed_t incr) {
2505 typedef typename traits_t<T>::unsigned_t UT;
2506 kmp_uint32 team_id;
2507 kmp_uint32 nteams;
2508 UT trip_count;
2509 kmp_team_t *team;
2510 kmp_info_t *th;
2511
2512 KMP_DEBUG_ASSERT(plastiter && plower && pupper)if (!(plastiter && plower && pupper)) { __kmp_debug_assert
("plastiter && plower && pupper", "openmp/runtime/src/kmp_dispatch.cpp"
, 2512); }
;
2513 KE_TRACE(10, ("__kmpc_dist_get_bounds called (%d)\n", gtid))if (kmp_e_debug >= 10) { __kmp_debug_printf ("__kmpc_dist_get_bounds called (%d)\n"
, gtid); }
;
2514#ifdef KMP_DEBUG1
2515 typedef typename traits_t<T>::signed_t ST;
2516 {
2517 char *buff;
2518 // create format specifiers before the debug output
2519 buff = __kmp_str_format("__kmpc_dist_get_bounds: T#%%d liter=%%d "
2520 "iter=(%%%s, %%%s, %%%s) signed?<%s>\n",
2521 traits_t<T>::spec, traits_t<T>::spec,
2522 traits_t<ST>::spec, traits_t<T>::spec);
2523 KD_TRACE(100, (buff, gtid, *plastiter, *plower, *pupper, incr))if (kmp_d_debug >= 100) { __kmp_debug_printf (buff, gtid, *
plastiter, *plower, *pupper, incr); }
;
2524 __kmp_str_free(&buff);
2525 }
2526#endif
2527
2528 if (__kmp_env_consistency_check) {
2529 if (incr == 0) {
2530 __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
2531 loc);
2532 }
2533 if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
2534 // The loop is illegal.
2535 // Some zero-trip loops maintained by compiler, e.g.:
2536 // for(i=10;i<0;++i) // lower >= upper - run-time check
2537 // for(i=0;i>10;--i) // lower <= upper - run-time check
2538 // for(i=0;i>10;++i) // incr > 0 - compile-time check
2539 // for(i=10;i<0;--i) // incr < 0 - compile-time check
2540 // Compiler does not check the following illegal loops:
2541 // for(i=0;i<10;i+=incr) // where incr<0
2542 // for(i=10;i>0;i-=incr) // where incr<0
2543 __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
2544 }
2545 }
2546 __kmp_assert_valid_gtid(gtid);
2547 th = __kmp_threads[gtid];
2548 team = th->th.th_team;
2549 KMP_DEBUG_ASSERT(th->th.th_teams_microtask)if (!(th->th.th_teams_microtask)) { __kmp_debug_assert("th->th.th_teams_microtask"
, "openmp/runtime/src/kmp_dispatch.cpp", 2549); }
; // we are in the teams construct
2550 nteams = th->th.th_teams_size.nteams;
2551 team_id = team->t.t_master_tid;
2552 KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc)if (!(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc
)) { __kmp_debug_assert("nteams == (kmp_uint32)team->t.t_parent->t.t_nproc"
, "openmp/runtime/src/kmp_dispatch.cpp", 2552); }
;
2553
2554 // compute global trip count
2555 if (incr == 1) {
2556 trip_count = *pupper - *plower + 1;
2557 } else if (incr == -1) {
2558 trip_count = *plower - *pupper + 1;
2559 } else if (incr > 0) {
2560 // upper-lower can exceed the limit of signed type
2561 trip_count = (UT)(*pupper - *plower) / incr + 1;
2562 } else {
2563 trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
2564 }
2565
2566 if (trip_count <= nteams) {
2567 KMP_DEBUG_ASSERT(if (!(__kmp_static == kmp_sch_static_greedy || __kmp_static ==
kmp_sch_static_balanced)) { __kmp_debug_assert("__kmp_static == kmp_sch_static_greedy || __kmp_static == kmp_sch_static_balanced"
, "openmp/runtime/src/kmp_dispatch.cpp", 2570); }
2568 __kmp_static == kmp_sch_static_greedy ||if (!(__kmp_static == kmp_sch_static_greedy || __kmp_static ==
kmp_sch_static_balanced)) { __kmp_debug_assert("__kmp_static == kmp_sch_static_greedy || __kmp_static == kmp_sch_static_balanced"
, "openmp/runtime/src/kmp_dispatch.cpp", 2570); }
2569 __kmp_static ==if (!(__kmp_static == kmp_sch_static_greedy || __kmp_static ==
kmp_sch_static_balanced)) { __kmp_debug_assert("__kmp_static == kmp_sch_static_greedy || __kmp_static == kmp_sch_static_balanced"
, "openmp/runtime/src/kmp_dispatch.cpp", 2570); }
2570 kmp_sch_static_balanced)if (!(__kmp_static == kmp_sch_static_greedy || __kmp_static ==
kmp_sch_static_balanced)) { __kmp_debug_assert("__kmp_static == kmp_sch_static_greedy || __kmp_static == kmp_sch_static_balanced"
, "openmp/runtime/src/kmp_dispatch.cpp", 2570); }
; // Unknown static scheduling type.
2571 // only some teams get single iteration, others get nothing
2572 if (team_id < trip_count) {
2573 *pupper = *plower = *plower + team_id * incr;
2574 } else {
2575 *plower = *pupper + incr; // zero-trip loop
2576 }
2577 if (plastiter != NULL__null)
2578 *plastiter = (team_id == trip_count - 1);
2579 } else {
2580 if (__kmp_static == kmp_sch_static_balanced) {
2581 UT chunk = trip_count / nteams;
2582 UT extras = trip_count % nteams;
2583 *plower +=
2584 incr * (team_id * chunk + (team_id < extras ? team_id : extras));
2585 *pupper = *plower + chunk * incr - (team_id < extras ? 0 : incr);
2586 if (plastiter != NULL__null)
2587 *plastiter = (team_id == nteams - 1);
2588 } else {
2589 T chunk_inc_count =
2590 (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr;
2591 T upper = *pupper;
2592 KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy)if (!(__kmp_static == kmp_sch_static_greedy)) { __kmp_debug_assert
("__kmp_static == kmp_sch_static_greedy", "openmp/runtime/src/kmp_dispatch.cpp"
, 2592); }
;
2593 // Unknown static scheduling type.
2594 *plower += team_id * chunk_inc_count;
2595 *pupper = *plower + chunk_inc_count - incr;
2596 // Check/correct bounds if needed
2597 if (incr > 0) {
2598 if (*pupper < *plower)
2599 *pupper = traits_t<T>::max_value;
2600 if (plastiter != NULL__null)
2601 *plastiter = *plower <= upper && *pupper > upper - incr;
2602 if (*pupper > upper)
2603 *pupper = upper; // tracker C73258
2604 } else {
2605 if (*pupper > *plower)
2606 *pupper = traits_t<T>::min_value;
2607 if (plastiter != NULL__null)
2608 *plastiter = *plower >= upper && *pupper < upper - incr;
2609 if (*pupper < upper)
2610 *pupper = upper; // tracker C73258
2611 }
2612 }
2613 }
2614}
2615
2616//-----------------------------------------------------------------------------
2617// Dispatch routines
2618// Transfer call to template< type T >
2619// __kmp_dispatch_init( ident_t *loc, int gtid, enum sched_type schedule,
2620// T lb, T ub, ST st, ST chunk )
2621extern "C" {
2622
2623/*!
2624@ingroup WORK_SHARING
2625@{
2626@param loc Source location
2627@param gtid Global thread id
2628@param schedule Schedule type
2629@param lb Lower bound
2630@param ub Upper bound
2631@param st Step (or increment if you prefer)
2632@param chunk The chunk size to block with
2633
2634This function prepares the runtime to start a dynamically scheduled for loop,
2635saving the loop arguments.
2636These functions are all identical apart from the types of the arguments.
2637*/
2638
2639void __kmpc_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
2640 enum sched_type schedule, kmp_int32 lb,
2641 kmp_int32 ub, kmp_int32 st, kmp_int32 chunk) {
2642 KMP_DEBUG_ASSERT(__kmp_init_serial)if (!(__kmp_init_serial)) { __kmp_debug_assert("__kmp_init_serial"
, "openmp/runtime/src/kmp_dispatch.cpp", 2642); }
;
2643#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2644 OMPT_STORE_RETURN_ADDRESS(gtid)OmptReturnAddressGuard ReturnAddressGuard{gtid, __builtin_return_address
(0)};
;
2645#endif
2646 __kmp_dispatch_init<kmp_int32>(loc, gtid, schedule, lb, ub, st, chunk, true);
2647}
2648/*!
2649See @ref __kmpc_dispatch_init_4
2650*/
2651void __kmpc_dispatch_init_4u(ident_t *loc, kmp_int32 gtid,
2652 enum sched_type schedule, kmp_uint32 lb,
2653 kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk) {
2654 KMP_DEBUG_ASSERT(__kmp_init_serial)if (!(__kmp_init_serial)) { __kmp_debug_assert("__kmp_init_serial"
, "openmp/runtime/src/kmp_dispatch.cpp", 2654); }
;
2655#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2656 OMPT_STORE_RETURN_ADDRESS(gtid)OmptReturnAddressGuard ReturnAddressGuard{gtid, __builtin_return_address
(0)};
;
2657#endif
2658 __kmp_dispatch_init<kmp_uint32>(loc, gtid, schedule, lb, ub, st, chunk, true);
2659}
2660
2661/*!
2662See @ref __kmpc_dispatch_init_4
2663*/
2664void __kmpc_dispatch_init_8(ident_t *loc, kmp_int32 gtid,
2665 enum sched_type schedule, kmp_int64 lb,
2666 kmp_int64 ub, kmp_int64 st, kmp_int64 chunk) {
2667 KMP_DEBUG_ASSERT(__kmp_init_serial)if (!(__kmp_init_serial)) { __kmp_debug_assert("__kmp_init_serial"
, "openmp/runtime/src/kmp_dispatch.cpp", 2667); }
;
2668#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2669 OMPT_STORE_RETURN_ADDRESS(gtid)OmptReturnAddressGuard ReturnAddressGuard{gtid, __builtin_return_address
(0)};
;
2670#endif
2671 __kmp_dispatch_init<kmp_int64>(loc, gtid, schedule, lb, ub, st, chunk, true);
2672}
2673
2674/*!
2675See @ref __kmpc_dispatch_init_4
2676*/
2677void __kmpc_dispatch_init_8u(ident_t *loc, kmp_int32 gtid,
2678 enum sched_type schedule, kmp_uint64 lb,
2679 kmp_uint64 ub, kmp_int64 st, kmp_int64 chunk) {
2680 KMP_DEBUG_ASSERT(__kmp_init_serial)if (!(__kmp_init_serial)) { __kmp_debug_assert("__kmp_init_serial"
, "openmp/runtime/src/kmp_dispatch.cpp", 2680); }
;
2681#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2682 OMPT_STORE_RETURN_ADDRESS(gtid)OmptReturnAddressGuard ReturnAddressGuard{gtid, __builtin_return_address
(0)};
;
2683#endif
2684 __kmp_dispatch_init<kmp_uint64>(loc, gtid, schedule, lb, ub, st, chunk, true);
2685}
2686
2687/*!
2688See @ref __kmpc_dispatch_init_4
2689
2690Difference from __kmpc_dispatch_init set of functions is these functions
2691are called for composite distribute parallel for construct. Thus before
2692regular iterations dispatching we need to calc per-team iteration space.
2693
2694These functions are all identical apart from the types of the arguments.
2695*/
2696void __kmpc_dist_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
2697 enum sched_type schedule, kmp_int32 *p_last,
2698 kmp_int32 lb, kmp_int32 ub, kmp_int32 st,
2699 kmp_int32 chunk) {
2700 KMP_DEBUG_ASSERT(__kmp_init_serial)if (!(__kmp_init_serial)) { __kmp_debug_assert("__kmp_init_serial"
, "openmp/runtime/src/kmp_dispatch.cpp", 2700); }
;
2701#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2702 OMPT_STORE_RETURN_ADDRESS(gtid)OmptReturnAddressGuard ReturnAddressGuard{gtid, __builtin_return_address
(0)};
;
2703#endif
2704 __kmp_dist_get_bounds<kmp_int32>(loc, gtid, p_last, &lb, &ub, st);
2705 __kmp_dispatch_init<kmp_int32>(loc, gtid, schedule, lb, ub, st, chunk, true);
2706}
2707
2708void __kmpc_dist_dispatch_init_4u(ident_t *loc, kmp_int32 gtid,
2709 enum sched_type schedule, kmp_int32 *p_last,
2710 kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st,
2711 kmp_int32 chunk) {
2712 KMP_DEBUG_ASSERT(__kmp_init_serial)if (!(__kmp_init_serial)) { __kmp_debug_assert("__kmp_init_serial"
, "openmp/runtime/src/kmp_dispatch.cpp", 2712); }
;
2713#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2714 OMPT_STORE_RETURN_ADDRESS(gtid)OmptReturnAddressGuard ReturnAddressGuard{gtid, __builtin_return_address
(0)};
;
2715#endif
2716 __kmp_dist_get_bounds<kmp_uint32>(loc, gtid, p_last, &lb, &ub, st);
2717 __kmp_dispatch_init<kmp_uint32>(loc, gtid, schedule, lb, ub, st, chunk, true);
2718}
2719
2720void __kmpc_dist_dispatch_init_8(ident_t *loc, kmp_int32 gtid,
2721 enum sched_type schedule, kmp_int32 *p_last,
2722 kmp_int64 lb, kmp_int64 ub, kmp_int64 st,
2723 kmp_int64 chunk) {
2724 KMP_DEBUG_ASSERT(__kmp_init_serial)if (!(__kmp_init_serial)) { __kmp_debug_assert("__kmp_init_serial"
, "openmp/runtime/src/kmp_dispatch.cpp", 2724); }
;
2725#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2726 OMPT_STORE_RETURN_ADDRESS(gtid)OmptReturnAddressGuard ReturnAddressGuard{gtid, __builtin_return_address
(0)};
;
2727#endif
2728 __kmp_dist_get_bounds<kmp_int64>(loc, gtid, p_last, &lb, &ub, st);
2729 __kmp_dispatch_init<kmp_int64>(loc, gtid, schedule, lb, ub, st, chunk, true);
2730}
2731
2732void __kmpc_dist_dispatch_init_8u(ident_t *loc, kmp_int32 gtid,
2733 enum sched_type schedule, kmp_int32 *p_last,
2734 kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st,
2735 kmp_int64 chunk) {
2736 KMP_DEBUG_ASSERT(__kmp_init_serial)if (!(__kmp_init_serial)) { __kmp_debug_assert("__kmp_init_serial"
, "openmp/runtime/src/kmp_dispatch.cpp", 2736); }
;
2737#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2738 OMPT_STORE_RETURN_ADDRESS(gtid)OmptReturnAddressGuard ReturnAddressGuard{gtid, __builtin_return_address
(0)};
;
2739#endif
2740 __kmp_dist_get_bounds<kmp_uint64>(loc, gtid, p_last, &lb, &ub, st);
2741 __kmp_dispatch_init<kmp_uint64>(loc, gtid, schedule, lb, ub, st, chunk, true);
2742}
2743
2744/*!
2745@param loc Source code location
2746@param gtid Global thread id
2747@param p_last Pointer to a flag set to one if this is the last chunk or zero
2748otherwise
2749@param p_lb Pointer to the lower bound for the next chunk of work
2750@param p_ub Pointer to the upper bound for the next chunk of work
2751@param p_st Pointer to the stride for the next chunk of work
2752@return one if there is work to be done, zero otherwise
2753
2754Get the next dynamically allocated chunk of work for this thread.
2755If there is no more work, then the lb,ub and stride need not be modified.
2756*/
2757int __kmpc_dispatch_next_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
2758 kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st) {
2759#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2760 OMPT_STORE_RETURN_ADDRESS(gtid)OmptReturnAddressGuard ReturnAddressGuard{gtid, __builtin_return_address
(0)};
;
2761#endif
2762 return __kmp_dispatch_next<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st
2763#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2764 ,
2765 OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid)
2766#endif
2767 );
2768}
2769
2770/*!
2771See @ref __kmpc_dispatch_next_4
2772*/
2773int __kmpc_dispatch_next_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
2774 kmp_uint32 *p_lb, kmp_uint32 *p_ub,
2775 kmp_int32 *p_st) {
2776#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2777 OMPT_STORE_RETURN_ADDRESS(gtid)OmptReturnAddressGuard ReturnAddressGuard{gtid, __builtin_return_address
(0)};
;
2778#endif
2779 return __kmp_dispatch_next<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st
2780#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2781 ,
2782 OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid)
2783#endif
2784 );
2785}
2786
2787/*!
2788See @ref __kmpc_dispatch_next_4
2789*/
2790int __kmpc_dispatch_next_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
2791 kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st) {
2792#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2793 OMPT_STORE_RETURN_ADDRESS(gtid)OmptReturnAddressGuard ReturnAddressGuard{gtid, __builtin_return_address
(0)};
;
2794#endif
2795 return __kmp_dispatch_next<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st
2796#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2797 ,
2798 OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid)
2799#endif
2800 );
2801}
2802
2803/*!
2804See @ref __kmpc_dispatch_next_4
2805*/
2806int __kmpc_dispatch_next_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
2807 kmp_uint64 *p_lb, kmp_uint64 *p_ub,
2808 kmp_int64 *p_st) {
2809#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2810 OMPT_STORE_RETURN_ADDRESS(gtid)OmptReturnAddressGuard ReturnAddressGuard{gtid, __builtin_return_address
(0)};
;
2811#endif
2812 return __kmp_dispatch_next<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st
2813#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2814 ,
2815 OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid)
2816#endif
2817 );
2818}
2819
2820/*!
2821@param loc Source code location
2822@param gtid Global thread id
2823
2824Mark the end of a dynamic loop.
2825*/
2826void __kmpc_dispatch_fini_4(ident_t *loc, kmp_int32 gtid) {
2827 __kmp_dispatch_finish<kmp_uint32>(gtid, loc);
2828}
2829
2830/*!
2831See @ref __kmpc_dispatch_fini_4
2832*/
2833void __kmpc_dispatch_fini_8(ident_t *loc, kmp_int32 gtid) {
2834 __kmp_dispatch_finish<kmp_uint64>(gtid, loc);
2835}
2836
2837/*!
2838See @ref __kmpc_dispatch_fini_4
2839*/
2840void __kmpc_dispatch_fini_4u(ident_t *loc, kmp_int32 gtid) {
2841 __kmp_dispatch_finish<kmp_uint32>(gtid, loc);
2842}
2843
2844/*!
2845See @ref __kmpc_dispatch_fini_4
2846*/
2847void __kmpc_dispatch_fini_8u(ident_t *loc, kmp_int32 gtid) {
2848 __kmp_dispatch_finish<kmp_uint64>(gtid, loc);
2849}
2850/*! @} */
2851
2852//-----------------------------------------------------------------------------
2853// Non-template routines from kmp_dispatch.cpp used in other sources
2854
2855kmp_uint32 __kmp_eq_4(kmp_uint32 value, kmp_uint32 checker) {
2856 return value == checker;
2857}
2858
2859kmp_uint32 __kmp_neq_4(kmp_uint32 value, kmp_uint32 checker) {
2860 return value != checker;
2861}
2862
2863kmp_uint32 __kmp_lt_4(kmp_uint32 value, kmp_uint32 checker) {
2864 return value < checker;
2865}
2866
2867kmp_uint32 __kmp_ge_4(kmp_uint32 value, kmp_uint32 checker) {
2868 return value >= checker;
2869}
2870
2871kmp_uint32 __kmp_le_4(kmp_uint32 value, kmp_uint32 checker) {
2872 return value <= checker;
2873}
2874
2875kmp_uint32
2876__kmp_wait_4(volatile kmp_uint32 *spinner, kmp_uint32 checker,
2877 kmp_uint32 (*pred)(kmp_uint32, kmp_uint32),
2878 void *obj // Higher-level synchronization object, or NULL.
2879) {
2880 // note: we may not belong to a team at this point
2881 volatile kmp_uint32 *spin = spinner;
2882 kmp_uint32 check = checker;
2883 kmp_uint32 spins;
2884 kmp_uint32 (*f)(kmp_uint32, kmp_uint32) = pred;
2885 kmp_uint32 r;
2886 kmp_uint64 time;
2887
2888 KMP_FSYNC_SPIN_INIT(obj, CCAST(kmp_uint32 *, spin))int sync_iters = 0; if (__kmp_itt_fsync_prepare_ptr__3_0) { if
(obj == __null) { obj = const_cast<kmp_uint32 *>(spin)
; } } __asm__ __volatile__("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 "
::"i"(0x4376) : "%ebx")
;
2889 KMP_INIT_YIELD(spins){ (spins) = __kmp_yield_init; };
2890 KMP_INIT_BACKOFF(time){ (time) = __kmp_pause_init; };
2891 // main wait spin loop
2892 while (!f(r = TCR_4(*spin)(*spin), check)) {
2893 KMP_FSYNC_SPIN_PREPARE(obj)do { if (__kmp_itt_fsync_prepare_ptr__3_0 && sync_iters
< __kmp_itt_prepare_delay) { ++sync_iters; if (sync_iters
>= __kmp_itt_prepare_delay) { (!__kmp_itt_fsync_prepare_ptr__3_0
) ? (void)0 : __kmp_itt_fsync_prepare_ptr__3_0((void *)((void
*)obj)); } } } while (0)
;
2894 /* GEH - remove this since it was accidentally introduced when kmp_wait was
2895 split. It causes problems with infinite recursion because of exit lock */
2896 /* if ( TCR_4(__kmp_global.g.g_done) && __kmp_global.g.g_abort)
2897 __kmp_abort_thread(); */
2898 KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time){ if (__kmp_tpause_enabled) { if (((__kmp_nth) > (__kmp_avail_proc
? __kmp_avail_proc : __kmp_xproc))) { __kmp_tpause(0, (time)
); } else { __kmp_tpause(__kmp_tpause_hint, (time)); } (time)
= (time << 1 | 1) & ((kmp_uint64)0xFFFF); } else {
__kmp_x86_pause(); if ((((__kmp_use_yield == 1 || __kmp_use_yield
== 2) && (((__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc
: __kmp_xproc)))))) { __kmp_yield(); } else if (__kmp_use_yield
== 1) { (spins) -= 2; if (!(spins)) { __kmp_yield(); (spins)
= __kmp_yield_next; } } } }
;
2899 }
2900 KMP_FSYNC_SPIN_ACQUIRED(obj)do { __asm__ __volatile__("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 "
::"i"(0x4377) : "%ebx"); if (sync_iters >= __kmp_itt_prepare_delay
) { (!__kmp_itt_fsync_acquired_ptr__3_0) ? (void)0 : __kmp_itt_fsync_acquired_ptr__3_0
((void *)((void *)obj)); } } while (0)
;
2901 return r;
2902}
2903
2904void __kmp_wait_4_ptr(void *spinner, kmp_uint32 checker,
2905 kmp_uint32 (*pred)(void *, kmp_uint32),
2906 void *obj // Higher-level synchronization object, or NULL.
2907) {
2908 // note: we may not belong to a team at this point
2909 void *spin = spinner;
2910 kmp_uint32 check = checker;
2911 kmp_uint32 spins;
2912 kmp_uint32 (*f)(void *, kmp_uint32) = pred;
2913 kmp_uint64 time;
2914
2915 KMP_FSYNC_SPIN_INIT(obj, spin)int sync_iters = 0; if (__kmp_itt_fsync_prepare_ptr__3_0) { if
(obj == __null) { obj = spin; } } __asm__ __volatile__("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 "
::"i"(0x4376) : "%ebx")
;
2916 KMP_INIT_YIELD(spins){ (spins) = __kmp_yield_init; };
2917 KMP_INIT_BACKOFF(time){ (time) = __kmp_pause_init; };
2918 // main wait spin loop
2919 while (!f(spin, check)) {
2920 KMP_FSYNC_SPIN_PREPARE(obj)do { if (__kmp_itt_fsync_prepare_ptr__3_0 && sync_iters
< __kmp_itt_prepare_delay) { ++sync_iters; if (sync_iters
>= __kmp_itt_prepare_delay) { (!__kmp_itt_fsync_prepare_ptr__3_0
) ? (void)0 : __kmp_itt_fsync_prepare_ptr__3_0((void *)((void
*)obj)); } } } while (0)
;
2921 /* if we have waited a bit, or are noversubscribed, yield */
2922 /* pause is in the following code */
2923 KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time){ if (__kmp_tpause_enabled) { if (((__kmp_nth) > (__kmp_avail_proc
? __kmp_avail_proc : __kmp_xproc))) { __kmp_tpause(0, (time)
); } else { __kmp_tpause(__kmp_tpause_hint, (time)); } (time)
= (time << 1 | 1) & ((kmp_uint64)0xFFFF); } else {
__kmp_x86_pause(); if ((((__kmp_use_yield == 1 || __kmp_use_yield
== 2) && (((__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc
: __kmp_xproc)))))) { __kmp_yield(); } else if (__kmp_use_yield
== 1) { (spins) -= 2; if (!(spins)) { __kmp_yield(); (spins)
= __kmp_yield_next; } } } }
;
2924 }
2925 KMP_FSYNC_SPIN_ACQUIRED(obj)do { __asm__ __volatile__("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 "
::"i"(0x4377) : "%ebx"); if (sync_iters >= __kmp_itt_prepare_delay
) { (!__kmp_itt_fsync_acquired_ptr__3_0) ? (void)0 : __kmp_itt_fsync_acquired_ptr__3_0
((void *)((void *)obj)); } } while (0)
;
2926}
2927
2928} // extern "C"
2929
2930#ifdef KMP_GOMP_COMPAT
2931
2932void __kmp_aux_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
2933 enum sched_type schedule, kmp_int32 lb,
2934 kmp_int32 ub, kmp_int32 st, kmp_int32 chunk,
2935 int push_ws) {
2936 __kmp_dispatch_init<kmp_int32>(loc, gtid, schedule, lb, ub, st, chunk,
2937 push_ws);
2938}
2939
2940void __kmp_aux_dispatch_init_4u(ident_t *loc, kmp_int32 gtid,
2941 enum sched_type schedule, kmp_uint32 lb,
2942 kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk,
2943 int push_ws) {
2944 __kmp_dispatch_init<kmp_uint32>(loc, gtid, schedule, lb, ub, st, chunk,
2945 push_ws);
2946}
2947
2948void __kmp_aux_dispatch_init_8(ident_t *loc, kmp_int32 gtid,
2949 enum sched_type schedule, kmp_int64 lb,
2950 kmp_int64 ub, kmp_int64 st, kmp_int64 chunk,
2951 int push_ws) {
2952 __kmp_dispatch_init<kmp_int64>(loc, gtid, schedule, lb, ub, st, chunk,
2953 push_ws);
2954}
2955
2956void __kmp_aux_dispatch_init_8u(ident_t *loc, kmp_int32 gtid,
2957 enum sched_type schedule, kmp_uint64 lb,
2958 kmp_uint64 ub, kmp_int64 st, kmp_int64 chunk,
2959 int push_ws) {
2960 __kmp_dispatch_init<kmp_uint64>(loc, gtid, schedule, lb, ub, st, chunk,
2961 push_ws);
2962}
2963
2964void __kmp_aux_dispatch_fini_chunk_4(ident_t *loc, kmp_int32 gtid) {
2965 __kmp_dispatch_finish_chunk<kmp_uint32>(gtid, loc);
2966}
2967
2968void __kmp_aux_dispatch_fini_chunk_8(ident_t *loc, kmp_int32 gtid) {
2969 __kmp_dispatch_finish_chunk<kmp_uint64>(gtid, loc);
2970}
2971
2972void __kmp_aux_dispatch_fini_chunk_4u(ident_t *loc, kmp_int32 gtid) {
2973 __kmp_dispatch_finish_chunk<kmp_uint32>(gtid, loc);
2974}
2975
2976void __kmp_aux_dispatch_fini_chunk_8u(ident_t *loc, kmp_int32 gtid) {
2977 __kmp_dispatch_finish_chunk<kmp_uint64>(gtid, loc);
2978}
2979
2980#endif /* KMP_GOMP_COMPAT */
2981
2982/* ------------------------------------------------------------------------ */