Bug Summary

File: projects/openmp/runtime/src/kmp_dispatch.cpp
Warning: line 1982, column 7
Dereference of null pointer (loaded from variable 'p_last')

Annotated Source Code


clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name kmp_dispatch.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -mrelocation-model pic -pic-level 2 -mthread-model posix -fmath-errno -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -momit-leaf-frame-pointer -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-8/lib/clang/8.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -D omp_EXPORTS -I /build/llvm-toolchain-snapshot-8~svn345461/build-llvm/projects/openmp/runtime/src -I /build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src -I /build/llvm-toolchain-snapshot-8~svn345461/build-llvm/include -I /build/llvm-toolchain-snapshot-8~svn345461/include -I /build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/i18n -I /build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/include/50 -I /build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/thirdparty/ittnotify -U NDEBUG -D _GNU_SOURCE -D _REENTRANT -D _FORTIFY_SOURCE=2 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/include/clang/8.0.0/include/ -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-8/lib/clang/8.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -Wno-switch -Wno-missing-field-initializers -Wno-missing-braces -std=c++11 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-8~svn345461/build-llvm/projects/openmp/runtime/src -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -fno-rtti -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -o /tmp/scan-build-2018-10-27-211344-32123-1 -x c++ /build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp -faddrsig
1/*
2 * kmp_dispatch.cpp: dynamic scheduling - iteration initialization and dispatch.
3 */
4
5//===----------------------------------------------------------------------===//
6//
7// The LLVM Compiler Infrastructure
8//
9// This file is dual licensed under the MIT and the University of Illinois Open
10// Source Licenses. See LICENSE.txt for details.
11//
12//===----------------------------------------------------------------------===//
13
14/* Dynamic scheduling initialization and dispatch.
15 *
16 * NOTE: __kmp_nth is a constant inside of any dispatch loop, however
17 * it may change values between parallel regions. __kmp_max_nth
18 * is the largest value __kmp_nth may take, 1 is the smallest.
19 */
20
21#include "kmp.h"
22#include "kmp_error.h"
23#include "kmp_i18n.h"
24#include "kmp_itt.h"
25#include "kmp_stats.h"
26#include "kmp_str.h"
27#if KMP_OS_WINDOWS0 && KMP_ARCH_X860
28#include <float.h>
29#endif
30#include "kmp_lock.h"
31#include "kmp_dispatch.h"
32#if KMP_USE_HIER_SCHED0
33#include "kmp_dispatch_hier.h"
34#endif
35
36#if OMPT_SUPPORT1
37#include "ompt-specific.h"
38#endif
39
40/* ------------------------------------------------------------------------ */
41/* ------------------------------------------------------------------------ */
42
43void __kmp_dispatch_deo_error(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
44 kmp_info_t *th;
45
46 KMP_DEBUG_ASSERT(gtid_ref)if (!(gtid_ref)) { __kmp_debug_assert("gtid_ref", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp"
, 46); }
;
47
48 if (__kmp_env_consistency_check) {
49 th = __kmp_threads[*gtid_ref];
50 if (th->th.th_root->r.r_active &&
51 (th->th.th_dispatch->th_dispatch_pr_current->pushed_ws != ct_none)) {
52#if KMP_USE_DYNAMIC_LOCK1
53 __kmp_push_sync(*gtid_ref, ct_ordered_in_pdo, loc_ref, NULL__null, 0);
54#else
55 __kmp_push_sync(*gtid_ref, ct_ordered_in_pdo, loc_ref, NULL__null);
56#endif
57 }
58 }
59}
60
61void __kmp_dispatch_dxo_error(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
62 kmp_info_t *th;
63
64 if (__kmp_env_consistency_check) {
65 th = __kmp_threads[*gtid_ref];
66 if (th->th.th_dispatch->th_dispatch_pr_current->pushed_ws != ct_none) {
67 __kmp_pop_sync(*gtid_ref, ct_ordered_in_pdo, loc_ref);
68 }
69 }
70}
71
72// Initialize a dispatch_private_info_template<T> buffer for a particular
 73// type of schedule and chunk. The loop description is found in lb (lower bound),
74// ub (upper bound), and st (stride). nproc is the number of threads relevant
75// to the scheduling (often the number of threads in a team, but not always if
76// hierarchical scheduling is used). tid is the id of the thread calling
77// the function within the group of nproc threads. It will have a value
78// between 0 and nproc - 1. This is often just the thread id within a team, but
79// is not necessarily the case when using hierarchical scheduling.
80// loc is the source file location of the corresponding loop
81// gtid is the global thread id
82template <typename T>
83void __kmp_dispatch_init_algorithm(ident_t *loc, int gtid,
84 dispatch_private_info_template<T> *pr,
85 enum sched_type schedule, T lb, T ub,
86 typename traits_t<T>::signed_t st,
87#if USE_ITT_BUILD1
88 kmp_uint64 *cur_chunk,
89#endif
90 typename traits_t<T>::signed_t chunk,
91 T nproc, T tid) {
92 typedef typename traits_t<T>::unsigned_t UT;
93 typedef typename traits_t<T>::floating_t DBL;
94
95 int active;
96 T tc;
97 kmp_info_t *th;
98 kmp_team_t *team;
99
100#ifdef KMP_DEBUG1
101 typedef typename traits_t<T>::signed_t ST;
102 {
103 char *buff;
104 // create format specifiers before the debug output
105 buff = __kmp_str_format("__kmp_dispatch_init_algorithm: T#%%d called "
106 "pr:%%p lb:%%%s ub:%%%s st:%%%s "
107 "schedule:%%d chunk:%%%s nproc:%%%s tid:%%%s\n",
108 traits_t<T>::spec, traits_t<T>::spec,
109 traits_t<ST>::spec, traits_t<ST>::spec,
110 traits_t<T>::spec, traits_t<T>::spec);
111 KD_TRACE(10, (buff, gtid, pr, lb, ub, st, schedule, chunk, nproc, tid))if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, pr
, lb, ub, st, schedule, chunk, nproc, tid); }
;
112 __kmp_str_free(&buff);
113 }
114#endif
115 /* setup data */
116 th = __kmp_threads[gtid];
117 team = th->th.th_team;
118 active = !team->t.t_serialized;
119
120#if USE_ITT_BUILD1
121 int itt_need_metadata_reporting = __itt_metadata_add_ptr__kmp_itt_metadata_add_ptr__3_0 &&
122 __kmp_forkjoin_frames_mode == 3 &&
123 KMP_MASTER_GTID(gtid)(__kmp_tid_from_gtid((gtid)) == 0) &&
124#if OMP_40_ENABLED(50 >= 40)
125 th->th.th_teams_microtask == NULL__null &&
126#endif
127 team->t.t_active_level == 1;
128#endif
129#if (KMP_STATIC_STEAL_ENABLED1)
130 if (SCHEDULE_HAS_NONMONOTONIC(schedule)(((schedule)&kmp_sch_modifier_nonmonotonic) != 0))
131 // AC: we now have only one implementation of stealing, so use it
132 schedule = kmp_sch_static_steal;
133 else
134#endif
135 schedule = SCHEDULE_WITHOUT_MODIFIERS(schedule)(enum sched_type)( (schedule) & ~(kmp_sch_modifier_nonmonotonic
| kmp_sch_modifier_monotonic))
;
136
137 /* Pick up the nomerge/ordered bits from the scheduling type */
138 if ((schedule >= kmp_nm_lower) && (schedule < kmp_nm_upper)) {
139 pr->flags.nomerge = TRUE(!0);
140 schedule =
141 (enum sched_type)(((int)schedule) - (kmp_nm_lower - kmp_sch_lower));
142 } else {
143 pr->flags.nomerge = FALSE0;
144 }
145 pr->type_size = traits_t<T>::type_size; // remember the size of variables
146 if (kmp_ord_lower & schedule) {
147 pr->flags.ordered = TRUE(!0);
148 schedule =
149 (enum sched_type)(((int)schedule) - (kmp_ord_lower - kmp_sch_lower));
150 } else {
151 pr->flags.ordered = FALSE0;
152 }
153
154 if (schedule == kmp_sch_static) {
155 schedule = __kmp_static;
156 } else {
157 if (schedule == kmp_sch_runtime) {
158 // Use the scheduling specified by OMP_SCHEDULE (or __kmp_sch_default if
159 // not specified)
160 schedule = team->t.t_sched.r_sched_type;
161 // Detail the schedule if needed (global controls are differentiated
162 // appropriately)
163 if (schedule == kmp_sch_guided_chunked) {
164 schedule = __kmp_guided;
165 } else if (schedule == kmp_sch_static) {
166 schedule = __kmp_static;
167 }
168 // Use the chunk size specified by OMP_SCHEDULE (or default if not
169 // specified)
170 chunk = team->t.t_sched.chunk;
171#if USE_ITT_BUILD1
172 if (cur_chunk)
173 *cur_chunk = chunk;
174#endif
175#ifdef KMP_DEBUG1
176 {
177 char *buff;
178 // create format specifiers before the debug output
179 buff = __kmp_str_format("__kmp_dispatch_init_algorithm: T#%%d new: "
180 "schedule:%%d chunk:%%%s\n",
181 traits_t<ST>::spec);
182 KD_TRACE(10, (buff, gtid, schedule, chunk))if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, schedule
, chunk); }
;
183 __kmp_str_free(&buff);
184 }
185#endif
186 } else {
187 if (schedule == kmp_sch_guided_chunked) {
188 schedule = __kmp_guided;
189 }
190 if (chunk <= 0) {
191 chunk = KMP_DEFAULT_CHUNK1;
192 }
193 }
194
195 if (schedule == kmp_sch_auto) {
196 // mapping and differentiation: in the __kmp_do_serial_initialize()
197 schedule = __kmp_auto;
198#ifdef KMP_DEBUG1
199 {
200 char *buff;
201 // create format specifiers before the debug output
202 buff = __kmp_str_format(
203 "__kmp_dispatch_init_algorithm: kmp_sch_auto: T#%%d new: "
204 "schedule:%%d chunk:%%%s\n",
205 traits_t<ST>::spec);
206 KD_TRACE(10, (buff, gtid, schedule, chunk))if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, schedule
, chunk); }
;
207 __kmp_str_free(&buff);
208 }
209#endif
210 }
211
212 /* guided analytical not safe for too many threads */
213 if (schedule == kmp_sch_guided_analytical_chunked && nproc > 1 << 20) {
214 schedule = kmp_sch_guided_iterative_chunked;
215 KMP_WARNING(DispatchManyThreads)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_DispatchManyThreads
), __kmp_msg_null)
;
216 }
217#if OMP_45_ENABLED(50 >= 45)
218 if (schedule == kmp_sch_runtime_simd) {
219 // compiler provides simd_width in the chunk parameter
220 schedule = team->t.t_sched.r_sched_type;
221 // Detail the schedule if needed (global controls are differentiated
222 // appropriately)
223 if (schedule == kmp_sch_static || schedule == kmp_sch_auto ||
224 schedule == __kmp_static) {
225 schedule = kmp_sch_static_balanced_chunked;
226 } else {
227 if (schedule == kmp_sch_guided_chunked || schedule == __kmp_guided) {
228 schedule = kmp_sch_guided_simd;
229 }
230 chunk = team->t.t_sched.chunk * chunk;
231 }
232#if USE_ITT_BUILD1
233 if (cur_chunk)
234 *cur_chunk = chunk;
235#endif
236#ifdef KMP_DEBUG1
237 {
238 char *buff;
239 // create format specifiers before the debug output
240 buff = __kmp_str_format("__kmp_dispatch_init: T#%%d new: schedule:%%d"
241 " chunk:%%%s\n",
242 traits_t<ST>::spec);
243 KD_TRACE(10, (buff, gtid, schedule, chunk))if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, schedule
, chunk); }
;
244 __kmp_str_free(&buff);
245 }
246#endif
247 }
248#endif // OMP_45_ENABLED
249 pr->u.p.parm1 = chunk;
250 }
251 KMP_ASSERT2((kmp_sch_lower < schedule && schedule < kmp_sch_upper),if (!((kmp_sch_lower < schedule && schedule < kmp_sch_upper
))) { __kmp_debug_assert(("unknown scheduling type"), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp"
, 252); }
252 "unknown scheduling type")if (!((kmp_sch_lower < schedule && schedule < kmp_sch_upper
))) { __kmp_debug_assert(("unknown scheduling type"), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp"
, 252); }
;
253
254 pr->u.p.count = 0;
255
256 if (__kmp_env_consistency_check) {
257 if (st == 0) {
258 __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited,
259 (pr->flags.ordered ? ct_pdo_ordered : ct_pdo), loc);
260 }
261 }
262 // compute trip count
263 if (st == 1) { // most common case
264 if (ub >= lb) {
265 tc = ub - lb + 1;
266 } else { // ub < lb
267 tc = 0; // zero-trip
268 }
269 } else if (st < 0) {
270 if (lb >= ub) {
271 // AC: cast to unsigned is needed for loops like (i=2B; i>-2B; i-=1B),
272 // where the division needs to be unsigned regardless of the result type
273 tc = (UT)(lb - ub) / (-st) + 1;
274 } else { // lb < ub
275 tc = 0; // zero-trip
276 }
277 } else { // st > 0
278 if (ub >= lb) {
279 // AC: cast to unsigned is needed for loops like (i=-2B; i<2B; i+=1B),
280 // where the division needs to be unsigned regardless of the result type
281 tc = (UT)(ub - lb) / st + 1;
282 } else { // ub < lb
283 tc = 0; // zero-trip
284 }
285 }
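  // Illustrative worked examples (assumed values, not part of the original
  // source): for lb=0, ub=9, st=1 the loop runs 10 times, so tc = 9-0+1 = 10;
  // for lb=0, ub=9, st=3 it visits 0,3,6,9, so tc = (9-0)/3 + 1 = 4;
  // for lb=9, ub=0, st=-3 it visits 9,6,3,0, so tc = (9-0)/3 + 1 = 4.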
286
287 pr->u.p.lb = lb;
288 pr->u.p.ub = ub;
289 pr->u.p.st = st;
290 pr->u.p.tc = tc;
291
292#if KMP_OS_WINDOWS0
293 pr->u.p.last_upper = ub + st;
294#endif /* KMP_OS_WINDOWS */
295
296 /* NOTE: only the active parallel region(s) has active ordered sections */
297
298 if (active) {
299 if (pr->flags.ordered) {
300 pr->ordered_bumped = 0;
301 pr->u.p.ordered_lower = 1;
302 pr->u.p.ordered_upper = 0;
303 }
304 }
305
306 switch (schedule) {
307#if (KMP_STATIC_STEAL_ENABLED1)
308 case kmp_sch_static_steal: {
309 T ntc, init;
310
311 KD_TRACE(100,if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_steal case\n"
, gtid); }
312 ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_steal case\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_steal case\n"
, gtid); }
313 gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_steal case\n"
, gtid); }
;
314
315 ntc = (tc % chunk ? 1 : 0) + tc / chunk;
316 if (nproc > 1 && ntc >= nproc) {
317 KMP_COUNT_BLOCK(OMP_LOOP_STATIC_STEAL)((void)0);
318 T id = tid;
319 T small_chunk, extras;
320
321 small_chunk = ntc / nproc;
322 extras = ntc % nproc;
323
324 init = id * small_chunk + (id < extras ? id : extras);
325 pr->u.p.count = init;
326 pr->u.p.ub = init + small_chunk + (id < extras ? 1 : 0);
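      // Illustrative example (assumed values, not from the original source):
      // with ntc = 10 chunks and nproc = 4, small_chunk = 2 and extras = 2,
      // so threads 0..3 start at chunk indices 0, 3, 6, 8 and own 3, 3, 2, 2
      // chunks respectively (count = init, ub = one past the last owned chunk).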
327
328 pr->u.p.parm2 = lb;
329 // pr->pfields.parm3 = 0; // it's not used in static_steal
330 pr->u.p.parm4 = (id + 1) % nproc; // remember neighbour tid
331 pr->u.p.st = st;
332 if (traits_t<T>::type_size > 4) {
333 // AC: TODO: check if 16-byte CAS available and use it to
334 // improve performance (probably wait for explicit request
335 // before spending time on this).
336 // For now use dynamically allocated per-thread lock,
337 // free memory in __kmp_dispatch_next when status==0.
338 KMP_DEBUG_ASSERT(th->th.th_dispatch->th_steal_lock == NULL)if (!(th->th.th_dispatch->th_steal_lock == __null)) { __kmp_debug_assert
("th->th.th_dispatch->th_steal_lock == __null", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp"
, 338); }
;
339 th->th.th_dispatch->th_steal_lock =
340 (kmp_lock_t *)__kmp_allocate(sizeof(kmp_lock_t))___kmp_allocate((sizeof(kmp_lock_t)), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp"
, 340)
;
341 __kmp_init_lock(th->th.th_dispatch->th_steal_lock);
342 }
343 break;
344 } else {
345 KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d falling-through to "if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d falling-through to "
"kmp_sch_static_balanced\n", gtid); }
346 "kmp_sch_static_balanced\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d falling-through to "
"kmp_sch_static_balanced\n", gtid); }
347 gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d falling-through to "
"kmp_sch_static_balanced\n", gtid); }
;
348 schedule = kmp_sch_static_balanced;
349 /* too few iterations: fall-through to kmp_sch_static_balanced */
350 } // if
351 /* FALL-THROUGH to static balanced */
352 } // case
353#endif
354 case kmp_sch_static_balanced: {
355 T init, limit;
356
357 KD_TRACE(if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_balanced case\n"
, gtid); }
358 100,if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_balanced case\n"
, gtid); }
359 ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_balanced case\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_balanced case\n"
, gtid); }
360 gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_balanced case\n"
, gtid); }
;
361
362 if (nproc > 1) {
363 T id = tid;
364
365 if (tc < nproc) {
366 if (id < tc) {
367 init = id;
368 limit = id;
369 pr->u.p.parm1 = (id == tc - 1); /* parm1 stores *plastiter */
370 } else {
371 pr->u.p.count = 1; /* means no more chunks to execute */
372 pr->u.p.parm1 = FALSE0;
373 break;
374 }
375 } else {
376 T small_chunk = tc / nproc;
377 T extras = tc % nproc;
378 init = id * small_chunk + (id < extras ? id : extras);
379 limit = init + small_chunk - (id < extras ? 0 : 1);
380 pr->u.p.parm1 = (id == nproc - 1);
381 }
382 } else {
383 if (tc > 0) {
384 init = 0;
385 limit = tc - 1;
386 pr->u.p.parm1 = TRUE(!0);
387 } else {
388 // zero trip count
389 pr->u.p.count = 1; /* means no more chunks to execute */
390 pr->u.p.parm1 = FALSE0;
391 break;
392 }
393 }
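    // Illustrative example (assumed values, not from the original source):
    // tc = 10 iterations over nproc = 4 threads gives small_chunk = 2 and
    // extras = 2, so the [init, limit] ranges are [0,2], [3,5], [6,7], [8,9]
    // for tids 0..3, and parm1 flags the thread owning the final iteration.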
394#if USE_ITT_BUILD1
395 // Calculate chunk for metadata report
396 if (itt_need_metadata_reporting)
397 if (cur_chunk)
398 *cur_chunk = limit - init + 1;
399#endif
400 if (st == 1) {
401 pr->u.p.lb = lb + init;
402 pr->u.p.ub = lb + limit;
403 } else {
404 // calculated upper bound, "ub" is user-defined upper bound
405 T ub_tmp = lb + limit * st;
406 pr->u.p.lb = lb + init * st;
407 // adjust upper bound to "ub" if needed, so that MS lastprivate will match
408 // it exactly
409 if (st > 0) {
410 pr->u.p.ub = (ub_tmp + st > ub ? ub : ub_tmp);
411 } else {
412 pr->u.p.ub = (ub_tmp + st < ub ? ub : ub_tmp);
413 }
414 }
415 if (pr->flags.ordered) {
416 pr->u.p.ordered_lower = init;
417 pr->u.p.ordered_upper = limit;
418 }
419 break;
420 } // case
421#if OMP_45_ENABLED(50 >= 45)
422 case kmp_sch_static_balanced_chunked: {
423 // similar to balanced, but chunk adjusted to multiple of simd width
424 T nth = nproc;
425 KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d runtime(simd:static)"if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d runtime(simd:static)"
" -> falling-through to static_greedy\n", gtid); }
426 " -> falling-through to static_greedy\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d runtime(simd:static)"
" -> falling-through to static_greedy\n", gtid); }
427 gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d runtime(simd:static)"
" -> falling-through to static_greedy\n", gtid); }
;
428 schedule = kmp_sch_static_greedy;
429 if (nth > 1)
430 pr->u.p.parm1 = ((tc + nth - 1) / nth + chunk - 1) & ~(chunk - 1);
431 else
432 pr->u.p.parm1 = tc;
433 break;
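    // Illustrative example (assumed values, not from the original source):
    // tc = 100, nth = 8, chunk (simd width) = 8: ceil(100/8) = 13 iterations
    // per thread, rounded up to the next multiple of 8 gives parm1 = 16.
    // The & ~(chunk - 1) rounding assumes chunk is a power of two.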
434 } // case
435 case kmp_sch_guided_simd:
436#endif // OMP_45_ENABLED
437 case kmp_sch_guided_iterative_chunked: {
438 KD_TRACE(if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_guided_iterative_chunked"
" case\n", gtid); }
439 100,if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_guided_iterative_chunked"
" case\n", gtid); }
440 ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_guided_iterative_chunked"if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_guided_iterative_chunked"
" case\n", gtid); }
441 " case\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_guided_iterative_chunked"
" case\n", gtid); }
442 gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_guided_iterative_chunked"
" case\n", gtid); }
;
443
444 if (nproc > 1) {
445 if ((2L * chunk + 1) * nproc >= tc) {
446 /* chunk size too large, switch to dynamic */
447 schedule = kmp_sch_dynamic_chunked;
448 } else {
449 // when remaining iters become less than parm2 - switch to dynamic
450 pr->u.p.parm2 = guided_int_param * nproc * (chunk + 1);
451 *(double *)&pr->u.p.parm3 =
452 guided_flt_param / nproc; // may occupy parm3 and parm4
453 }
454 } else {
455 KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d falling-through to "if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d falling-through to "
"kmp_sch_static_greedy\n", gtid); }
456 "kmp_sch_static_greedy\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d falling-through to "
"kmp_sch_static_greedy\n", gtid); }
457 gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d falling-through to "
"kmp_sch_static_greedy\n", gtid); }
;
458 schedule = kmp_sch_static_greedy;
459 /* team->t.t_nproc == 1: fall-through to kmp_sch_static_greedy */
460 KD_TRACE(if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_greedy case\n"
, gtid); }
461 100,if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_greedy case\n"
, gtid); }
462 ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_greedy case\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_greedy case\n"
, gtid); }
463 gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_greedy case\n"
, gtid); }
;
464 pr->u.p.parm1 = tc;
465 } // if
466 } // case
467 break;
468 case kmp_sch_guided_analytical_chunked: {
469 KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d "if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d "
"kmp_sch_guided_analytical_chunked case\n", gtid); }
470 "kmp_sch_guided_analytical_chunked case\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d "
"kmp_sch_guided_analytical_chunked case\n", gtid); }
471 gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d "
"kmp_sch_guided_analytical_chunked case\n", gtid); }
;
472
473 if (nproc > 1) {
474 if ((2L * chunk + 1) * nproc >= tc) {
475 /* chunk size too large, switch to dynamic */
476 schedule = kmp_sch_dynamic_chunked;
477 } else {
478 /* commonly used term: (2 nproc - 1)/(2 nproc) */
479 DBL x;
480
481#if KMP_OS_WINDOWS0 && KMP_ARCH_X860
482 /* Linux* OS already has 64-bit computation by default for long double,
483 and on Windows* OS on Intel(R) 64, /Qlong_double doesn't work. On
484 Windows* OS on IA-32 architecture, we need to set precision to 64-bit
485 instead of the default 53-bit. Even though long double doesn't work
486 on Windows* OS on Intel(R) 64, the resulting lack of precision is not
487 expected to impact the correctness of the algorithm, but this has not
488 been mathematically proven. */
489 // save original FPCW and set precision to 64-bit, as
490 // Windows* OS on IA-32 architecture defaults to 53-bit
491 unsigned int oldFpcw = _control87(0, 0);
492 _control87(_PC_64, _MCW_PC); // 0,0x30000
493#endif
494 /* value used for comparison in solver for cross-over point */
495 long double target = ((long double)chunk * 2 + 1) * nproc / tc;
496
497 /* crossover point--chunk indexes equal to or greater than
498 this point switch to dynamic-style scheduling */
499 UT cross;
500
501 /* commonly used term: (2 nproc - 1)/(2 nproc) */
502 x = (long double)1.0 - (long double)0.5 / nproc;
503
504#ifdef KMP_DEBUG1
505 { // test natural alignment
506 struct _test_a {
507 char a;
508 union {
509 char b;
510 DBL d;
511 };
512 } t;
513 ptrdiff_t natural_alignment =
514 (ptrdiff_t)&t.b - (ptrdiff_t)&t - (ptrdiff_t)1;
515 //__kmp_warn( " %llx %llx %lld", (long long)&t.d, (long long)&t, (long
516 // long)natural_alignment );
517 KMP_DEBUG_ASSERT(if (!((((ptrdiff_t)&pr->u.p.parm3) & (natural_alignment
)) == 0)) { __kmp_debug_assert("(((ptrdiff_t)&pr->u.p.parm3) & (natural_alignment)) == 0"
, "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp"
, 518); }
518 (((ptrdiff_t)&pr->u.p.parm3) & (natural_alignment)) == 0)if (!((((ptrdiff_t)&pr->u.p.parm3) & (natural_alignment
)) == 0)) { __kmp_debug_assert("(((ptrdiff_t)&pr->u.p.parm3) & (natural_alignment)) == 0"
, "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp"
, 518); }
;
519 }
520#endif // KMP_DEBUG
521
522 /* save the term in thread private dispatch structure */
523 *(DBL *)&pr->u.p.parm3 = x;
524
525 /* solve for the crossover point to the nearest integer i for which C_i
526 <= chunk */
527 {
528 UT left, right, mid;
529 long double p;
530
531 /* estimate initial upper and lower bound */
532
533 /* doesn't matter what value right is as long as it is positive, but
534 it affects performance of the solver */
535 right = 229;
536 p = __kmp_pow<UT>(x, right);
537 if (p > target) {
538 do {
539 p *= p;
540 right <<= 1;
541 } while (p > target && right < (1 << 27));
542 /* lower bound is previous (failed) estimate of upper bound */
543 left = right >> 1;
544 } else {
545 left = 0;
546 }
547
548 /* bisection root-finding method */
549 while (left + 1 < right) {
550 mid = (left + right) / 2;
551 if (__kmp_pow<UT>(x, mid) > target) {
552 left = mid;
553 } else {
554 right = mid;
555 }
556 } // while
557 cross = right;
558 }
559 /* assert sanity of computed crossover point */
560 KMP_ASSERT(cross && __kmp_pow<UT>(x, cross - 1) > target &&if (!(cross && __kmp_pow<UT>(x, cross - 1) >
target && __kmp_pow<UT>(x, cross) <= target
)) { __kmp_debug_assert("cross && __kmp_pow<UT>(x, cross - 1) > target && __kmp_pow<UT>(x, cross) <= target"
, "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp"
, 561); }
561 __kmp_pow<UT>(x, cross) <= target)if (!(cross && __kmp_pow<UT>(x, cross - 1) >
target && __kmp_pow<UT>(x, cross) <= target
)) { __kmp_debug_assert("cross && __kmp_pow<UT>(x, cross - 1) > target && __kmp_pow<UT>(x, cross) <= target"
, "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp"
, 561); }
;
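          // Illustrative example (assumed values, not from the original
          // source): with nproc = 4, chunk = 1 and tc = 1000, x = 1 - 0.5/4
          // = 0.875 and target = (2*1+1)*4/1000 = 0.012; the bisection then
          // yields cross = 34, since 0.875^33 > 0.012 >= 0.875^34.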
562
563 /* save the crossover point in thread private dispatch structure */
564 pr->u.p.parm2 = cross;
565
566// C75803
567#if ((KMP_OS_LINUX1 || KMP_OS_WINDOWS0) && KMP_ARCH_X860) && (!defined(KMP_I8))
568#define GUIDED_ANALYTICAL_WORKAROUND(x) (*(DBL *)&pr->u.p.parm3)
569#else
570#define GUIDED_ANALYTICAL_WORKAROUND(x) (x)
571#endif
572 /* dynamic-style scheduling offset */
573 pr->u.p.count = tc - __kmp_dispatch_guided_remaining(
574 tc, GUIDED_ANALYTICAL_WORKAROUND(x), cross) -
575 cross * chunk;
576#if KMP_OS_WINDOWS0 && KMP_ARCH_X860
577 // restore FPCW
578 _control87(oldFpcw, _MCW_PC);
579#endif
580 } // if
581 } else {
582 KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d falling-through to "if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d falling-through to "
"kmp_sch_static_greedy\n", gtid); }
583 "kmp_sch_static_greedy\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d falling-through to "
"kmp_sch_static_greedy\n", gtid); }
584 gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d falling-through to "
"kmp_sch_static_greedy\n", gtid); }
;
585 schedule = kmp_sch_static_greedy;
586 /* team->t.t_nproc == 1: fall-through to kmp_sch_static_greedy */
587 pr->u.p.parm1 = tc;
588 } // if
589 } // case
590 break;
591 case kmp_sch_static_greedy:
592 KD_TRACE(if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_greedy case\n"
, gtid); }
593 100,if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_greedy case\n"
, gtid); }
594 ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_greedy case\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_greedy case\n"
, gtid); }
595 gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_greedy case\n"
, gtid); }
;
596 pr->u.p.parm1 = (nproc > 1) ? (tc + nproc - 1) / nproc : tc;
597 break;
598 case kmp_sch_static_chunked:
599 case kmp_sch_dynamic_chunked:
600 if (pr->u.p.parm1 <= 0) {
601 pr->u.p.parm1 = KMP_DEFAULT_CHUNK1;
602 }
603 KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d "if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d "
"kmp_sch_static_chunked/kmp_sch_dynamic_chunked cases\n", gtid
); }
604 "kmp_sch_static_chunked/kmp_sch_dynamic_chunked cases\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d "
"kmp_sch_static_chunked/kmp_sch_dynamic_chunked cases\n", gtid
); }
605 gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d "
"kmp_sch_static_chunked/kmp_sch_dynamic_chunked cases\n", gtid
); }
;
606 break;
607 case kmp_sch_trapezoidal: {
608 /* TSS: trapezoid self-scheduling, minimum chunk_size = parm1 */
609
610 T parm1, parm2, parm3, parm4;
611 KD_TRACE(100,if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_trapezoidal case\n"
, gtid); }
612 ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_trapezoidal case\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_trapezoidal case\n"
, gtid); }
613 gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_trapezoidal case\n"
, gtid); }
;
614
615 parm1 = chunk;
616
617 /* F : size of the first cycle */
618 parm2 = (tc / (2 * nproc));
619
620 if (parm2 < 1) {
621 parm2 = 1;
622 }
623
624 /* L : size of the last cycle. Make sure the last cycle is not larger
625 than the first cycle. */
626 if (parm1 < 1) {
627 parm1 = 1;
628 } else if (parm1 > parm2) {
629 parm1 = parm2;
630 }
631
632 /* N : number of cycles */
633 parm3 = (parm2 + parm1);
634 parm3 = (2 * tc + parm3 - 1) / parm3;
635
636 if (parm3 < 2) {
637 parm3 = 2;
638 }
639
640 /* sigma : decreasing incr of the trapezoid */
641 parm4 = (parm3 - 1);
642 parm4 = (parm2 - parm1) / parm4;
643
644 // pointless check, because parm4 >= 0 always
645 // if ( parm4 < 0 ) {
646 // parm4 = 0;
647 //}
648
649 pr->u.p.parm1 = parm1;
650 pr->u.p.parm2 = parm2;
651 pr->u.p.parm3 = parm3;
652 pr->u.p.parm4 = parm4;
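    // Illustrative example (assumed values, not from the original source):
    // tc = 100, nproc = 2, chunk = 4 gives first-cycle size parm2 = 25,
    // last-cycle size parm1 = 4, cycle count parm3 = (200+29-1)/29 = 7 and
    // decrement parm4 = (25-4)/6 = 3, i.e. cycle sizes 25,22,19,16,13,10,7.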
653 } // case
654 break;
655
656 default: {
657 __kmp_fatal(KMP_MSG(UnknownSchedTypeDetected)__kmp_msg_format(kmp_i18n_msg_UnknownSchedTypeDetected), // Primary message
658 KMP_HNT(GetNewerLibrary)__kmp_msg_format(kmp_i18n_hnt_GetNewerLibrary), // Hint
659 __kmp_msg_null // Variadic argument list terminator
660 );
661 } break;
662 } // switch
663 pr->schedule = schedule;
664}
665
666#if KMP_USE_HIER_SCHED0
667template <typename T>
668inline void __kmp_dispatch_init_hier_runtime(ident_t *loc, T lb, T ub,
669 typename traits_t<T>::signed_t st);
670template <>
671inline void
672__kmp_dispatch_init_hier_runtime<kmp_int32>(ident_t *loc, kmp_int32 lb,
673 kmp_int32 ub, kmp_int32 st) {
674 __kmp_dispatch_init_hierarchy<kmp_int32>(
675 loc, __kmp_hier_scheds.size, __kmp_hier_scheds.layers,
676 __kmp_hier_scheds.scheds, __kmp_hier_scheds.small_chunks, lb, ub, st);
677}
678template <>
679inline void
680__kmp_dispatch_init_hier_runtime<kmp_uint32>(ident_t *loc, kmp_uint32 lb,
681 kmp_uint32 ub, kmp_int32 st) {
682 __kmp_dispatch_init_hierarchy<kmp_uint32>(
683 loc, __kmp_hier_scheds.size, __kmp_hier_scheds.layers,
684 __kmp_hier_scheds.scheds, __kmp_hier_scheds.small_chunks, lb, ub, st);
685}
686template <>
687inline void
688__kmp_dispatch_init_hier_runtime<kmp_int64>(ident_t *loc, kmp_int64 lb,
689 kmp_int64 ub, kmp_int64 st) {
690 __kmp_dispatch_init_hierarchy<kmp_int64>(
691 loc, __kmp_hier_scheds.size, __kmp_hier_scheds.layers,
692 __kmp_hier_scheds.scheds, __kmp_hier_scheds.large_chunks, lb, ub, st);
693}
694template <>
695inline void
696__kmp_dispatch_init_hier_runtime<kmp_uint64>(ident_t *loc, kmp_uint64 lb,
697 kmp_uint64 ub, kmp_int64 st) {
698 __kmp_dispatch_init_hierarchy<kmp_uint64>(
699 loc, __kmp_hier_scheds.size, __kmp_hier_scheds.layers,
700 __kmp_hier_scheds.scheds, __kmp_hier_scheds.large_chunks, lb, ub, st);
701}
702
703// free all the hierarchy scheduling memory associated with the team
704void __kmp_dispatch_free_hierarchies(kmp_team_t *team) {
705 int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
706 for (int i = 0; i < num_disp_buff; ++i) {
707 // type does not matter here so use kmp_int32
708 auto sh =
709 reinterpret_cast<dispatch_shared_info_template<kmp_int32> volatile *>(
710 &team->t.t_disp_buffer[i]);
711 if (sh->hier) {
712 sh->hier->deallocate();
713 __kmp_free(sh->hier)___kmp_free((sh->hier), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp"
, 713)
;
714 }
715 }
716}
717#endif
718
719// UT - unsigned flavor of T, ST - signed flavor of T,
720// DBL - double if sizeof(T)==4, or long double if sizeof(T)==8
721template <typename T>
722static void
723__kmp_dispatch_init(ident_t *loc, int gtid, enum sched_type schedule, T lb,
724 T ub, typename traits_t<T>::signed_t st,
725 typename traits_t<T>::signed_t chunk, int push_ws) {
726 typedef typename traits_t<T>::unsigned_t UT;
727
728 int active;
729 kmp_info_t *th;
730 kmp_team_t *team;
731 kmp_uint32 my_buffer_index;
732 dispatch_private_info_template<T> *pr;
733 dispatch_shared_info_template<T> volatile *sh;
734
735 KMP_BUILD_ASSERT(sizeof(dispatch_private_info_template<T>) ==static_assert(sizeof(dispatch_private_info_template<T>)
== sizeof(dispatch_private_info), "Build condition error")
736 sizeof(dispatch_private_info))static_assert(sizeof(dispatch_private_info_template<T>)
== sizeof(dispatch_private_info), "Build condition error")
;
737 KMP_BUILD_ASSERT(sizeof(dispatch_shared_info_template<UT>) ==static_assert(sizeof(dispatch_shared_info_template<UT>)
== sizeof(dispatch_shared_info), "Build condition error")
738 sizeof(dispatch_shared_info))static_assert(sizeof(dispatch_shared_info_template<UT>)
== sizeof(dispatch_shared_info), "Build condition error")
;
739
740 if (!TCR_4(__kmp_init_parallel)(__kmp_init_parallel))
741 __kmp_parallel_initialize();
742
743#if INCLUDE_SSC_MARKS(1 && 1)
744 SSC_MARK_DISPATCH_INIT()__asm__ __volatile__("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 "
::"i"(0xd696) : "%ebx")
;
745#endif
746#ifdef KMP_DEBUG1
747 typedef typename traits_t<T>::signed_t ST;
748 {
749 char *buff;
750 // create format specifiers before the debug output
751 buff = __kmp_str_format("__kmp_dispatch_init: T#%%d called: schedule:%%d "
752 "chunk:%%%s lb:%%%s ub:%%%s st:%%%s\n",
753 traits_t<ST>::spec, traits_t<T>::spec,
754 traits_t<T>::spec, traits_t<ST>::spec);
755 KD_TRACE(10, (buff, gtid, schedule, chunk, lb, ub, st))if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, schedule
, chunk, lb, ub, st); }
;
756 __kmp_str_free(&buff);
757 }
758#endif
759 /* setup data */
760 th = __kmp_threads[gtid];
761 team = th->th.th_team;
762 active = !team->t.t_serialized;
763 th->th.th_ident = loc;
764
765 // Any half-decent optimizer will remove this test when the blocks are empty
766 // since the macros expand to nothing
767 // when statistics are disabled.
768 if (schedule == __kmp_static) {
769 KMP_COUNT_BLOCK(OMP_LOOP_STATIC)((void)0);
770 } else {
771 KMP_COUNT_BLOCK(OMP_LOOP_DYNAMIC)((void)0);
772 }
773
774#if KMP_USE_HIER_SCHED0
775 // Initialize the scheduling hierarchy if requested in OMP_SCHEDULE envirable
776 // Hierarchical scheduling does not work with ordered, so if ordered is
777 // detected, then revert back to threaded scheduling.
778 bool ordered;
779 enum sched_type my_sched = schedule;
780 my_buffer_index = th->th.th_dispatch->th_disp_index;
781 pr = reinterpret_cast<dispatch_private_info_template<T> *>(
782 &th->th.th_dispatch
783 ->th_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]);
784 my_sched = SCHEDULE_WITHOUT_MODIFIERS(my_sched)(enum sched_type)( (my_sched) & ~(kmp_sch_modifier_nonmonotonic
| kmp_sch_modifier_monotonic))
;
785 if ((my_sched >= kmp_nm_lower) && (my_sched < kmp_nm_upper))
786 my_sched =
787 (enum sched_type)(((int)my_sched) - (kmp_nm_lower - kmp_sch_lower));
788 ordered = (kmp_ord_lower & my_sched);
789 if (pr->flags.use_hier) {
790 if (ordered) {
791 KD_TRACE(100, ("__kmp_dispatch_init: T#%d ordered loop detected. "if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init: T#%d ordered loop detected. "
"Disabling hierarchical scheduling.\n", gtid); }
792 "Disabling hierarchical scheduling.\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init: T#%d ordered loop detected. "
"Disabling hierarchical scheduling.\n", gtid); }
793 gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init: T#%d ordered loop detected. "
"Disabling hierarchical scheduling.\n", gtid); }
;
794 pr->flags.use_hier = FALSE0;
795 }
796 }
797 if (schedule == kmp_sch_runtime && __kmp_hier_scheds.size > 0) {
798 // Don't use hierarchical for ordered parallel loops and don't
799 // use the runtime hierarchy if one was specified in the program
800 if (!ordered && !pr->flags.use_hier)
801 __kmp_dispatch_init_hier_runtime<T>(loc, lb, ub, st);
802 }
803#endif // KMP_USE_HIER_SCHED
804
805#if USE_ITT_BUILD1
806 kmp_uint64 cur_chunk = chunk;
807 int itt_need_metadata_reporting = __itt_metadata_add_ptr__kmp_itt_metadata_add_ptr__3_0 &&
808 __kmp_forkjoin_frames_mode == 3 &&
809 KMP_MASTER_GTID(gtid)(__kmp_tid_from_gtid((gtid)) == 0) &&
810#if OMP_40_ENABLED(50 >= 40)
811 th->th.th_teams_microtask == NULL__null &&
812#endif
813 team->t.t_active_level == 1;
814#endif
815 if (!active) {
816 pr = reinterpret_cast<dispatch_private_info_template<T> *>(
817 th->th.th_dispatch->th_disp_buffer); /* top of the stack */
818 } else {
819 KMP_DEBUG_ASSERT(th->th.th_dispatch ==if (!(th->th.th_dispatch == &th->th.th_team->t.t_dispatch
[th->th.th_info.ds.ds_tid])) { __kmp_debug_assert("th->th.th_dispatch == &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]"
, "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp"
, 820); }
820 &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid])if (!(th->th.th_dispatch == &th->th.th_team->t.t_dispatch
[th->th.th_info.ds.ds_tid])) { __kmp_debug_assert("th->th.th_dispatch == &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]"
, "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp"
, 820); }
;
821
822 my_buffer_index = th->th.th_dispatch->th_disp_index++;
823
824 /* What happens when number of threads changes, need to resize buffer? */
825 pr = reinterpret_cast<dispatch_private_info_template<T> *>(
826 &th->th.th_dispatch
827 ->th_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]);
828 sh = reinterpret_cast<dispatch_shared_info_template<T> volatile *>(
829 &team->t.t_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]);
830 KD_TRACE(10, ("__kmp_dispatch_init: T#%d my_buffer_index:%d\n", gtid,if (kmp_d_debug >= 10) { __kmp_debug_printf ("__kmp_dispatch_init: T#%d my_buffer_index:%d\n"
, gtid, my_buffer_index); }
831 my_buffer_index))if (kmp_d_debug >= 10) { __kmp_debug_printf ("__kmp_dispatch_init: T#%d my_buffer_index:%d\n"
, gtid, my_buffer_index); }
;
832 }
833
834 __kmp_dispatch_init_algorithm(loc, gtid, pr, schedule, lb, ub, st,
835#if USE_ITT_BUILD1
836 &cur_chunk,
837#endif
838 chunk, (T)th->th.th_team_nproc,
839 (T)th->th.th_info.ds.ds_tid);
840 if (active) {
841 if (pr->flags.ordered == 0) {
842 th->th.th_dispatch->th_deo_fcn = __kmp_dispatch_deo_error;
843 th->th.th_dispatch->th_dxo_fcn = __kmp_dispatch_dxo_error;
844 } else {
845 th->th.th_dispatch->th_deo_fcn = __kmp_dispatch_deo<UT>;
846 th->th.th_dispatch->th_dxo_fcn = __kmp_dispatch_dxo<UT>;
847 }
848 }
849
850 if (active) {
851 /* The name of this buffer should be my_buffer_index when it's free to use
852 * it */
853
854 KD_TRACE(100, ("__kmp_dispatch_init: T#%d before wait: my_buffer_index:%d "if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init: T#%d before wait: my_buffer_index:%d "
"sh->buffer_index:%d\n", gtid, my_buffer_index, sh->buffer_index
); }
855 "sh->buffer_index:%d\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init: T#%d before wait: my_buffer_index:%d "
"sh->buffer_index:%d\n", gtid, my_buffer_index, sh->buffer_index
); }
856 gtid, my_buffer_index, sh->buffer_index))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init: T#%d before wait: my_buffer_index:%d "
"sh->buffer_index:%d\n", gtid, my_buffer_index, sh->buffer_index
); }
;
857 __kmp_wait_yield<kmp_uint32>(&sh->buffer_index, my_buffer_index,
858 __kmp_eq<kmp_uint32> USE_ITT_BUILD_ARG(NULL), __null);
859 // Note: KMP_WAIT_YIELD() cannot be used there: buffer index and
860 // my_buffer_index are *always* 32-bit integers.
861 KMP_MB(); /* is this necessary? */
862 KD_TRACE(100, ("__kmp_dispatch_init: T#%d after wait: my_buffer_index:%d "if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init: T#%d after wait: my_buffer_index:%d "
"sh->buffer_index:%d\n", gtid, my_buffer_index, sh->buffer_index
); }
863 "sh->buffer_index:%d\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init: T#%d after wait: my_buffer_index:%d "
"sh->buffer_index:%d\n", gtid, my_buffer_index, sh->buffer_index
); }
864 gtid, my_buffer_index, sh->buffer_index))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init: T#%d after wait: my_buffer_index:%d "
"sh->buffer_index:%d\n", gtid, my_buffer_index, sh->buffer_index
); }
;
865
866 th->th.th_dispatch->th_dispatch_pr_current = (dispatch_private_info_t *)pr;
867 th->th.th_dispatch->th_dispatch_sh_current =
868 CCAST(dispatch_shared_info_t *, (volatile dispatch_shared_info_t *)sh)const_cast<dispatch_shared_info_t *>((volatile dispatch_shared_info_t
*)sh)
;
869#if USE_ITT_BUILD1
870 if (pr->flags.ordered) {
871 __kmp_itt_ordered_init(gtid);
872 }
873 // Report loop metadata
874 if (itt_need_metadata_reporting) {
875 // Only report metadata by master of active team at level 1
876 kmp_uint64 schedtype = 0;
877 switch (schedule) {
878 case kmp_sch_static_chunked:
879 case kmp_sch_static_balanced: // Chunk is calculated in the switch above
880 break;
881 case kmp_sch_static_greedy:
882 cur_chunk = pr->u.p.parm1;
883 break;
884 case kmp_sch_dynamic_chunked:
885 schedtype = 1;
886 break;
887 case kmp_sch_guided_iterative_chunked:
888 case kmp_sch_guided_analytical_chunked:
889#if OMP_45_ENABLED(50 >= 45)
890 case kmp_sch_guided_simd:
891#endif
892 schedtype = 2;
893 break;
894 default:
895 // Should we put this case under "static"?
896 // case kmp_sch_static_steal:
897 schedtype = 3;
898 break;
899 }
900 __kmp_itt_metadata_loop(loc, schedtype, pr->u.p.tc, cur_chunk);
901 }
902#if KMP_USE_HIER_SCHED0
903 if (pr->flags.use_hier) {
904 pr->u.p.count = 0;
905 pr->u.p.ub = pr->u.p.lb = pr->u.p.st = pr->u.p.tc = 0;
906 }
907#endif // KMP_USER_HIER_SCHED
908#endif /* USE_ITT_BUILD */
909 }
910
911#ifdef KMP_DEBUG1
912 {
913 char *buff;
914 // create format specifiers before the debug output
915 buff = __kmp_str_format(
916 "__kmp_dispatch_init: T#%%d returning: schedule:%%d ordered:%%%s "
917 "lb:%%%s ub:%%%s"
918 " st:%%%s tc:%%%s count:%%%s\n\tordered_lower:%%%s ordered_upper:%%%s"
919 " parm1:%%%s parm2:%%%s parm3:%%%s parm4:%%%s\n",
920 traits_t<UT>::spec, traits_t<T>::spec, traits_t<T>::spec,
921 traits_t<ST>::spec, traits_t<UT>::spec, traits_t<UT>::spec,
922 traits_t<UT>::spec, traits_t<UT>::spec, traits_t<T>::spec,
923 traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec);
924 KD_TRACE(10, (buff, gtid, pr->schedule, pr->flags.ordered, pr->u.p.lb,if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, pr
->schedule, pr->flags.ordered, pr->u.p.lb, pr->u.
p.ub, pr->u.p.st, pr->u.p.tc, pr->u.p.count, pr->
u.p.ordered_lower, pr->u.p.ordered_upper, pr->u.p.parm1
, pr->u.p.parm2, pr->u.p.parm3, pr->u.p.parm4); }
925 pr->u.p.ub, pr->u.p.st, pr->u.p.tc, pr->u.p.count,if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, pr
->schedule, pr->flags.ordered, pr->u.p.lb, pr->u.
p.ub, pr->u.p.st, pr->u.p.tc, pr->u.p.count, pr->
u.p.ordered_lower, pr->u.p.ordered_upper, pr->u.p.parm1
, pr->u.p.parm2, pr->u.p.parm3, pr->u.p.parm4); }
926 pr->u.p.ordered_lower, pr->u.p.ordered_upper, pr->u.p.parm1,if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, pr
->schedule, pr->flags.ordered, pr->u.p.lb, pr->u.
p.ub, pr->u.p.st, pr->u.p.tc, pr->u.p.count, pr->
u.p.ordered_lower, pr->u.p.ordered_upper, pr->u.p.parm1
, pr->u.p.parm2, pr->u.p.parm3, pr->u.p.parm4); }
927 pr->u.p.parm2, pr->u.p.parm3, pr->u.p.parm4))if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, pr
->schedule, pr->flags.ordered, pr->u.p.lb, pr->u.
p.ub, pr->u.p.st, pr->u.p.tc, pr->u.p.count, pr->
u.p.ordered_lower, pr->u.p.ordered_upper, pr->u.p.parm1
, pr->u.p.parm2, pr->u.p.parm3, pr->u.p.parm4); }
;
928 __kmp_str_free(&buff);
929 }
930#endif
931#if (KMP_STATIC_STEAL_ENABLED1)
932 // It cannot be guaranteed that after execution of a loop with some other
933 // schedule kind all the parm3 variables will contain the same value. Even if
934 // all parm3 will be the same, it still exists a bad case like using 0 and 1
935 // rather than program life-time increment. So the dedicated variable is
936 // required. The 'static_steal_counter' is used.
937 if (schedule == kmp_sch_static_steal) {
938 // Other threads will inspect this variable when searching for a victim.
939 // This is a flag showing that other threads may steal from this thread
940 // since then.
941 volatile T *p = &pr->u.p.static_steal_counter;
942 *p = *p + 1;
943 }
944#endif // ( KMP_STATIC_STEAL_ENABLED )
945
946#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
947 if (ompt_enabled.ompt_callback_work) {
948 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL__null);
949 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
950 ompt_callbacks.ompt_callback(ompt_callback_work)ompt_callback_work_callback(
951 ompt_work_loop, ompt_scope_begin, &(team_info->parallel_data),
952 &(task_info->task_data), pr->u.p.tc, OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid));
953 }
954#endif
955 KMP_PUSH_PARTITIONED_TIMER(OMP_loop_dynamic)((void)0);
956}
957
958/* For ordered loops, either __kmp_dispatch_finish() should be called after
959 * every iteration, or __kmp_dispatch_finish_chunk() should be called after
960 * every chunk of iterations. If the ordered section(s) were not executed
961 * for this iteration (or every iteration in this chunk), we need to set the
962 * ordered iteration counters so that the next thread can proceed. */
963template <typename UT>
964static void __kmp_dispatch_finish(int gtid, ident_t *loc) {
965 typedef typename traits_t<UT>::signed_t ST;
966 kmp_info_t *th = __kmp_threads[gtid];
967
968 KD_TRACE(100, ("__kmp_dispatch_finish: T#%d called\n", gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_finish: T#%d called\n"
, gtid); }
;
969 if (!th->th.th_team->t.t_serialized) {
970
971 dispatch_private_info_template<UT> *pr =
972 reinterpret_cast<dispatch_private_info_template<UT> *>(
973 th->th.th_dispatch->th_dispatch_pr_current);
974 dispatch_shared_info_template<UT> volatile *sh =
975 reinterpret_cast<dispatch_shared_info_template<UT> volatile *>(
976 th->th.th_dispatch->th_dispatch_sh_current);
977 KMP_DEBUG_ASSERT(pr)if (!(pr)) { __kmp_debug_assert("pr", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp"
, 977); }
;
978 KMP_DEBUG_ASSERT(sh)if (!(sh)) { __kmp_debug_assert("sh", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp"
, 978); }
;
979 KMP_DEBUG_ASSERT(th->th.th_dispatch ==if (!(th->th.th_dispatch == &th->th.th_team->t.t_dispatch
[th->th.th_info.ds.ds_tid])) { __kmp_debug_assert("th->th.th_dispatch == &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]"
, "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp"
, 980); }
980 &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid])if (!(th->th.th_dispatch == &th->th.th_team->t.t_dispatch
[th->th.th_info.ds.ds_tid])) { __kmp_debug_assert("th->th.th_dispatch == &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]"
, "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp"
, 980); }
;
981
982 if (pr->ordered_bumped) {
983 KD_TRACE(if (kmp_d_debug >= 1000) { __kmp_debug_printf ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n"
, gtid); }
984 1000,if (kmp_d_debug >= 1000) { __kmp_debug_printf ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n"
, gtid); }
985 ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n",if (kmp_d_debug >= 1000) { __kmp_debug_printf ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n"
, gtid); }
986 gtid))if (kmp_d_debug >= 1000) { __kmp_debug_printf ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n"
, gtid); }
;
987 pr->ordered_bumped = 0;
988 } else {
989 UT lower = pr->u.p.ordered_lower;
990
991#ifdef KMP_DEBUG1
992 {
993 char *buff;
994 // create format specifiers before the debug output
995 buff = __kmp_str_format("__kmp_dispatch_finish: T#%%d before wait: "
996 "ordered_iteration:%%%s lower:%%%s\n",
997 traits_t<UT>::spec, traits_t<UT>::spec);
998 KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower))if (kmp_d_debug >= 1000) { __kmp_debug_printf (buff, gtid,
sh->u.s.ordered_iteration, lower); }
;
999 __kmp_str_free(&buff);
1000 }
1001#endif
1002
1003 __kmp_wait_yield<UT>(&sh->u.s.ordered_iteration, lower,
1004 __kmp_ge<UT> USE_ITT_BUILD_ARG(NULL), __null);
1005 KMP_MB(); /* is this necessary? */
1006#ifdef KMP_DEBUG1
1007 {
1008 char *buff;
1009 // create format specifiers before the debug output
1010 buff = __kmp_str_format("__kmp_dispatch_finish: T#%%d after wait: "
1011 "ordered_iteration:%%%s lower:%%%s\n",
1012 traits_t<UT>::spec, traits_t<UT>::spec);
1013 KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower))if (kmp_d_debug >= 1000) { __kmp_debug_printf (buff, gtid,
sh->u.s.ordered_iteration, lower); }
;
1014 __kmp_str_free(&buff);
1015 }
1016#endif
1017
1018 test_then_inc<ST>((volatile ST *)&sh->u.s.ordered_iteration);
1019 } // if
1020 } // if
1021 KD_TRACE(100, ("__kmp_dispatch_finish: T#%d returned\n", gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_finish: T#%d returned\n"
, gtid); }
;
1022}
1023
1024#ifdef KMP_GOMP_COMPAT
1025
1026template <typename UT>
1027static void __kmp_dispatch_finish_chunk(int gtid, ident_t *loc) {
1028 typedef typename traits_t<UT>::signed_t ST;
1029 kmp_info_t *th = __kmp_threads[gtid];
1030
1031 KD_TRACE(100, ("__kmp_dispatch_finish_chunk: T#%d called\n", gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_finish_chunk: T#%d called\n"
, gtid); }
;
1032 if (!th->th.th_team->t.t_serialized) {
1033 // int cid;
1034 dispatch_private_info_template<UT> *pr =
1035 reinterpret_cast<dispatch_private_info_template<UT> *>(
1036 th->th.th_dispatch->th_dispatch_pr_current);
1037 dispatch_shared_info_template<UT> volatile *sh =
1038 reinterpret_cast<dispatch_shared_info_template<UT> volatile *>(
1039 th->th.th_dispatch->th_dispatch_sh_current);
1040 KMP_DEBUG_ASSERT(pr)if (!(pr)) { __kmp_debug_assert("pr", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp"
, 1040); }
;
1041 KMP_DEBUG_ASSERT(sh)if (!(sh)) { __kmp_debug_assert("sh", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp"
, 1041); }
;
1042 KMP_DEBUG_ASSERT(th->th.th_dispatch ==if (!(th->th.th_dispatch == &th->th.th_team->t.t_dispatch
[th->th.th_info.ds.ds_tid])) { __kmp_debug_assert("th->th.th_dispatch == &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]"
, "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp"
, 1043); }
1043 &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid])if (!(th->th.th_dispatch == &th->th.th_team->t.t_dispatch
[th->th.th_info.ds.ds_tid])) { __kmp_debug_assert("th->th.th_dispatch == &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]"
, "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp"
, 1043); }
;
1044
1045 // for (cid = 0; cid < KMP_MAX_ORDERED; ++cid) {
1046 UT lower = pr->u.p.ordered_lower;
1047 UT upper = pr->u.p.ordered_upper;
1048 UT inc = upper - lower + 1;
1049
1050 if (pr->ordered_bumped == inc) {
1051 KD_TRACE(if (kmp_d_debug >= 1000) { __kmp_debug_printf ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n"
, gtid); }
1052 1000,if (kmp_d_debug >= 1000) { __kmp_debug_printf ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n"
, gtid); }
1053 ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n",if (kmp_d_debug >= 1000) { __kmp_debug_printf ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n"
, gtid); }
1054 gtid))if (kmp_d_debug >= 1000) { __kmp_debug_printf ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n"
, gtid); }
;
1055 pr->ordered_bumped = 0;
1056 } else {
1057 inc -= pr->ordered_bumped;
1058
1059#ifdef KMP_DEBUG1
1060 {
1061 char *buff;
1062 // create format specifiers before the debug output
1063 buff = __kmp_str_format(
1064 "__kmp_dispatch_finish_chunk: T#%%d before wait: "
1065 "ordered_iteration:%%%s lower:%%%s upper:%%%s\n",
1066 traits_t<UT>::spec, traits_t<UT>::spec, traits_t<UT>::spec);
1067 KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower, upper))if (kmp_d_debug >= 1000) { __kmp_debug_printf (buff, gtid,
sh->u.s.ordered_iteration, lower, upper); }
;
1068 __kmp_str_free(&buff);
1069 }
1070#endif
1071
1072 __kmp_wait_yield<UT>(&sh->u.s.ordered_iteration, lower,
1073 __kmp_ge<UT> USE_ITT_BUILD_ARG(NULL), __null);
1074
1075 KMP_MB(); /* is this necessary? */
1076 KD_TRACE(1000, ("__kmp_dispatch_finish_chunk: T#%d resetting "if (kmp_d_debug >= 1000) { __kmp_debug_printf ("__kmp_dispatch_finish_chunk: T#%d resetting "
"ordered_bumped to zero\n", gtid); }
1077 "ordered_bumped to zero\n",if (kmp_d_debug >= 1000) { __kmp_debug_printf ("__kmp_dispatch_finish_chunk: T#%d resetting "
"ordered_bumped to zero\n", gtid); }
1078 gtid))if (kmp_d_debug >= 1000) { __kmp_debug_printf ("__kmp_dispatch_finish_chunk: T#%d resetting "
"ordered_bumped to zero\n", gtid); }
;
1079 pr->ordered_bumped = 0;
1080//!!!!! TODO check if the inc should be unsigned, or signed???
1081#ifdef KMP_DEBUG1
1082 {
1083 char *buff;
1084 // create format specifiers before the debug output
1085 buff = __kmp_str_format(
1086 "__kmp_dispatch_finish_chunk: T#%%d after wait: "
1087 "ordered_iteration:%%%s inc:%%%s lower:%%%s upper:%%%s\n",
1088 traits_t<UT>::spec, traits_t<UT>::spec, traits_t<UT>::spec,
1089 traits_t<UT>::spec);
1090 KD_TRACE(1000,if (kmp_d_debug >= 1000) { __kmp_debug_printf (buff, gtid,
sh->u.s.ordered_iteration, inc, lower, upper); }
1091 (buff, gtid, sh->u.s.ordered_iteration, inc, lower, upper))if (kmp_d_debug >= 1000) { __kmp_debug_printf (buff, gtid,
sh->u.s.ordered_iteration, inc, lower, upper); }
;
1092 __kmp_str_free(&buff);
1093 }
1094#endif
1095
1096 test_then_add<ST>((volatile ST *)&sh->u.s.ordered_iteration, inc);
1097 }
1098 // }
1099 }
1100 KD_TRACE(100, ("__kmp_dispatch_finish_chunk: T#%d returned\n", gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_finish_chunk: T#%d returned\n"
, gtid); }
;
1101}
1102
1103#endif /* KMP_GOMP_COMPAT */
1104
1105template <typename T>
1106int __kmp_dispatch_next_algorithm(int gtid,
1107 dispatch_private_info_template<T> *pr,
1108 dispatch_shared_info_template<T> volatile *sh,
1109 kmp_int32 *p_last, T *p_lb, T *p_ub,
1110 typename traits_t<T>::signed_t *p_st, T nproc,
1111 T tid) {
1112 typedef typename traits_t<T>::unsigned_t UT;
1113 typedef typename traits_t<T>::signed_t ST;
1114 typedef typename traits_t<T>::floating_t DBL;
1115 int status = 0;
1116 kmp_int32 last = 0;
1117 T start;
1118 ST incr;
1119 UT limit, trip, init;
1120 kmp_info_t *th = __kmp_threads[gtid];
1121 kmp_team_t *team = th->th.th_team;
1122
1123 KMP_DEBUG_ASSERT(th->th.th_dispatch ==
1124 &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);
1125 KMP_DEBUG_ASSERT(pr);
1126 KMP_DEBUG_ASSERT(sh);
1127 KMP_DEBUG_ASSERT(tid >= 0 && tid < nproc);
1128#ifdef KMP_DEBUG1
1129 {
1130 char *buff;
1131 // create format specifiers before the debug output
1132 buff =
1133 __kmp_str_format("__kmp_dispatch_next_algorithm: T#%%d called pr:%%p "
1134 "sh:%%p nproc:%%%s tid:%%%s\n",
1135 traits_t<T>::spec, traits_t<T>::spec);
1136 KD_TRACE(10, (buff, gtid, pr, sh, nproc, tid));
1137 __kmp_str_free(&buff);
1138 }
1139#endif
1140
1141 // zero trip count
1142 if (pr->u.p.tc == 0) {
1143 KD_TRACE(10,
1144 ("__kmp_dispatch_next_algorithm: T#%d early exit trip count is "
1145 "zero status:%d\n",
1146 gtid, status));
1147 return 0;
1148 }
1149
1150 switch (pr->schedule) {
1151#if (KMP_STATIC_STEAL_ENABLED1)
1152 case kmp_sch_static_steal: {
1153 T chunk = pr->u.p.parm1;
1154
1155 KD_TRACE(100,
1156 ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_static_steal case\n",
1157 gtid));
1158
1159 trip = pr->u.p.tc - 1;
1160
1161 if (traits_t<T>::type_size > 4) {
1162 // use lock for 8-byte and CAS for 4-byte induction
1163 // variable. TODO (optional): check and use 16-byte CAS
1164 kmp_lock_t *lck = th->th.th_dispatch->th_steal_lock;
1165 KMP_DEBUG_ASSERT(lck != NULL);
1166 if (pr->u.p.count < (UT)pr->u.p.ub) {
1167 __kmp_acquire_lock(lck, gtid);
1168 // try to get own chunk of iterations
1169 init = (pr->u.p.count)++;
1170 status = (init < (UT)pr->u.p.ub);
1171 __kmp_release_lock(lck, gtid);
1172 } else {
1173 status = 0; // no own chunks
1174 }
1175 if (!status) { // try to steal
1176 kmp_info_t **other_threads = team->t.t_threads;
1177 int while_limit = nproc; // nproc attempts to find a victim
1178 int while_index = 0;
1179 // TODO: algorithm of searching for a victim
1180 // should be cleaned up and measured
1181 while ((!status) && (while_limit != ++while_index)) {
1182 T remaining;
1183 T victimIdx = pr->u.p.parm4;
1184 T oldVictimIdx = victimIdx ? victimIdx - 1 : nproc - 1;
1185 dispatch_private_info_template<T> *victim =
1186 reinterpret_cast<dispatch_private_info_template<T> *>(
1187 other_threads[victimIdx]
1188 ->th.th_dispatch->th_dispatch_pr_current);
1189 while ((victim == NULL__null || victim == pr ||
1190 (*(volatile T *)&victim->u.p.static_steal_counter !=
1191 *(volatile T *)&pr->u.p.static_steal_counter)) &&
1192 oldVictimIdx != victimIdx) {
1193 victimIdx = (victimIdx + 1) % nproc;
1194 victim = reinterpret_cast<dispatch_private_info_template<T> *>(
1195 other_threads[victimIdx]
1196 ->th.th_dispatch->th_dispatch_pr_current);
1197 }
1198 if (!victim || (*(volatile T *)&victim->u.p.static_steal_counter !=
1199 *(volatile T *)&pr->u.p.static_steal_counter)) {
1200 continue; // try once more (nproc attempts in total)
1201 // no victim is ready yet to participate in stealing
1202 // because all victims are still in kmp_init_dispatch
1203 }
1204 if (victim->u.p.count + 2 > (UT)victim->u.p.ub) {
1205 pr->u.p.parm4 = (victimIdx + 1) % nproc; // shift start tid
1206 continue; // not enough chunks to steal, goto next victim
1207 }
1208
1209 lck = other_threads[victimIdx]->th.th_dispatch->th_steal_lock;
1210 KMP_ASSERT(lck != NULL);
1211 __kmp_acquire_lock(lck, gtid);
1212 limit = victim->u.p.ub; // keep initial ub
1213 if (victim->u.p.count >= limit ||
1214 (remaining = limit - victim->u.p.count) < 2) {
1215 __kmp_release_lock(lck, gtid);
1216 pr->u.p.parm4 = (victimIdx + 1) % nproc; // next victim
1217 continue; // not enough chunks to steal
1218 }
1219 // stealing succeeded, reduce victim's ub by 1/4 of undone chunks or
1220 // by 1
1221 if (remaining > 3) {
1222 // steal 1/4 of remaining
1223 KMP_COUNT_DEVELOPER_VALUE(FOR_static_steal_stolen, remaining >> 2)((void)0);
1224 init = (victim->u.p.ub -= (remaining >> 2));
1225 } else {
1226 // steal 1 chunk of 2 or 3 remaining
1227 KMP_COUNT_DEVELOPER_VALUE(FOR_static_steal_stolen, 1)((void)0);
1228 init = (victim->u.p.ub -= 1);
1229 }
1230 __kmp_release_lock(lck, gtid);
1231
1232 KMP_DEBUG_ASSERT(init + 1 <= limit);
1233 pr->u.p.parm4 = victimIdx; // remember victim to steal from
1234 status = 1;
1235 while_index = 0;
1236 // now update own count and ub with stolen range but init chunk
1237 __kmp_acquire_lock(th->th.th_dispatch->th_steal_lock, gtid);
1238 pr->u.p.count = init + 1;
1239 pr->u.p.ub = limit;
1240 __kmp_release_lock(th->th.th_dispatch->th_steal_lock, gtid);
1241 } // while (search for victim)
1242 } // if (try to find victim and steal)
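The steal-size rule used above (take a quarter of the victim's remaining chunks when more than three remain, otherwise take one) can be checked in isolation. A minimal sketch, assuming only that `remaining` counts whole chunks still unclaimed on the victim; the helper name is illustrative, not part of the runtime:

  // steal_count: chunks a thief takes from a victim that still has `remaining` chunks
  // (the callers above have already ruled out remaining < 2).
  static inline unsigned steal_count(unsigned remaining) {
    if (remaining > 3)
      return remaining >> 2; // a quarter of the undone chunks, e.g. 10 -> 2, 16 -> 4
    return 1;                // 2 or 3 left: take a single chunk
  }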
1243 } else {
1244 // 4-byte induction variable, use 8-byte CAS for pair (count, ub)
1245 typedef union {
1246 struct {
1247 UT count;
1248 T ub;
1249 } p;
1250 kmp_int64 b;
1251 } union_i4;
1252 // All operations on 'count' or 'ub' must be combined atomically
1253 // together.
1254 {
1255 union_i4 vold, vnew;
1256 vold.b = *(volatile kmp_int64 *)(&pr->u.p.count);
1257 vnew = vold;
1258 vnew.p.count++;
1259 while (!KMP_COMPARE_AND_STORE_ACQ64(
1260 (volatile kmp_int64 *)&pr->u.p.count,
1261 *VOLATILE_CAST(kmp_int64 *) & vold.b,
1262 *VOLATILE_CAST(kmp_int64 *) & vnew.b)) {
1263 KMP_CPU_PAUSE();
1264 vold.b = *(volatile kmp_int64 *)(&pr->u.p.count);
1265 vnew = vold;
1266 vnew.p.count++;
1267 }
1268 vnew = vold;
1269 init = vnew.p.count;
1270 status = (init < (UT)vnew.p.ub);
1271 }
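The union above packs the 4-byte (count, ub) pair into one 8-byte word so that claiming a chunk (incrementing count) can never interleave with a concurrent update of ub. A minimal standalone sketch of the same idea using std::atomic, with illustrative names rather than the runtime's types or API:

  #include <atomic>
  #include <cstdint>

  union packed_i4 {
    struct { uint32_t count, ub; } p;
    uint64_t b;
  };

  // Claim one chunk; returns false once count has caught up with ub.
  static bool claim_chunk(std::atomic<uint64_t> &word) {
    packed_i4 vold, vnew;
    vold.b = word.load();
    do {
      vnew = vold;
      if (vnew.p.count >= vnew.p.ub)
        return false;   // nothing left to claim
      vnew.p.count++;   // take the next chunk
    } while (!word.compare_exchange_weak(vold.b, vnew.b)); // retry on contention
    return true;
  }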
1272
1273 if (!status) {
1274 kmp_info_t **other_threads = team->t.t_threads;
1275 int while_limit = nproc; // nproc attempts to find a victim
1276 int while_index = 0;
1277
1278 // TODO: algorithm of searching for a victim
1279 // should be cleaned up and measured
1280 while ((!status) && (while_limit != ++while_index)) {
1281 union_i4 vold, vnew;
1282 kmp_int32 remaining;
1283 T victimIdx = pr->u.p.parm4;
1284 T oldVictimIdx = victimIdx ? victimIdx - 1 : nproc - 1;
1285 dispatch_private_info_template<T> *victim =
1286 reinterpret_cast<dispatch_private_info_template<T> *>(
1287 other_threads[victimIdx]
1288 ->th.th_dispatch->th_dispatch_pr_current);
1289 while ((victim == NULL__null || victim == pr ||
1290 (*(volatile T *)&victim->u.p.static_steal_counter !=
1291 *(volatile T *)&pr->u.p.static_steal_counter)) &&
1292 oldVictimIdx != victimIdx) {
1293 victimIdx = (victimIdx + 1) % nproc;
1294 victim = reinterpret_cast<dispatch_private_info_template<T> *>(
1295 other_threads[victimIdx]
1296 ->th.th_dispatch->th_dispatch_pr_current);
1297 }
1298 if (!victim || (*(volatile T *)&victim->u.p.static_steal_counter !=
1299 *(volatile T *)&pr->u.p.static_steal_counter)) {
1300 continue; // try once more (nproc attempts in total)
1301 // no victim is ready yet to participate in stealing
1302 // because all victims are still in kmp_init_dispatch
1303 }
1304 pr->u.p.parm4 = victimIdx; // new victim found
1305 while (1) { // CAS loop if victim has enough chunks to steal
1306 vold.b = *(volatile kmp_int64 *)(&victim->u.p.count);
1307 vnew = vold;
1308
1309 KMP_DEBUG_ASSERT((vnew.p.ub - 1) * (UT)chunk <= trip);
1310 if (vnew.p.count >= (UT)vnew.p.ub ||
1311 (remaining = vnew.p.ub - vnew.p.count) < 2) {
1312 pr->u.p.parm4 = (victimIdx + 1) % nproc; // shift start victim id
1313 break; // not enough chunks to steal, goto next victim
1314 }
1315 if (remaining > 3) {
1316 vnew.p.ub -= (remaining >> 2); // try to steal 1/4 of remaining
1317 } else {
1318 vnew.p.ub -= 1; // steal 1 chunk of 2 or 3 remaining
1319 }
1320 KMP_DEBUG_ASSERT((vnew.p.ub - 1) * (UT)chunk <= trip);
1321 // TODO: Should this be acquire or release?
1322 if (KMP_COMPARE_AND_STORE_ACQ64(
1323 (volatile kmp_int64 *)&victim->u.p.count,
1324 *VOLATILE_CAST(kmp_int64 *) & vold.b,
1325 *VOLATILE_CAST(kmp_int64 *) & vnew.b)) {
1326 // stealing succeeded
1327 KMP_COUNT_DEVELOPER_VALUE(FOR_static_steal_stolen,
1328 vold.p.ub - vnew.p.ub);
1329 status = 1;
1330 while_index = 0;
1331 // now update own count and ub
1332 init = vnew.p.ub;
1333 vold.p.count = init + 1;
1334#if KMP_ARCH_X860
1335 KMP_XCHG_FIXED64((volatile kmp_int64 *)(&pr->u.p.count), vold.b);
1336#else
1337 *(volatile kmp_int64 *)(&pr->u.p.count) = vold.b;
1338#endif
1339 break;
1340 } // if (check CAS result)
1341 KMP_CPU_PAUSE(); // CAS failed, repeat attempt
1342 } // while (try to steal from particular victim)
1343 } // while (search for victim)
1344 } // if (try to find victim and steal)
1345 } // if (4-byte induction variable)
1346 if (!status) {
1347 *p_lb = 0;
1348 *p_ub = 0;
1349 if (p_st != NULL__null)
1350 *p_st = 0;
1351 } else {
1352 start = pr->u.p.parm2;
1353 init *= chunk;
1354 limit = chunk + init - 1;
1355 incr = pr->u.p.st;
1356 KMP_COUNT_DEVELOPER_VALUE(FOR_static_steal_chunks, 1)((void)0);
1357
1358 KMP_DEBUG_ASSERT(init <= trip);
1359 if ((last = (limit >= trip)) != 0)
1360 limit = trip;
1361 if (p_st != NULL__null)
1362 *p_st = incr;
1363
1364 if (incr == 1) {
1365 *p_lb = start + init;
1366 *p_ub = start + limit;
1367 } else {
1368 *p_lb = start + init * incr;
1369 *p_ub = start + limit * incr;
1370 }
1371
1372 if (pr->flags.ordered) {
1373 pr->u.p.ordered_lower = init;
1374 pr->u.p.ordered_upper = limit;
1375 } // if
1376 } // if
1377 break;
1378 } // case
1379#endif // ( KMP_STATIC_STEAL_ENABLED )
1380 case kmp_sch_static_balanced: {
1381 KD_TRACE(
1382 10,
1383 ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_static_balanced case\n",
1384 gtid));
1385 /* check if thread has any iteration to do */
1386 if ((status = !pr->u.p.count) != 0) {
1387 pr->u.p.count = 1;
1388 *p_lb = pr->u.p.lb;
1389 *p_ub = pr->u.p.ub;
1390 last = pr->u.p.parm1;
1391 if (p_st != NULL__null)
1392 *p_st = pr->u.p.st;
1393 } else { /* no iterations to do */
1394 pr->u.p.lb = pr->u.p.ub + pr->u.p.st;
1395 }
1396 } // case
1397 break;
1398 case kmp_sch_static_greedy: /* original code for kmp_sch_static_greedy was
1399 merged here */
1400 case kmp_sch_static_chunked: {
1401 T parm1;
1402
1403 KD_TRACE(100, ("__kmp_dispatch_next_algorithm: T#%d "
1404 "kmp_sch_static_[affinity|chunked] case\n",
1405 gtid));
1406 parm1 = pr->u.p.parm1;
1407
1408 trip = pr->u.p.tc - 1;
1409 init = parm1 * (pr->u.p.count + tid);
1410
1411 if ((status = (init <= trip)) != 0) {
1412 start = pr->u.p.lb;
1413 incr = pr->u.p.st;
1414 limit = parm1 + init - 1;
1415
1416 if ((last = (limit >= trip)) != 0)
1417 limit = trip;
1418
1419 if (p_st != NULL__null)
1420 *p_st = incr;
1421
1422 pr->u.p.count += nproc;
1423
1424 if (incr == 1) {
1425 *p_lb = start + init;
1426 *p_ub = start + limit;
1427 } else {
1428 *p_lb = start + init * incr;
1429 *p_ub = start + limit * incr;
1430 }
1431
1432 if (pr->flags.ordered) {
1433 pr->u.p.ordered_lower = init;
1434 pr->u.p.ordered_upper = limit;
1435 } // if
1436 } // if
1437 } // case
1438 break;
1439
1440 case kmp_sch_dynamic_chunked: {
1441 T chunk = pr->u.p.parm1;
1442
1443 KD_TRACE(
1444 100,
1445 ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_dynamic_chunked case\n",
1446 gtid));
1447
1448 init = chunk * test_then_inc_acq<ST>((volatile ST *)&sh->u.s.iteration);
1449 trip = pr->u.p.tc - 1;
1450
1451 if ((status = (init <= trip)) == 0) {
1452 *p_lb = 0;
1453 *p_ub = 0;
1454 if (p_st != NULL__null)
1455 *p_st = 0;
1456 } else {
1457 start = pr->u.p.lb;
1458 limit = chunk + init - 1;
1459 incr = pr->u.p.st;
1460
1461 if ((last = (limit >= trip)) != 0)
1462 limit = trip;
1463
1464 if (p_st != NULL__null)
1465 *p_st = incr;
1466
1467 if (incr == 1) {
1468 *p_lb = start + init;
1469 *p_ub = start + limit;
1470 } else {
1471 *p_lb = start + init * incr;
1472 *p_ub = start + limit * incr;
1473 }
1474
1475 if (pr->flags.ordered) {
1476 pr->u.p.ordered_lower = init;
1477 pr->u.p.ordered_upper = limit;
1478 } // if
1479 } // if
1480 } // case
1481 break;
1482
1483 case kmp_sch_guided_iterative_chunked: {
1484 T chunkspec = pr->u.p.parm1;
1485 KD_TRACE(100, ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_guided_chunked "
1486 "iterative case\n",
1487 gtid));
1488 trip = pr->u.p.tc;
1489 // Start atomic part of calculations
1490 while (1) {
1491 ST remaining; // signed, because can be < 0
1492 init = sh->u.s.iteration; // shared value
1493 remaining = trip - init;
1494 if (remaining <= 0) { // AC: need to compare with 0 first
1495 // nothing to do, don't try atomic op
1496 status = 0;
1497 break;
1498 }
1499 if ((T)remaining <
1500 pr->u.p.parm2) { // compare with K*nproc*(chunk+1), K=2 by default
1501 // use dynamic-style schedule
1502 // atomically increment iterations, get old value
1503 init = test_then_add<ST>(RCAST(volatile ST *, &sh->u.s.iteration),
1504 (ST)chunkspec);
1505 remaining = trip - init;
1506 if (remaining <= 0) {
1507 status = 0; // all iterations got by other threads
1508 } else {
1509 // got some iterations to work on
1510 status = 1;
1511 if ((T)remaining > chunkspec) {
1512 limit = init + chunkspec - 1;
1513 } else {
1514 last = 1; // the last chunk
1515 limit = init + remaining - 1;
1516 } // if
1517 } // if
1518 break;
1519 } // if
1520 limit = init +
1521 (UT)(remaining * *(double *)&pr->u.p.parm3); // divide by K*nproc
1522 if (compare_and_swap<ST>(RCAST(volatile ST *, &sh->u.s.iteration),
1523 (ST)init, (ST)limit)) {
1524 // CAS was successful, chunk obtained
1525 status = 1;
1526 --limit;
1527 break;
1528 } // if
1529 } // while
1530 if (status != 0) {
1531 start = pr->u.p.lb;
1532 incr = pr->u.p.st;
1533 if (p_st != NULL__null)
1534 *p_st = incr;
1535 *p_lb = start + init * incr;
1536 *p_ub = start + limit * incr;
1537 if (pr->flags.ordered) {
1538 pr->u.p.ordered_lower = init;
1539 pr->u.p.ordered_upper = limit;
1540 } // if
1541 } else {
1542 *p_lb = 0;
1543 *p_ub = 0;
1544 if (p_st != NULL__null)
1545 *p_st = 0;
1546 } // if
1547 } // case
1548 break;
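To make the guided shrink step above concrete, here is a small simulation of the chunk sizes it produces. The fraction x and the cutoff are assumptions for illustration: x stands in for the double stored in parm3 (per the "divide by K*nproc" comment, roughly 1/(K*nproc) with K = 2), and 16 stands in for the parm2 threshold at which the code falls back to plain chunkspec-sized grabs.

  #include <cstdio>

  // Illustrative only: print guided chunk sizes for `trip` iterations with shrink
  // fraction x; x = 1/8 corresponds to nproc = 4, K = 2 (assumed values).
  int main() {
    unsigned trip = 1000, init = 0;
    double x = 1.0 / 8;
    while (trip - init > 16) {
      unsigned chunk = (unsigned)((trip - init) * x);
      std::printf("chunk of %u iterations starting at %u\n", chunk, init);
      init += chunk;
    }
  }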
1549
1550#if OMP_45_ENABLED(50 >= 45)
1551 case kmp_sch_guided_simd: {
1552 // same as iterative but curr-chunk adjusted to be multiple of given
1553 // chunk
1554 T chunk = pr->u.p.parm1;
1555 KD_TRACE(100,
1556 ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_guided_simd case\n",
1557 gtid));
1558 trip = pr->u.p.tc;
1559 // Start atomic part of calculations
1560 while (1) {
1561 ST remaining; // signed, because can be < 0
1562 init = sh->u.s.iteration; // shared value
1563 remaining = trip - init;
1564 if (remaining <= 0) { // AC: need to compare with 0 first
1565 status = 0; // nothing to do, don't try atomic op
1566 break;
1567 }
1568 KMP_DEBUG_ASSERT(init % chunk == 0);
1569 // compare with K*nproc*(chunk+1), K=2 by default
1570 if ((T)remaining < pr->u.p.parm2) {
1571 // use dynamic-style schedule
1572 // atomically increment iterations, get old value
1573 init = test_then_add<ST>(RCAST(volatile ST *, &sh->u.s.iteration),
1574 (ST)chunk);
1575 remaining = trip - init;
1576 if (remaining <= 0) {
1577 status = 0; // all iterations got by other threads
1578 } else {
1579 // got some iterations to work on
1580 status = 1;
1581 if ((T)remaining > chunk) {
1582 limit = init + chunk - 1;
1583 } else {
1584 last = 1; // the last chunk
1585 limit = init + remaining - 1;
1586 } // if
1587 } // if
1588 break;
1589 } // if
1590 // divide by K*nproc
1591 UT span = remaining * (*(double *)&pr->u.p.parm3);
1592 UT rem = span % chunk;
1593 if (rem) // adjust so that span%chunk == 0
1594 span += chunk - rem;
1595 limit = init + span;
1596 if (compare_and_swap<ST>(RCAST(volatile ST *, &sh->u.s.iteration),
1597 (ST)init, (ST)limit)) {
1598 // CAS was successful, chunk obtained
1599 status = 1;
1600 --limit;
1601 break;
1602 } // if
1603 } // while
1604 if (status != 0) {
1605 start = pr->u.p.lb;
1606 incr = pr->u.p.st;
1607 if (p_st != NULL__null)
1608 *p_st = incr;
1609 *p_lb = start + init * incr;
1610 *p_ub = start + limit * incr;
1611 if (pr->flags.ordered) {
1612 pr->u.p.ordered_lower = init;
1613 pr->u.p.ordered_upper = limit;
1614 } // if
1615 } else {
1616 *p_lb = 0;
1617 *p_ub = 0;
1618 if (p_st != NULL__null)
1619 *p_st = 0;
1620 } // if
1621 } // case
1622 break;
1623#endif // OMP_45_ENABLED
1624
1625 case kmp_sch_guided_analytical_chunked: {
1626 T chunkspec = pr->u.p.parm1;
1627 UT chunkIdx;
1628#if KMP_OS_WINDOWS0 && KMP_ARCH_X860
1629 /* for storing original FPCW value for Windows* OS on
1630 IA-32 architecture 8-byte version */
1631 unsigned int oldFpcw;
1632 unsigned int fpcwSet = 0;
1633#endif
1634 KD_TRACE(100, ("__kmp_dispatch_next_algorithm: T#%d "
1635 "kmp_sch_guided_analytical_chunked case\n",
1636 gtid));
1637
1638 trip = pr->u.p.tc;
1639
1640 KMP_DEBUG_ASSERT(nproc > 1);
1641 KMP_DEBUG_ASSERT((2UL * chunkspec + 1) * (UT)nproc < trip);
1642
1643 while (1) { /* this while loop is a safeguard against unexpected zero
1644 chunk sizes */
1645 chunkIdx = test_then_inc_acq<ST>((volatile ST *)&sh->u.s.iteration);
1646 if (chunkIdx >= (UT)pr->u.p.parm2) {
1647 --trip;
1648 /* use dynamic-style scheduling */
1649 init = chunkIdx * chunkspec + pr->u.p.count;
1650 /* need to verify init > 0 in case of overflow in the above
1651 * calculation */
1652 if ((status = (init > 0 && init <= trip)) != 0) {
1653 limit = init + chunkspec - 1;
1654
1655 if ((last = (limit >= trip)) != 0)
1656 limit = trip;
1657 }
1658 break;
1659 } else {
1660/* use exponential-style scheduling */
1661/* The following check is to workaround the lack of long double precision on
1662 Windows* OS.
1663 This check works around the possible effect that init != 0 for chunkIdx == 0.
1664 */
1665#if KMP_OS_WINDOWS0 && KMP_ARCH_X860
1666 /* If we haven't already done so, save original
1667 FPCW and set precision to 64-bit, as Windows* OS
1668 on IA-32 architecture defaults to 53-bit */
1669 if (!fpcwSet) {
1670 oldFpcw = _control87(0, 0);
1671 _control87(_PC_64, _MCW_PC);
1672 fpcwSet = 0x30000;
1673 }
1674#endif
1675 if (chunkIdx) {
1676 init = __kmp_dispatch_guided_remaining<T>(
1677 trip, *(DBL *)&pr->u.p.parm3, chunkIdx);
1678 KMP_DEBUG_ASSERT(init);
1679 init = trip - init;
1680 } else
1681 init = 0;
1682 limit = trip - __kmp_dispatch_guided_remaining<T>(
1683 trip, *(DBL *)&pr->u.p.parm3, chunkIdx + 1);
1684 KMP_ASSERT(init <= limit);
1685 if (init < limit) {
1686 KMP_DEBUG_ASSERT(limit <= trip);
1687 --limit;
1688 status = 1;
1689 break;
1690 } // if
1691 } // if
1692 } // while (1)
1693#if KMP_OS_WINDOWS0 && KMP_ARCH_X860
1694 /* restore FPCW if necessary
1695 AC: check fpcwSet flag first because oldFpcw can be uninitialized here
1696 */
1697 if (fpcwSet && (oldFpcw & fpcwSet))
1698 _control87(oldFpcw, _MCW_PC);
1699#endif
1700 if (status != 0) {
1701 start = pr->u.p.lb;
1702 incr = pr->u.p.st;
1703 if (p_st != NULL__null)
1704 *p_st = incr;
1705 *p_lb = start + init * incr;
1706 *p_ub = start + limit * incr;
1707 if (pr->flags.ordered) {
1708 pr->u.p.ordered_lower = init;
1709 pr->u.p.ordered_upper = limit;
1710 }
1711 } else {
1712 *p_lb = 0;
1713 *p_ub = 0;
1714 if (p_st != NULL__null)
1715 *p_st = 0;
1716 }
1717 } // case
1718 break;
1719
1720 case kmp_sch_trapezoidal: {
1721 UT index;
1722 T parm2 = pr->u.p.parm2;
1723 T parm3 = pr->u.p.parm3;
1724 T parm4 = pr->u.p.parm4;
1725 KD_TRACE(100,
1726 ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_trapezoidal case\n",
1727 gtid));
1728
1729 index = test_then_inc<ST>((volatile ST *)&sh->u.s.iteration);
1730
1731 init = (index * ((2 * parm2) - (index - 1) * parm4)) / 2;
1732 trip = pr->u.p.tc - 1;
1733
1734 if ((status = ((T)index < parm3 && init <= trip)) == 0) {
1735 *p_lb = 0;
1736 *p_ub = 0;
1737 if (p_st != NULL__null)
1738 *p_st = 0;
1739 } else {
1740 start = pr->u.p.lb;
1741 limit = ((index + 1) * (2 * parm2 - index * parm4)) / 2 - 1;
1742 incr = pr->u.p.st;
1743
1744 if ((last = (limit >= trip)) != 0)
1745 limit = trip;
1746
1747 if (p_st != NULL__null)
1748 *p_st = incr;
1749
1750 if (incr == 1) {
1751 *p_lb = start + init;
1752 *p_ub = start + limit;
1753 } else {
1754 *p_lb = start + init * incr;
1755 *p_ub = start + limit * incr;
1756 }
1757
1758 if (pr->flags.ordered) {
1759 pr->u.p.ordered_lower = init;
1760 pr->u.p.ordered_upper = limit;
1761 } // if
1762 } // if
1763 } // case
1764 break;
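The trapezoidal init/limit expressions above are prefix sums of an arithmetic series of chunk sizes. Reading parm2 as the first chunk size and parm4 as the per-chunk decrement (an interpretation of the formula, not stated in this report), chunk k has parm2 - k*parm4 iterations, and the first `index` chunks cover index*(2*parm2 - (index - 1)*parm4)/2 iterations. For example, with parm2 = 8 and parm4 = 2 the chunks are 8, 6, 4, ..., and index = 3 gives 3*(16 - 4)/2 = 18 = 8 + 6 + 4, which is exactly the init computed for the fourth chunk.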
1765 default: {
1766 status = 0; // to avoid complaints on uninitialized variable use
1767 __kmp_fatal(KMP_MSG(UnknownSchedTypeDetected), // Primary message
1768 KMP_HNT(GetNewerLibrary), // Hint
1769 __kmp_msg_null // Variadic argument list terminator
1770 );
1771 } break;
1772 } // switch
1773 if (p_last)
1774 *p_last = last;
1775#ifdef KMP_DEBUG1
1776 if (pr->flags.ordered) {
1777 char *buff;
1778 // create format specifiers before the debug output
1779 buff = __kmp_str_format("__kmp_dispatch_next_algorithm: T#%%d "
1780 "ordered_lower:%%%s ordered_upper:%%%s\n",
1781 traits_t<UT>::spec, traits_t<UT>::spec);
1782 KD_TRACE(1000, (buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper));
1783 __kmp_str_free(&buff);
1784 }
1785 {
1786 char *buff;
1787 // create format specifiers before the debug output
1788 buff = __kmp_str_format(
1789 "__kmp_dispatch_next_algorithm: T#%%d exit status:%%d p_last:%%d "
1790 "p_lb:%%%s p_ub:%%%s p_st:%%%s\n",
1791 traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec);
1792 KD_TRACE(10, (buff, gtid, status, *p_last, *p_lb, *p_ub, *p_st));
1793 __kmp_str_free(&buff);
1794 }
1795#endif
1796 return status;
1797}
1798
1799/* Define a macro for exiting __kmp_dispatch_next(). If status is 0 (no more
1800 work), then tell OMPT the loop is over. In some cases kmp_dispatch_fini()
1801 is not called. */
1802#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
1803#define OMPT_LOOP_END \
1804 if (status == 0) { \
1805 if (ompt_enabled.ompt_callback_work) { \
1806 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL__null); \
1807 ompt_task_info_t *task_info = __ompt_get_task_info_object(0); \
1808 ompt_callbacks.ompt_callback(ompt_callback_work)( \
1809 ompt_work_loop, ompt_scope_end, &(team_info->parallel_data), \
1810 &(task_info->task_data), 0, codeptr); \
1811 } \
1812 }
1813// TODO: implement count
1814#else
1815#define OMPT_LOOP_END // no-op
1816#endif
1817
1818#if KMP_STATS_ENABLED0
1819#define KMP_STATS_LOOP_END \
1820 { \
1821 kmp_int64 u, l, t, i; \
1822 l = (kmp_int64)(*p_lb); \
1823 u = (kmp_int64)(*p_ub); \
1824 i = (kmp_int64)(pr->u.p.st); \
1825 if (status == 0) { \
1826 t = 0; \
1827 KMP_POP_PARTITIONED_TIMER()((void)0); \
1828 } else if (i == 1) { \
1829 if (u >= l) \
1830 t = u - l + 1; \
1831 else \
1832 t = 0; \
1833 } else if (i < 0) { \
1834 if (l >= u) \
1835 t = (l - u) / (-i) + 1; \
1836 else \
1837 t = 0; \
1838 } else { \
1839 if (u >= l) \
1840 t = (u - l) / i + 1; \
1841 else \
1842 t = 0; \
1843 } \
1844 KMP_COUNT_VALUE(OMP_loop_dynamic_iterations, t)((void)0); \
1845 }
1846#else
1847#define KMP_STATS_LOOP_END /* Nothing */
1848#endif
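The trip-count arithmetic inside KMP_STATS_LOOP_END above generalizes to a single helper. A minimal sketch with an illustrative name (not the runtime's API), covering the same three branches:

  #include <cstdint>

  // Trip count of `for (x = l; i > 0 ? x <= u : x >= u; x += i)`, mirroring the
  // branches of KMP_STATS_LOOP_END; returns 0 for a zero-trip loop (i != 0 assumed).
  static int64_t loop_trip_count(int64_t l, int64_t u, int64_t i) {
    if (i == 1)
      return u >= l ? u - l + 1 : 0;
    if (i < 0)
      return l >= u ? (l - u) / (-i) + 1 : 0;
    return u >= l ? (u - l) / i + 1 : 0;
  }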
1849
1850template <typename T>
1851static int __kmp_dispatch_next(ident_t *loc, int gtid, kmp_int32 *p_last,
1852 T *p_lb, T *p_ub,
1853 typename traits_t<T>::signed_t *p_st
1854#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
1855 ,
1856 void *codeptr
1857#endif
1858 ) {
1859
1860 typedef typename traits_t<T>::unsigned_t UT;
1861 typedef typename traits_t<T>::signed_t ST;
1862 // This is potentially slightly misleading, schedule(runtime) will appear here
1863 // even if the actual runtime schedule is static. (Which points out a
1864 // disadvantage of schedule(runtime): even when static scheduling is used it
1865 // costs more than a compile time choice to use static scheduling would.)
1866 KMP_TIME_PARTITIONED_BLOCK(OMP_loop_dynamic_scheduling);
1867
1868 int status;
1869 dispatch_private_info_template<T> *pr;
1870 kmp_info_t *th = __kmp_threads[gtid];
1871 kmp_team_t *team = th->th.th_team;
1872
1873 KMP_DEBUG_ASSERT(p_lb && p_ub && p_st); // AC: these cannot be NULL
1874 KD_TRACE(
1875 1000,
1876 ("__kmp_dispatch_next: T#%d called p_lb:%p p_ub:%p p_st:%p p_last: %p\n",
1877 gtid, p_lb, p_ub, p_st, p_last));
1878
1879 if (team->t.t_serialized) {
3. Assuming the condition is true
4. Taking true branch
1880 /* NOTE: serialize this dispatch because we are not at the active level */
1881 pr = reinterpret_cast<dispatch_private_info_template<T> *>(
1882 th->th.th_dispatch->th_disp_buffer); /* top of the stack */
1883 KMP_DEBUG_ASSERT(pr);
1884
1885 if ((status = (pr->u.p.tc != 0)) == 0) {
5. Assuming the condition is true
6. Taking false branch
1886 *p_lb = 0;
1887 *p_ub = 0;
1888 // if ( p_last != NULL )
1889 // *p_last = 0;
1890 if (p_st != NULL__null)
1891 *p_st = 0;
1892 if (__kmp_env_consistency_check) {
1893 if (pr->pushed_ws != ct_none) {
1894 pr->pushed_ws = __kmp_pop_workshare(gtid, pr->pushed_ws, loc);
1895 }
1896 }
1897 } else if (pr->flags.nomerge) {
7. Assuming the condition is false
8. Taking false branch
1898 kmp_int32 last;
1899 T start;
1900 UT limit, trip, init;
1901 ST incr;
1902 T chunk = pr->u.p.parm1;
1903
1904 KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_dynamic_chunked case\n",
1905 gtid));
1906
1907 init = chunk * pr->u.p.count++;
1908 trip = pr->u.p.tc - 1;
1909
1910 if ((status = (init <= trip)) == 0) {
1911 *p_lb = 0;
1912 *p_ub = 0;
1913 // if ( p_last != NULL )
1914 // *p_last = 0;
1915 if (p_st != NULL__null)
1916 *p_st = 0;
1917 if (__kmp_env_consistency_check) {
1918 if (pr->pushed_ws != ct_none) {
1919 pr->pushed_ws = __kmp_pop_workshare(gtid, pr->pushed_ws, loc);
1920 }
1921 }
1922 } else {
1923 start = pr->u.p.lb;
1924 limit = chunk + init - 1;
1925 incr = pr->u.p.st;
1926
1927 if ((last = (limit >= trip)) != 0) {
1928 limit = trip;
1929#if KMP_OS_WINDOWS0
1930 pr->u.p.last_upper = pr->u.p.ub;
1931#endif /* KMP_OS_WINDOWS */
1932 }
1933 if (p_last != NULL__null)
1934 *p_last = last;
1935 if (p_st != NULL__null)
1936 *p_st = incr;
1937 if (incr == 1) {
1938 *p_lb = start + init;
1939 *p_ub = start + limit;
1940 } else {
1941 *p_lb = start + init * incr;
1942 *p_ub = start + limit * incr;
1943 }
1944
1945 if (pr->flags.ordered) {
1946 pr->u.p.ordered_lower = init;
1947 pr->u.p.ordered_upper = limit;
1948#ifdef KMP_DEBUG1
1949 {
1950 char *buff;
1951 // create format specifiers before the debug output
1952 buff = __kmp_str_format("__kmp_dispatch_next: T#%%d "
1953 "ordered_lower:%%%s ordered_upper:%%%s\n",
1954 traits_t<UT>::spec, traits_t<UT>::spec);
1955 KD_TRACE(1000, (buff, gtid, pr->u.p.ordered_lower,
1956 pr->u.p.ordered_upper));
1957 __kmp_str_free(&buff);
1958 }
1959#endif
1960 } // if
1961 } // if
1962 } else {
1963 pr->u.p.tc = 0;
1964 *p_lb = pr->u.p.lb;
1965 *p_ub = pr->u.p.ub;
1966#if KMP_OS_WINDOWS0
1967 pr->u.p.last_upper = *p_ub;
1968#endif /* KMP_OS_WINDOWS */
1969 if (p_last != NULL__null)
9. Assuming 'p_last' is equal to NULL
10. Taking false branch
1970 *p_last = TRUE(!0);
1971 if (p_st != NULL__null)
11. Taking true branch
1972 *p_st = pr->u.p.st;
1973 } // if
1974#ifdef KMP_DEBUG1
1975 {
1976 char *buff;
1977 // create format specifiers before the debug output
1978 buff = __kmp_str_format(
1979 "__kmp_dispatch_next: T#%%d serialized case: p_lb:%%%s "
1980 "p_ub:%%%s p_st:%%%s p_last:%%p %%d returning:%%d\n",
1981 traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec);
1982 KD_TRACE(10, (buff, gtid, *p_lb, *p_ub, *p_st, p_last, *p_last, status));
12. Within the expansion of the macro 'KD_TRACE':
    a. Assuming 'kmp_d_debug' is >= 10
    b. Dereference of null pointer (loaded from variable 'p_last')
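The path above reaches this KD_TRACE with p_last == NULL (step 9) while the trace unconditionally reads *p_last. One possible way to silence the warning is to guard the dereference the same way the non-serialized trace at line 2099 already does; this is a sketch of that change, not a committed patch, and whether p_last may legitimately be NULL here is for the maintainers to decide:

  KD_TRACE(10, (buff, gtid, *p_lb, *p_ub, *p_st, p_last,
                (p_last ? *p_last : 0), status));

The same trace also reads *p_st unconditionally; line 2099 guards that argument as well (p_st ? *p_st : 0), so the same treatment could be applied here.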
1983 __kmp_str_free(&buff);
1984 }
1985#endif
1986#if INCLUDE_SSC_MARKS(1 && 1)
1987 SSC_MARK_DISPATCH_NEXT();
1988#endif
1989 OMPT_LOOP_END;
1990 KMP_STATS_LOOP_END;
1991 return status;
1992 } else {
1993 kmp_int32 last = 0;
1994 dispatch_shared_info_template<T> volatile *sh;
1995
1996 KMP_DEBUG_ASSERT(th->th.th_dispatch ==
1997 &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);
1998
1999 pr = reinterpret_cast<dispatch_private_info_template<T> *>(
2000 th->th.th_dispatch->th_dispatch_pr_current);
2001 KMP_DEBUG_ASSERT(pr);
2002 sh = reinterpret_cast<dispatch_shared_info_template<T> volatile *>(
2003 th->th.th_dispatch->th_dispatch_sh_current);
2004 KMP_DEBUG_ASSERT(sh);
2005
2006#if KMP_USE_HIER_SCHED0
2007 if (pr->flags.use_hier)
2008 status = sh->hier->next(loc, gtid, pr, &last, p_lb, p_ub, p_st);
2009 else
2010#endif // KMP_USE_HIER_SCHED
2011 status = __kmp_dispatch_next_algorithm<T>(gtid, pr, sh, &last, p_lb, p_ub,
2012 p_st, th->th.th_team_nproc,
2013 th->th.th_info.ds.ds_tid);
2014 // status == 0: no more iterations to execute
2015 if (status == 0) {
2016 UT num_done;
2017
2018 num_done = test_then_inc<ST>((volatile ST *)&sh->u.s.num_done);
2019#ifdef KMP_DEBUG1
2020 {
2021 char *buff;
2022 // create format specifiers before the debug output
2023 buff = __kmp_str_format(
2024 "__kmp_dispatch_next: T#%%d increment num_done:%%%s\n",
2025 traits_t<UT>::spec);
2026 KD_TRACE(10, (buff, gtid, sh->u.s.num_done));
2027 __kmp_str_free(&buff);
2028 }
2029#endif
2030
2031#if KMP_USE_HIER_SCHED0
2032 pr->flags.use_hier = FALSE0;
2033#endif
2034 if ((ST)num_done == th->th.th_team_nproc - 1) {
2035#if (KMP_STATIC_STEAL_ENABLED1)
2036 if (pr->schedule == kmp_sch_static_steal &&
2037 traits_t<T>::type_size > 4) {
2038 int i;
2039 kmp_info_t **other_threads = team->t.t_threads;
2040 // loop complete, safe to destroy locks used for stealing
2041 for (i = 0; i < th->th.th_team_nproc; ++i) {
2042 kmp_lock_t *lck = other_threads[i]->th.th_dispatch->th_steal_lock;
2043 KMP_ASSERT(lck != NULL);
2044 __kmp_destroy_lock(lck);
2045 __kmp_free(lck);
2046 other_threads[i]->th.th_dispatch->th_steal_lock = NULL__null;
2047 }
2048 }
2049#endif
2050 /* NOTE: release this buffer to be reused */
2051
2052 KMP_MB(); /* Flush all pending memory write invalidates. */
2053
2054 sh->u.s.num_done = 0;
2055 sh->u.s.iteration = 0;
2056
2057 /* TODO replace with general release procedure? */
2058 if (pr->flags.ordered) {
2059 sh->u.s.ordered_iteration = 0;
2060 }
2061
2062 KMP_MB(); /* Flush all pending memory write invalidates. */
2063
2064 sh->buffer_index += __kmp_dispatch_num_buffers;
2065 KD_TRACE(100, ("__kmp_dispatch_next: T#%d change buffer_index:%d\n",
2066 gtid, sh->buffer_index));
2067
2068 KMP_MB(); /* Flush all pending memory write invalidates. */
2069
2070 } // if
2071 if (__kmp_env_consistency_check) {
2072 if (pr->pushed_ws != ct_none) {
2073 pr->pushed_ws = __kmp_pop_workshare(gtid, pr->pushed_ws, loc);
2074 }
2075 }
2076
2077 th->th.th_dispatch->th_deo_fcn = NULL__null;
2078 th->th.th_dispatch->th_dxo_fcn = NULL__null;
2079 th->th.th_dispatch->th_dispatch_sh_current = NULL__null;
2080 th->th.th_dispatch->th_dispatch_pr_current = NULL__null;
2081 } // if (status == 0)
2082#if KMP_OS_WINDOWS0
2083 else if (last) {
2084 pr->u.p.last_upper = pr->u.p.ub;
2085 }
2086#endif /* KMP_OS_WINDOWS */
2087 if (p_last != NULL__null && status != 0)
2088 *p_last = last;
2089 } // if
2090
2091#ifdef KMP_DEBUG1
2092 {
2093 char *buff;
2094 // create format specifiers before the debug output
2095 buff = __kmp_str_format(
2096 "__kmp_dispatch_next: T#%%d normal case: "
2097 "p_lb:%%%s p_ub:%%%s p_st:%%%s p_last:%%p (%%d) returning:%%d\n",
2098 traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec);
2099 KD_TRACE(10, (buff, gtid, *p_lb, *p_ub, p_st ? *p_st : 0, p_last,
2100 (p_last ? *p_last : 0), status));
2101 __kmp_str_free(&buff);
2102 }
2103#endif
2104#if INCLUDE_SSC_MARKS(1 && 1)
2105 SSC_MARK_DISPATCH_NEXT();
2106#endif
2107 OMPT_LOOP_END;
2108 KMP_STATS_LOOP_END;
2109 return status;
2110}
2111
2112template <typename T>
2113static void __kmp_dist_get_bounds(ident_t *loc, kmp_int32 gtid,
2114 kmp_int32 *plastiter, T *plower, T *pupper,
2115 typename traits_t<T>::signed_t incr) {
2116 typedef typename traits_t<T>::unsigned_t UT;
2117 kmp_uint32 team_id;
2118 kmp_uint32 nteams;
2119 UT trip_count;
2120 kmp_team_t *team;
2121 kmp_info_t *th;
2122
2123 KMP_DEBUG_ASSERT(plastiter && plower && pupper);
2124 KE_TRACE(10, ("__kmpc_dist_get_bounds called (%d)\n", gtid));
2125#ifdef KMP_DEBUG1
2126 typedef typename traits_t<T>::signed_t ST;
2127 {
2128 char *buff;
2129 // create format specifiers before the debug output
2130 buff = __kmp_str_format("__kmpc_dist_get_bounds: T#%%d liter=%%d "
2131 "iter=(%%%s, %%%s, %%%s) signed?<%s>\n",
2132 traits_t<T>::spec, traits_t<T>::spec,
2133 traits_t<ST>::spec, traits_t<T>::spec);
2134 KD_TRACE(100, (buff, gtid, *plastiter, *plower, *pupper, incr));
2135 __kmp_str_free(&buff);
2136 }
2137#endif
2138
2139 if (__kmp_env_consistency_check) {
2140 if (incr == 0) {
2141 __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
2142 loc);
2143 }
2144 if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
2145 // The loop is illegal.
2146 // Some zero-trip loops maintained by compiler, e.g.:
2147 // for(i=10;i<0;++i) // lower >= upper - run-time check
2148 // for(i=0;i>10;--i) // lower <= upper - run-time check
2149 // for(i=0;i>10;++i) // incr > 0 - compile-time check
2150 // for(i=10;i<0;--i) // incr < 0 - compile-time check
2151 // Compiler does not check the following illegal loops:
2152 // for(i=0;i<10;i+=incr) // where incr<0
2153 // for(i=10;i>0;i-=incr) // where incr<0
2154 __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
2155 }
2156 }
2157 th = __kmp_threads[gtid];
2158 team = th->th.th_team;
2159#if OMP_40_ENABLED(50 >= 40)
2160 KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
2161 nteams = th->th.th_teams_size.nteams;
2162#endif
2163 team_id = team->t.t_master_tid;
2164 KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);
2165
2166 // compute global trip count
2167 if (incr == 1) {
2168 trip_count = *pupper - *plower + 1;
2169 } else if (incr == -1) {
2170 trip_count = *plower - *pupper + 1;
2171 } else if (incr > 0) {
2172 // upper-lower can exceed the limit of signed type
2173 trip_count = (UT)(*pupper - *plower) / incr + 1;
2174 } else {
2175 trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
2176 }
2177
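As a quick check of the general branches above: for *plower = 0, *pupper = 9, incr = 2 the count is (9 - 0) / 2 + 1 = 5 (iterations 0, 2, 4, 6, 8), and for *plower = 10, *pupper = 1, incr = -3 it is (10 - 1) / 3 + 1 = 4 (iterations 10, 7, 4, 1). The cast to the unsigned type UT guards the subtraction against overflow when the bounds span more than half the signed range.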
2178 if (trip_count <= nteams) {
2179 KMP_DEBUG_ASSERT(
2180 __kmp_static == kmp_sch_static_greedy ||
2181 __kmp_static ==
2182 kmp_sch_static_balanced); // Unknown static scheduling type.
2183 // only some teams get single iteration, others get nothing
2184 if (team_id < trip_count) {
2185 *pupper = *plower = *plower + team_id * incr;
2186 } else {
2187 *plower = *pupper + incr; // zero-trip loop
2188 }
2189 if (plastiter != NULL__null)
2190 *plastiter = (team_id == trip_count - 1);
2191 } else {
2192 if (__kmp_static == kmp_sch_static_balanced) {
2193 UT chunk = trip_count / nteams;
2194 UT extras = trip_count % nteams;
2195 *plower +=
2196 incr * (team_id * chunk + (team_id < extras ? team_id : extras));
2197 *pupper = *plower + chunk * incr - (team_id < extras ? 0 : incr);
2198 if (plastiter != NULL__null)
2199 *plastiter = (team_id == nteams - 1);
2200 } else {
2201 T chunk_inc_count =
2202 (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr;
2203 T upper = *pupper;
2204 KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
2205 // Unknown static scheduling type.
2206 *plower += team_id * chunk_inc_count;
2207 *pupper = *plower + chunk_inc_count - incr;
2208 // Check/correct bounds if needed
2209 if (incr > 0) {
2210 if (*pupper < *plower)
2211 *pupper = traits_t<T>::max_value;
2212 if (plastiter != NULL__null)
2213 *plastiter = *plower <= upper && *pupper > upper - incr;
2214 if (*pupper > upper)
2215 *pupper = upper; // tracker C73258
2216 } else {
2217 if (*pupper > *plower)
2218 *pupper = traits_t<T>::min_value;
2219 if (plastiter != NULL__null)
2220 *plastiter = *plower >= upper && *pupper < upper - incr;
2221 if (*pupper < upper)
2222 *pupper = upper; // tracker C73258
2223 }
2224 }
2225 }
2226}
2227
2228//-----------------------------------------------------------------------------
2229// Dispatch routines
2230// Transfer call to template< type T >
2231// __kmp_dispatch_init( ident_t *loc, int gtid, enum sched_type schedule,
2232// T lb, T ub, ST st, ST chunk )
2233extern "C" {
2234
2235/*!
2236@ingroup WORK_SHARING
2237@{
2238@param loc Source location
2239@param gtid Global thread id
2240@param schedule Schedule type
2241@param lb Lower bound
2242@param ub Upper bound
2243@param st Step (or increment if you prefer)
2244@param chunk The chunk size to block with
2245
2246This function prepares the runtime to start a dynamically scheduled for loop,
2247saving the loop arguments.
2248These functions are all identical apart from the types of the arguments.
2249*/
2250
2251void __kmpc_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
2252 enum sched_type schedule, kmp_int32 lb,
2253 kmp_int32 ub, kmp_int32 st, kmp_int32 chunk) {
2254 KMP_DEBUG_ASSERT(__kmp_init_serial);
2255#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2256 OMPT_STORE_RETURN_ADDRESS(gtid);
2257#endif
2258 __kmp_dispatch_init<kmp_int32>(loc, gtid, schedule, lb, ub, st, chunk, true);
2259}
2260/*!
2261See @ref __kmpc_dispatch_init_4
2262*/
2263void __kmpc_dispatch_init_4u(ident_t *loc, kmp_int32 gtid,
2264 enum sched_type schedule, kmp_uint32 lb,
2265 kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk) {
2266 KMP_DEBUG_ASSERT(__kmp_init_serial);
2267#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2268 OMPT_STORE_RETURN_ADDRESS(gtid);
2269#endif
2270 __kmp_dispatch_init<kmp_uint32>(loc, gtid, schedule, lb, ub, st, chunk, true);
2271}
2272
2273/*!
2274See @ref __kmpc_dispatch_init_4
2275*/
2276void __kmpc_dispatch_init_8(ident_t *loc, kmp_int32 gtid,
2277 enum sched_type schedule, kmp_int64 lb,
2278 kmp_int64 ub, kmp_int64 st, kmp_int64 chunk) {
2279 KMP_DEBUG_ASSERT(__kmp_init_serial);
2280#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2281 OMPT_STORE_RETURN_ADDRESS(gtid);
2282#endif
2283 __kmp_dispatch_init<kmp_int64>(loc, gtid, schedule, lb, ub, st, chunk, true);
2284}
2285
2286/*!
2287See @ref __kmpc_dispatch_init_4
2288*/
2289void __kmpc_dispatch_init_8u(ident_t *loc, kmp_int32 gtid,
2290 enum sched_type schedule, kmp_uint64 lb,
2291 kmp_uint64 ub, kmp_int64 st, kmp_int64 chunk) {
2292 KMP_DEBUG_ASSERT(__kmp_init_serial);
2293#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2294 OMPT_STORE_RETURN_ADDRESS(gtid);
2295#endif
2296 __kmp_dispatch_init<kmp_uint64>(loc, gtid, schedule, lb, ub, st, chunk, true);
2297}
2298
2299/*!
2300See @ref __kmpc_dispatch_init_4
2301
2302Difference from __kmpc_dispatch_init set of functions is these functions
2303are called for composite distribute parallel for construct. Thus before
2304regular iterations dispatching we need to calc per-team iteration space.
2305
2306These functions are all identical apart from the types of the arguments.
2307*/
2308void __kmpc_dist_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
2309 enum sched_type schedule, kmp_int32 *p_last,
2310 kmp_int32 lb, kmp_int32 ub, kmp_int32 st,
2311 kmp_int32 chunk) {
2312 KMP_DEBUG_ASSERT(__kmp_init_serial);
2313#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2314 OMPT_STORE_RETURN_ADDRESS(gtid);
2315#endif
2316 __kmp_dist_get_bounds<kmp_int32>(loc, gtid, p_last, &lb, &ub, st);
2317 __kmp_dispatch_init<kmp_int32>(loc, gtid, schedule, lb, ub, st, chunk, true);
2318}
2319
2320void __kmpc_dist_dispatch_init_4u(ident_t *loc, kmp_int32 gtid,
2321 enum sched_type schedule, kmp_int32 *p_last,
2322 kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st,
2323 kmp_int32 chunk) {
2324 KMP_DEBUG_ASSERT(__kmp_init_serial);
2325#if OMPT_SUPPORT && OMPT_OPTIONAL
2326 OMPT_STORE_RETURN_ADDRESS(gtid);
2327#endif
2328 __kmp_dist_get_bounds<kmp_uint32>(loc, gtid, p_last, &lb, &ub, st);
2329 __kmp_dispatch_init<kmp_uint32>(loc, gtid, schedule, lb, ub, st, chunk, true);
2330}
2331
2332void __kmpc_dist_dispatch_init_8(ident_t *loc, kmp_int32 gtid,
2333 enum sched_type schedule, kmp_int32 *p_last,
2334 kmp_int64 lb, kmp_int64 ub, kmp_int64 st,
2335 kmp_int64 chunk) {
2336 KMP_DEBUG_ASSERT(__kmp_init_serial);
2337#if OMPT_SUPPORT && OMPT_OPTIONAL
2338 OMPT_STORE_RETURN_ADDRESS(gtid);
2339#endif
2340 __kmp_dist_get_bounds<kmp_int64>(loc, gtid, p_last, &lb, &ub, st);
2341 __kmp_dispatch_init<kmp_int64>(loc, gtid, schedule, lb, ub, st, chunk, true);
2342}
2343
2344void __kmpc_dist_dispatch_init_8u(ident_t *loc, kmp_int32 gtid,
2345 enum sched_type schedule, kmp_int32 *p_last,
2346 kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st,
2347 kmp_int64 chunk) {
2348 KMP_DEBUG_ASSERT(__kmp_init_serial);
2349#if OMPT_SUPPORT && OMPT_OPTIONAL
2350 OMPT_STORE_RETURN_ADDRESS(gtid);
2351#endif
2352 __kmp_dist_get_bounds<kmp_uint64>(loc, gtid, p_last, &lb, &ub, st);
2353 __kmp_dispatch_init<kmp_uint64>(loc, gtid, schedule, lb, ub, st, chunk, true);
2354}
2355
2356/*!
2357@param loc Source code location
2358@param gtid Global thread id
2359@param p_last Pointer to a flag set to one if this is the last chunk or zero
2360otherwise
2361@param p_lb Pointer to the lower bound for the next chunk of work
2362@param p_ub Pointer to the upper bound for the next chunk of work
2363@param p_st Pointer to the stride for the next chunk of work
2364@return one if there is work to be done, zero otherwise
2365
2366Get the next dynamically allocated chunk of work for this thread.
2367If there is no more work, then lb, ub, and stride need not be modified.
2368*/
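A minimal sketch of the loop a caller typically drives against this interface, assuming dispatch was already initialized with a matching __kmpc_dispatch_init_4 call and that every out-pointer refers to valid, caller-owned storage (variable names are illustrative, not from this file):

  kmp_int32 last = 0, lb = 0, ub = 0, st = 0;   // caller-owned out-parameters
  while (__kmpc_dispatch_next_4(loc, gtid, &last, &lb, &ub, &st)) {
    // Run the chunk [lb, ub] with stride st; 'last' is nonzero when this
    // chunk contains the final iteration of the loop.
    for (kmp_int32 i = lb; i <= ub; i += st) {
      body(i);                                  // hypothetical loop body
    }
  }
  // A zero return means no more work; lb, ub, and st are left unmodified.

The sketch passes &last rather than a null pointer for p_last; the analyzer path notes further down show p_last being forwarded, unchecked, into __kmp_dispatch_next.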
2369int __kmpc_dispatch_next_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
2370 kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st) {
2371#if OMPT_SUPPORT && OMPT_OPTIONAL
2372 OMPT_STORE_RETURN_ADDRESS(gtid);
2373#endif
2374 return __kmp_dispatch_next<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st
2375#if OMPT_SUPPORT && OMPT_OPTIONAL
2376 ,
2377 OMPT_LOAD_RETURN_ADDRESS(gtid)
2378#endif
2379 );
2380}
2381
2382/*!
2383See @ref __kmpc_dispatch_next_4
2384*/
2385int __kmpc_dispatch_next_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
2386 kmp_uint32 *p_lb, kmp_uint32 *p_ub,
2387 kmp_int32 *p_st) {
2388#if OMPT_SUPPORT && OMPT_OPTIONAL
2389 OMPT_STORE_RETURN_ADDRESS(gtid);
2390#endif
2391 return __kmp_dispatch_next<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st
2392#if OMPT_SUPPORT && OMPT_OPTIONAL
2393 ,
2394 OMPT_LOAD_RETURN_ADDRESS(gtid)
2395#endif
2396 );
2397}
2398
2399/*!
2400See @ref __kmpc_dispatch_next_4
2401*/
2402int __kmpc_dispatch_next_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
2403 kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st) {
2404#if OMPT_SUPPORT && OMPT_OPTIONAL
2405 OMPT_STORE_RETURN_ADDRESS(gtid);
2406#endif
2407 return __kmp_dispatch_next<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st
2408#if OMPT_SUPPORT && OMPT_OPTIONAL
2409 ,
2410 OMPT_LOAD_RETURN_ADDRESS(gtid)
2411#endif
2412 );
2413}
2414
2415/*!
2416See @ref __kmpc_dispatch_next_4
2417*/
2418int __kmpc_dispatch_next_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
2419 kmp_uint64 *p_lb, kmp_uint64 *p_ub,
2420 kmp_int64 *p_st) {
2421#if OMPT_SUPPORT && OMPT_OPTIONAL
2422 OMPT_STORE_RETURN_ADDRESS(gtid);
2423#endif
2424 return __kmp_dispatch_next<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st
1. Passing value via 3rd parameter 'p_last'
2. Calling '__kmp_dispatch_next<unsigned long long>'
2425#if OMPT_SUPPORT && OMPT_OPTIONAL
2426 ,
2427 OMPT_LOAD_RETURN_ADDRESS(gtid)
2428#endif
2429 );
2430}
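The two numbered analyzer notes above mark p_last being forwarded, unchanged and unchecked, into the __kmp_dispatch_next instantiation for unsigned 64-bit loops. A defensive caller-side sketch, under the assumption that the callee may dereference the flag pointer on some path, is simply never to hand it a null pointer:

  kmp_int32 last = 0;            // always supply real storage for p_last
  kmp_uint64 lb = 0, ub = 0;
  kmp_int64 st = 0;
  int more = __kmpc_dispatch_next_8u(loc, gtid, &last, &lb, &ub, &st);

Whether the runtime should additionally guard the dereference is a question for the template body earlier in this file, not for these thin wrappers.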
2431
2432/*!
2433@param loc Source code location
2434@param gtid Global thread id
2435
2436Mark the end of a dynamic loop.
2437*/
2438void __kmpc_dispatch_fini_4(ident_t *loc, kmp_int32 gtid) {
2439 __kmp_dispatch_finish<kmp_uint32>(gtid, loc);
2440}
2441
2442/*!
2443See @ref __kmpc_dispatch_fini_4
2444*/
2445void __kmpc_dispatch_fini_8(ident_t *loc, kmp_int32 gtid) {
2446 __kmp_dispatch_finish<kmp_uint64>(gtid, loc);
2447}
2448
2449/*!
2450See @ref __kmpc_dispatch_fini_4
2451*/
2452void __kmpc_dispatch_fini_4u(ident_t *loc, kmp_int32 gtid) {
2453 __kmp_dispatch_finish<kmp_uint32>(gtid, loc);
2454}
2455
2456/*!
2457See @ref __kmpc_dispatch_fini_4
2458*/
2459void __kmpc_dispatch_fini_8u(ident_t *loc, kmp_int32 gtid) {
2460 __kmp_dispatch_finish<kmp_uint64>(gtid, loc);
2461}
2462/*! @} */
2463
2464//-----------------------------------------------------------------------------
2465// Non-template routines from kmp_dispatch.cpp used in other sources
2466
2467kmp_uint32 __kmp_eq_4(kmp_uint32 value, kmp_uint32 checker) {
2468 return value == checker;
2469}
2470
2471kmp_uint32 __kmp_neq_4(kmp_uint32 value, kmp_uint32 checker) {
2472 return value != checker;
2473}
2474
2475kmp_uint32 __kmp_lt_4(kmp_uint32 value, kmp_uint32 checker) {
2476 return value < checker;
2477}
2478
2479kmp_uint32 __kmp_ge_4(kmp_uint32 value, kmp_uint32 checker) {
2480 return value >= checker;
2481}
2482
2483kmp_uint32 __kmp_le_4(kmp_uint32 value, kmp_uint32 checker) {
2484 return value <= checker;
2485}
2486
2487kmp_uint32
2488__kmp_wait_yield_4(volatile kmp_uint32 *spinner, kmp_uint32 checker,
2489 kmp_uint32 (*pred)(kmp_uint32, kmp_uint32),
2490 void *obj // Higher-level synchronization object, or NULL.
2491 ) {
2492 // note: we may not belong to a team at this point
2493 volatile kmp_uint32 *spin = spinner;
2494 kmp_uint32 check = checker;
2495 kmp_uint32 spins;
2496 kmp_uint32 (*f)(kmp_uint32, kmp_uint32) = pred;
2497 kmp_uint32 r;
2498
2499 KMP_FSYNC_SPIN_INIT(obj, CCAST(kmp_uint32 *, spin));
2500 KMP_INIT_YIELD(spins);
2501 // main wait spin loop
2502 while (!f(r = TCR_4(*spin), check)) {
2503 KMP_FSYNC_SPIN_PREPARE(obj);
2504 /* GEH - remove this since it was accidentally introduced when kmp_wait was
2505 split. It causes problems with infinite recursion because of exit lock */
2506 /* if ( TCR_4(__kmp_global.g.g_done) && __kmp_global.g.g_abort)
2507 __kmp_abort_thread(); */
2508
2509 /* if we have waited a bit, or are oversubscribed, yield */
2510 /* pause is in the following code */
2511 KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc);
2512 KMP_YIELD_SPIN(spins);
2513 }
2514 KMP_FSYNC_SPIN_ACQUIRED(obj);
2515 return r;
2516}
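The comparison helpers defined just above (__kmp_eq_4 and friends) are natural predicates for this spin loop. A small usage sketch, with a hypothetical flag and no higher-level ITT synchronization object (hence the NULL final argument):

  volatile kmp_uint32 ready = 0;   // hypothetical flag another thread sets to 1
  // writer side, elsewhere:  ready = 1;
  kmp_uint32 observed = __kmp_wait_yield_4(&ready, 1, __kmp_eq_4, NULL);
  // 'observed' holds the value that finally satisfied the predicate (here 1).

The loop pauses, checks for oversubscription, and yields as the spin count runs down, so it stays polite to the OS scheduler even when the flag takes a while to flip.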
2517
2518void __kmp_wait_yield_4_ptr(
2519 void *spinner, kmp_uint32 checker, kmp_uint32 (*pred)(void *, kmp_uint32),
2520 void *obj // Higher-level synchronization object, or NULL.
2521 ) {
2522 // note: we may not belong to a team at this point
2523 void *spin = spinner;
2524 kmp_uint32 check = checker;
2525 kmp_uint32 spins;
2526 kmp_uint32 (*f)(void *, kmp_uint32) = pred;
2527
2528 KMP_FSYNC_SPIN_INIT(obj, spin);
2529 KMP_INIT_YIELD(spins);
2530 // main wait spin loop
2531 while (!f(spin, check)) {
2532 KMP_FSYNC_SPIN_PREPARE(obj);
2533 /* if we have waited a bit, or are oversubscribed, yield */
2534 /* pause is in the following code */
2535 KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc);
2536 KMP_YIELD_SPIN(spins);
2537 }
2538 KMP_FSYNC_SPIN_ACQUIRED(obj);
2539}
2540
2541} // extern "C"
2542
2543#ifdef KMP_GOMP_COMPAT
2544
2545void __kmp_aux_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
2546 enum sched_type schedule, kmp_int32 lb,
2547 kmp_int32 ub, kmp_int32 st, kmp_int32 chunk,
2548 int push_ws) {
2549 __kmp_dispatch_init<kmp_int32>(loc, gtid, schedule, lb, ub, st, chunk,
2550 push_ws);
2551}
2552
2553void __kmp_aux_dispatch_init_4u(ident_t *loc, kmp_int32 gtid,
2554 enum sched_type schedule, kmp_uint32 lb,
2555 kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk,
2556 int push_ws) {
2557 __kmp_dispatch_init<kmp_uint32>(loc, gtid, schedule, lb, ub, st, chunk,
2558 push_ws);
2559}
2560
2561void __kmp_aux_dispatch_init_8(ident_t *loc, kmp_int32 gtid,
2562 enum sched_type schedule, kmp_int64 lb,
2563 kmp_int64 ub, kmp_int64 st, kmp_int64 chunk,
2564 int push_ws) {
2565 __kmp_dispatch_init<kmp_int64>(loc, gtid, schedule, lb, ub, st, chunk,
2566 push_ws);
2567}
2568
2569void __kmp_aux_dispatch_init_8u(ident_t *loc, kmp_int32 gtid,
2570 enum sched_type schedule, kmp_uint64 lb,
2571 kmp_uint64 ub, kmp_int64 st, kmp_int64 chunk,
2572 int push_ws) {
2573 __kmp_dispatch_init<kmp_uint64>(loc, gtid, schedule, lb, ub, st, chunk,
2574 push_ws);
2575}
2576
2577void __kmp_aux_dispatch_fini_chunk_4(ident_t *loc, kmp_int32 gtid) {
2578 __kmp_dispatch_finish_chunk<kmp_uint32>(gtid, loc);
2579}
2580
2581void __kmp_aux_dispatch_fini_chunk_8(ident_t *loc, kmp_int32 gtid) {
2582 __kmp_dispatch_finish_chunk<kmp_uint64>(gtid, loc);
2583}
2584
2585void __kmp_aux_dispatch_fini_chunk_4u(ident_t *loc, kmp_int32 gtid) {
2586 __kmp_dispatch_finish_chunk<kmp_uint32>(gtid, loc);
2587}
2588
2589void __kmp_aux_dispatch_fini_chunk_8u(ident_t *loc, kmp_int32 gtid) {
2590 __kmp_dispatch_finish_chunk<kmp_uint64>(gtid, loc);
2591}
2592
2593#endif /* KMP_GOMP_COMPAT */
2594
2595/* ------------------------------------------------------------------------ */