File: projects/openmp/runtime/src/kmp_dispatch.cpp
Warning: line 281, column 26 — Division by zero
(trip-count computation `tc = (UT)(ub - lb) / st + 1` is reached with a zero
loop increment `st` when runtime consistency checking is disabled).
1 | /* | |||
2 | * kmp_dispatch.cpp: dynamic scheduling - iteration initialization and dispatch. | |||
3 | */ | |||
4 | ||||
5 | //===----------------------------------------------------------------------===// | |||
6 | // | |||
7 | // The LLVM Compiler Infrastructure | |||
8 | // | |||
9 | // This file is dual licensed under the MIT and the University of Illinois Open | |||
10 | // Source Licenses. See LICENSE.txt for details. | |||
11 | // | |||
12 | //===----------------------------------------------------------------------===// | |||
13 | ||||
14 | /* Dynamic scheduling initialization and dispatch. | |||
15 | * | |||
16 | * NOTE: __kmp_nth is a constant inside of any dispatch loop, however | |||
17 | * it may change values between parallel regions. __kmp_max_nth | |||
18 | * is the largest value __kmp_nth may take, 1 is the smallest. | |||
19 | */ | |||
20 | ||||
21 | #include "kmp.h" | |||
22 | #include "kmp_error.h" | |||
23 | #include "kmp_i18n.h" | |||
24 | #include "kmp_itt.h" | |||
25 | #include "kmp_stats.h" | |||
26 | #include "kmp_str.h" | |||
27 | #if KMP_OS_WINDOWS0 && KMP_ARCH_X860 | |||
28 | #include <float.h> | |||
29 | #endif | |||
30 | #include "kmp_lock.h" | |||
31 | #include "kmp_dispatch.h" | |||
32 | #if KMP_USE_HIER_SCHED0 | |||
33 | #include "kmp_dispatch_hier.h" | |||
34 | #endif | |||
35 | ||||
36 | #if OMPT_SUPPORT1 | |||
37 | #include "ompt-specific.h" | |||
38 | #endif | |||
39 | ||||
40 | /* ------------------------------------------------------------------------ */ | |||
41 | /* ------------------------------------------------------------------------ */ | |||
42 | ||||
43 | void __kmp_dispatch_deo_error(int *gtid_ref, int *cid_ref, ident_t *loc_ref) { | |||
44 | kmp_info_t *th; | |||
45 | ||||
46 | KMP_DEBUG_ASSERT(gtid_ref)if (!(gtid_ref)) { __kmp_debug_assert("gtid_ref", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 46); }; | |||
47 | ||||
48 | if (__kmp_env_consistency_check) { | |||
49 | th = __kmp_threads[*gtid_ref]; | |||
50 | if (th->th.th_root->r.r_active && | |||
51 | (th->th.th_dispatch->th_dispatch_pr_current->pushed_ws != ct_none)) { | |||
52 | #if KMP_USE_DYNAMIC_LOCK1 | |||
53 | __kmp_push_sync(*gtid_ref, ct_ordered_in_pdo, loc_ref, NULL__null, 0); | |||
54 | #else | |||
55 | __kmp_push_sync(*gtid_ref, ct_ordered_in_pdo, loc_ref, NULL__null); | |||
56 | #endif | |||
57 | } | |||
58 | } | |||
59 | } | |||
60 | ||||
61 | void __kmp_dispatch_dxo_error(int *gtid_ref, int *cid_ref, ident_t *loc_ref) { | |||
62 | kmp_info_t *th; | |||
63 | ||||
64 | if (__kmp_env_consistency_check) { | |||
65 | th = __kmp_threads[*gtid_ref]; | |||
66 | if (th->th.th_dispatch->th_dispatch_pr_current->pushed_ws != ct_none) { | |||
67 | __kmp_pop_sync(*gtid_ref, ct_ordered_in_pdo, loc_ref); | |||
68 | } | |||
69 | } | |||
70 | } | |||
71 | ||||
72 | // Initialize a dispatch_private_info_template<T> buffer for a particular | |||
73 | // type of schedule,chunk. The loop description is found in lb (lower bound), | |||
74 | // ub (upper bound), and st (stride). nproc is the number of threads relevant | |||
75 | // to the scheduling (often the number of threads in a team, but not always if | |||
76 | // hierarchical scheduling is used). tid is the id of the thread calling | |||
77 | // the function within the group of nproc threads. It will have a value | |||
78 | // between 0 and nproc - 1. This is often just the thread id within a team, but | |||
79 | // is not necessarily the case when using hierarchical scheduling. | |||
80 | // loc is the source file location of the corresponding loop | |||
81 | // gtid is the global thread id | |||
82 | template <typename T> | |||
83 | void __kmp_dispatch_init_algorithm(ident_t *loc, int gtid, | |||
84 | dispatch_private_info_template<T> *pr, | |||
85 | enum sched_type schedule, T lb, T ub, | |||
86 | typename traits_t<T>::signed_t st, | |||
87 | #if USE_ITT_BUILD1 | |||
88 | kmp_uint64 *cur_chunk, | |||
89 | #endif | |||
90 | typename traits_t<T>::signed_t chunk, | |||
91 | T nproc, T tid) { | |||
92 | typedef typename traits_t<T>::unsigned_t UT; | |||
93 | typedef typename traits_t<T>::floating_t DBL; | |||
94 | ||||
95 | int active; | |||
96 | T tc; | |||
97 | kmp_info_t *th; | |||
98 | kmp_team_t *team; | |||
99 | ||||
100 | #ifdef KMP_DEBUG1 | |||
101 | typedef typename traits_t<T>::signed_t ST; | |||
102 | { | |||
103 | char *buff; | |||
104 | // create format specifiers before the debug output | |||
105 | buff = __kmp_str_format("__kmp_dispatch_init_algorithm: T#%%d called " | |||
106 | "pr:%%p lb:%%%s ub:%%%s st:%%%s " | |||
107 | "schedule:%%d chunk:%%%s nproc:%%%s tid:%%%s\n", | |||
108 | traits_t<T>::spec, traits_t<T>::spec, | |||
109 | traits_t<ST>::spec, traits_t<ST>::spec, | |||
110 | traits_t<T>::spec, traits_t<T>::spec); | |||
111 | KD_TRACE(10, (buff, gtid, pr, lb, ub, st, schedule, chunk, nproc, tid))if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, pr , lb, ub, st, schedule, chunk, nproc, tid); }; | |||
112 | __kmp_str_free(&buff); | |||
113 | } | |||
114 | #endif | |||
115 | /* setup data */ | |||
116 | th = __kmp_threads[gtid]; | |||
117 | team = th->th.th_team; | |||
118 | active = !team->t.t_serialized; | |||
| ||||
119 | ||||
120 | #if USE_ITT_BUILD1 | |||
121 | int itt_need_metadata_reporting = __itt_metadata_add_ptr__kmp_itt_metadata_add_ptr__3_0 && | |||
122 | __kmp_forkjoin_frames_mode == 3 && | |||
123 | KMP_MASTER_GTID(gtid)(__kmp_tid_from_gtid((gtid)) == 0) && | |||
124 | #if OMP_40_ENABLED(50 >= 40) | |||
125 | th->th.th_teams_microtask == NULL__null && | |||
126 | #endif | |||
127 | team->t.t_active_level == 1; | |||
128 | #endif | |||
129 | #if (KMP_STATIC_STEAL_ENABLED1) | |||
130 | if (SCHEDULE_HAS_NONMONOTONIC(schedule)(((schedule)&kmp_sch_modifier_nonmonotonic) != 0)) | |||
131 | // AC: we now have only one implementation of stealing, so use it | |||
132 | schedule = kmp_sch_static_steal; | |||
133 | else | |||
134 | #endif | |||
135 | schedule = SCHEDULE_WITHOUT_MODIFIERS(schedule)(enum sched_type)( (schedule) & ~(kmp_sch_modifier_nonmonotonic | kmp_sch_modifier_monotonic)); | |||
136 | ||||
137 | /* Pick up the nomerge/ordered bits from the scheduling type */ | |||
138 | if ((schedule >= kmp_nm_lower) && (schedule < kmp_nm_upper)) { | |||
139 | pr->flags.nomerge = TRUE(!0); | |||
140 | schedule = | |||
141 | (enum sched_type)(((int)schedule) - (kmp_nm_lower - kmp_sch_lower)); | |||
142 | } else { | |||
143 | pr->flags.nomerge = FALSE0; | |||
144 | } | |||
145 | pr->type_size = traits_t<T>::type_size; // remember the size of variables | |||
146 | if (kmp_ord_lower & schedule) { | |||
147 | pr->flags.ordered = TRUE(!0); | |||
148 | schedule = | |||
149 | (enum sched_type)(((int)schedule) - (kmp_ord_lower - kmp_sch_lower)); | |||
150 | } else { | |||
151 | pr->flags.ordered = FALSE0; | |||
152 | } | |||
153 | ||||
154 | if (schedule == kmp_sch_static) { | |||
155 | schedule = __kmp_static; | |||
156 | } else { | |||
157 | if (schedule == kmp_sch_runtime) { | |||
158 | // Use the scheduling specified by OMP_SCHEDULE (or __kmp_sch_default if | |||
159 | // not specified) | |||
160 | schedule = team->t.t_sched.r_sched_type; | |||
161 | // Detail the schedule if needed (global controls are differentiated | |||
162 | // appropriately) | |||
163 | if (schedule == kmp_sch_guided_chunked) { | |||
164 | schedule = __kmp_guided; | |||
165 | } else if (schedule == kmp_sch_static) { | |||
166 | schedule = __kmp_static; | |||
167 | } | |||
168 | // Use the chunk size specified by OMP_SCHEDULE (or default if not | |||
169 | // specified) | |||
170 | chunk = team->t.t_sched.chunk; | |||
171 | #if USE_ITT_BUILD1 | |||
172 | if (cur_chunk) | |||
173 | *cur_chunk = chunk; | |||
174 | #endif | |||
175 | #ifdef KMP_DEBUG1 | |||
176 | { | |||
177 | char *buff; | |||
178 | // create format specifiers before the debug output | |||
179 | buff = __kmp_str_format("__kmp_dispatch_init_algorithm: T#%%d new: " | |||
180 | "schedule:%%d chunk:%%%s\n", | |||
181 | traits_t<ST>::spec); | |||
182 | KD_TRACE(10, (buff, gtid, schedule, chunk))if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, schedule , chunk); }; | |||
183 | __kmp_str_free(&buff); | |||
184 | } | |||
185 | #endif | |||
186 | } else { | |||
187 | if (schedule == kmp_sch_guided_chunked) { | |||
188 | schedule = __kmp_guided; | |||
189 | } | |||
190 | if (chunk <= 0) { | |||
191 | chunk = KMP_DEFAULT_CHUNK1; | |||
192 | } | |||
193 | } | |||
194 | ||||
195 | if (schedule == kmp_sch_auto) { | |||
196 | // mapping and differentiation: in the __kmp_do_serial_initialize() | |||
197 | schedule = __kmp_auto; | |||
198 | #ifdef KMP_DEBUG1 | |||
199 | { | |||
200 | char *buff; | |||
201 | // create format specifiers before the debug output | |||
202 | buff = __kmp_str_format( | |||
203 | "__kmp_dispatch_init_algorithm: kmp_sch_auto: T#%%d new: " | |||
204 | "schedule:%%d chunk:%%%s\n", | |||
205 | traits_t<ST>::spec); | |||
206 | KD_TRACE(10, (buff, gtid, schedule, chunk))if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, schedule , chunk); }; | |||
207 | __kmp_str_free(&buff); | |||
208 | } | |||
209 | #endif | |||
210 | } | |||
211 | ||||
212 | /* guided analytical not safe for too many threads */ | |||
213 | if (schedule == kmp_sch_guided_analytical_chunked && nproc > 1 << 20) { | |||
214 | schedule = kmp_sch_guided_iterative_chunked; | |||
215 | KMP_WARNING(DispatchManyThreads)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_DispatchManyThreads ), __kmp_msg_null); | |||
216 | } | |||
217 | #if OMP_45_ENABLED(50 >= 45) | |||
218 | if (schedule == kmp_sch_runtime_simd) { | |||
219 | // compiler provides simd_width in the chunk parameter | |||
220 | schedule = team->t.t_sched.r_sched_type; | |||
221 | // Detail the schedule if needed (global controls are differentiated | |||
222 | // appropriately) | |||
223 | if (schedule == kmp_sch_static || schedule == kmp_sch_auto || | |||
224 | schedule == __kmp_static) { | |||
225 | schedule = kmp_sch_static_balanced_chunked; | |||
226 | } else { | |||
227 | if (schedule == kmp_sch_guided_chunked || schedule == __kmp_guided) { | |||
228 | schedule = kmp_sch_guided_simd; | |||
229 | } | |||
230 | chunk = team->t.t_sched.chunk * chunk; | |||
231 | } | |||
232 | #if USE_ITT_BUILD1 | |||
233 | if (cur_chunk) | |||
234 | *cur_chunk = chunk; | |||
235 | #endif | |||
236 | #ifdef KMP_DEBUG1 | |||
237 | { | |||
238 | char *buff; | |||
239 | // create format specifiers before the debug output | |||
240 | buff = __kmp_str_format("__kmp_dispatch_init: T#%%d new: schedule:%%d" | |||
241 | " chunk:%%%s\n", | |||
242 | traits_t<ST>::spec); | |||
243 | KD_TRACE(10, (buff, gtid, schedule, chunk))if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, schedule , chunk); }; | |||
244 | __kmp_str_free(&buff); | |||
245 | } | |||
246 | #endif | |||
247 | } | |||
248 | #endif // OMP_45_ENABLED | |||
249 | pr->u.p.parm1 = chunk; | |||
250 | } | |||
251 | KMP_ASSERT2((kmp_sch_lower < schedule && schedule < kmp_sch_upper),if (!((kmp_sch_lower < schedule && schedule < kmp_sch_upper ))) { __kmp_debug_assert(("unknown scheduling type"), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 252); } | |||
252 | "unknown scheduling type")if (!((kmp_sch_lower < schedule && schedule < kmp_sch_upper ))) { __kmp_debug_assert(("unknown scheduling type"), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 252); }; | |||
253 | ||||
254 | pr->u.p.count = 0; | |||
255 | ||||
256 | if (__kmp_env_consistency_check) { | |||
257 | if (st == 0) { | |||
258 | __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, | |||
259 | (pr->flags.ordered ? ct_pdo_ordered : ct_pdo), loc); | |||
260 | } | |||
261 | } | |||
262 | // compute trip count | |||
263 | if (st == 1) { // most common case | |||
264 | if (ub >= lb) { | |||
265 | tc = ub - lb + 1; | |||
266 | } else { // ub < lb | |||
267 | tc = 0; // zero-trip | |||
268 | } | |||
269 | } else if (st < 0) { | |||
270 | if (lb >= ub) { | |||
271 | // AC: cast to unsigned is needed for loops like (i=2B; i>-2B; i-=1B), | |||
272 | // where the division needs to be unsigned regardless of the result type | |||
273 | tc = (UT)(lb - ub) / (-st) + 1; | |||
274 | } else { // lb < ub | |||
275 | tc = 0; // zero-trip | |||
276 | } | |||
277 | } else { // st > 0 | |||
278 | if (ub >= lb) { | |||
279 | // AC: cast to unsigned is needed for loops like (i=-2B; i<2B; i+=1B), | |||
280 | // where the division needs to be unsigned regardless of the result type | |||
281 | tc = (UT)(ub - lb) / st + 1; | |||
| ||||
282 | } else { // ub < lb | |||
283 | tc = 0; // zero-trip | |||
284 | } | |||
285 | } | |||
286 | ||||
287 | pr->u.p.lb = lb; | |||
288 | pr->u.p.ub = ub; | |||
289 | pr->u.p.st = st; | |||
290 | pr->u.p.tc = tc; | |||
291 | ||||
292 | #if KMP_OS_WINDOWS0 | |||
293 | pr->u.p.last_upper = ub + st; | |||
294 | #endif /* KMP_OS_WINDOWS */ | |||
295 | ||||
296 | /* NOTE: only the active parallel region(s) has active ordered sections */ | |||
297 | ||||
298 | if (active) { | |||
299 | if (pr->flags.ordered) { | |||
300 | pr->ordered_bumped = 0; | |||
301 | pr->u.p.ordered_lower = 1; | |||
302 | pr->u.p.ordered_upper = 0; | |||
303 | } | |||
304 | } | |||
305 | ||||
306 | switch (schedule) { | |||
307 | #if (KMP_STATIC_STEAL_ENABLED1) | |||
308 | case kmp_sch_static_steal: { | |||
309 | T ntc, init; | |||
310 | ||||
311 | KD_TRACE(100,if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_steal case\n" , gtid); } | |||
312 | ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_steal case\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_steal case\n" , gtid); } | |||
313 | gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_steal case\n" , gtid); }; | |||
314 | ||||
315 | ntc = (tc % chunk ? 1 : 0) + tc / chunk; | |||
316 | if (nproc > 1 && ntc >= nproc) { | |||
317 | KMP_COUNT_BLOCK(OMP_LOOP_STATIC_STEAL)((void)0); | |||
318 | T id = tid; | |||
319 | T small_chunk, extras; | |||
320 | ||||
321 | small_chunk = ntc / nproc; | |||
322 | extras = ntc % nproc; | |||
323 | ||||
324 | init = id * small_chunk + (id < extras ? id : extras); | |||
325 | pr->u.p.count = init; | |||
326 | pr->u.p.ub = init + small_chunk + (id < extras ? 1 : 0); | |||
327 | ||||
328 | pr->u.p.parm2 = lb; | |||
329 | // pr->pfields.parm3 = 0; // it's not used in static_steal | |||
330 | pr->u.p.parm4 = (id + 1) % nproc; // remember neighbour tid | |||
331 | pr->u.p.st = st; | |||
332 | if (traits_t<T>::type_size > 4) { | |||
333 | // AC: TODO: check if 16-byte CAS available and use it to | |||
334 | // improve performance (probably wait for explicit request | |||
335 | // before spending time on this). | |||
336 | // For now use dynamically allocated per-thread lock, | |||
337 | // free memory in __kmp_dispatch_next when status==0. | |||
338 | KMP_DEBUG_ASSERT(th->th.th_dispatch->th_steal_lock == NULL)if (!(th->th.th_dispatch->th_steal_lock == __null)) { __kmp_debug_assert ("th->th.th_dispatch->th_steal_lock == __null", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 338); }; | |||
339 | th->th.th_dispatch->th_steal_lock = | |||
340 | (kmp_lock_t *)__kmp_allocate(sizeof(kmp_lock_t))___kmp_allocate((sizeof(kmp_lock_t)), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 340); | |||
341 | __kmp_init_lock(th->th.th_dispatch->th_steal_lock); | |||
342 | } | |||
343 | break; | |||
344 | } else { | |||
345 | KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d falling-through to "if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d falling-through to " "kmp_sch_static_balanced\n", gtid); } | |||
346 | "kmp_sch_static_balanced\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d falling-through to " "kmp_sch_static_balanced\n", gtid); } | |||
347 | gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d falling-through to " "kmp_sch_static_balanced\n", gtid); }; | |||
348 | schedule = kmp_sch_static_balanced; | |||
349 | /* too few iterations: fall-through to kmp_sch_static_balanced */ | |||
350 | } // if | |||
351 | /* FALL-THROUGH to static balanced */ | |||
352 | } // case | |||
353 | #endif | |||
354 | case kmp_sch_static_balanced: { | |||
355 | T init, limit; | |||
356 | ||||
357 | KD_TRACE(if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_balanced case\n" , gtid); } | |||
358 | 100,if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_balanced case\n" , gtid); } | |||
359 | ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_balanced case\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_balanced case\n" , gtid); } | |||
360 | gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_balanced case\n" , gtid); }; | |||
361 | ||||
362 | if (nproc > 1) { | |||
363 | T id = tid; | |||
364 | ||||
365 | if (tc < nproc) { | |||
366 | if (id < tc) { | |||
367 | init = id; | |||
368 | limit = id; | |||
369 | pr->u.p.parm1 = (id == tc - 1); /* parm1 stores *plastiter */ | |||
370 | } else { | |||
371 | pr->u.p.count = 1; /* means no more chunks to execute */ | |||
372 | pr->u.p.parm1 = FALSE0; | |||
373 | break; | |||
374 | } | |||
375 | } else { | |||
376 | T small_chunk = tc / nproc; | |||
377 | T extras = tc % nproc; | |||
378 | init = id * small_chunk + (id < extras ? id : extras); | |||
379 | limit = init + small_chunk - (id < extras ? 0 : 1); | |||
380 | pr->u.p.parm1 = (id == nproc - 1); | |||
381 | } | |||
382 | } else { | |||
383 | if (tc > 0) { | |||
384 | init = 0; | |||
385 | limit = tc - 1; | |||
386 | pr->u.p.parm1 = TRUE(!0); | |||
387 | } else { | |||
388 | // zero trip count | |||
389 | pr->u.p.count = 1; /* means no more chunks to execute */ | |||
390 | pr->u.p.parm1 = FALSE0; | |||
391 | break; | |||
392 | } | |||
393 | } | |||
394 | #if USE_ITT_BUILD1 | |||
395 | // Calculate chunk for metadata report | |||
396 | if (itt_need_metadata_reporting) | |||
397 | if (cur_chunk) | |||
398 | *cur_chunk = limit - init + 1; | |||
399 | #endif | |||
400 | if (st == 1) { | |||
401 | pr->u.p.lb = lb + init; | |||
402 | pr->u.p.ub = lb + limit; | |||
403 | } else { | |||
404 | // calculated upper bound, "ub" is user-defined upper bound | |||
405 | T ub_tmp = lb + limit * st; | |||
406 | pr->u.p.lb = lb + init * st; | |||
407 | // adjust upper bound to "ub" if needed, so that MS lastprivate will match | |||
408 | // it exactly | |||
409 | if (st > 0) { | |||
410 | pr->u.p.ub = (ub_tmp + st > ub ? ub : ub_tmp); | |||
411 | } else { | |||
412 | pr->u.p.ub = (ub_tmp + st < ub ? ub : ub_tmp); | |||
413 | } | |||
414 | } | |||
415 | if (pr->flags.ordered) { | |||
416 | pr->u.p.ordered_lower = init; | |||
417 | pr->u.p.ordered_upper = limit; | |||
418 | } | |||
419 | break; | |||
420 | } // case | |||
421 | #if OMP_45_ENABLED(50 >= 45) | |||
422 | case kmp_sch_static_balanced_chunked: { | |||
423 | // similar to balanced, but chunk adjusted to multiple of simd width | |||
424 | T nth = nproc; | |||
425 | KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d runtime(simd:static)"if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d runtime(simd:static)" " -> falling-through to static_greedy\n", gtid); } | |||
426 | " -> falling-through to static_greedy\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d runtime(simd:static)" " -> falling-through to static_greedy\n", gtid); } | |||
427 | gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d runtime(simd:static)" " -> falling-through to static_greedy\n", gtid); }; | |||
428 | schedule = kmp_sch_static_greedy; | |||
429 | if (nth > 1) | |||
430 | pr->u.p.parm1 = ((tc + nth - 1) / nth + chunk - 1) & ~(chunk - 1); | |||
431 | else | |||
432 | pr->u.p.parm1 = tc; | |||
433 | break; | |||
434 | } // case | |||
435 | case kmp_sch_guided_simd: | |||
436 | #endif // OMP_45_ENABLED | |||
437 | case kmp_sch_guided_iterative_chunked: { | |||
438 | KD_TRACE(if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_guided_iterative_chunked" " case\n", gtid); } | |||
439 | 100,if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_guided_iterative_chunked" " case\n", gtid); } | |||
440 | ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_guided_iterative_chunked"if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_guided_iterative_chunked" " case\n", gtid); } | |||
441 | " case\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_guided_iterative_chunked" " case\n", gtid); } | |||
442 | gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_guided_iterative_chunked" " case\n", gtid); }; | |||
443 | ||||
444 | if (nproc > 1) { | |||
445 | if ((2L * chunk + 1) * nproc >= tc) { | |||
446 | /* chunk size too large, switch to dynamic */ | |||
447 | schedule = kmp_sch_dynamic_chunked; | |||
448 | } else { | |||
449 | // when remaining iters become less than parm2 - switch to dynamic | |||
450 | pr->u.p.parm2 = guided_int_param * nproc * (chunk + 1); | |||
451 | *(double *)&pr->u.p.parm3 = | |||
452 | guided_flt_param / nproc; // may occupy parm3 and parm4 | |||
453 | } | |||
454 | } else { | |||
455 | KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d falling-through to "if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d falling-through to " "kmp_sch_static_greedy\n", gtid); } | |||
456 | "kmp_sch_static_greedy\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d falling-through to " "kmp_sch_static_greedy\n", gtid); } | |||
457 | gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d falling-through to " "kmp_sch_static_greedy\n", gtid); }; | |||
458 | schedule = kmp_sch_static_greedy; | |||
459 | /* team->t.t_nproc == 1: fall-through to kmp_sch_static_greedy */ | |||
460 | KD_TRACE(if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_greedy case\n" , gtid); } | |||
461 | 100,if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_greedy case\n" , gtid); } | |||
462 | ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_greedy case\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_greedy case\n" , gtid); } | |||
463 | gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_greedy case\n" , gtid); }; | |||
464 | pr->u.p.parm1 = tc; | |||
465 | } // if | |||
466 | } // case | |||
467 | break; | |||
468 | case kmp_sch_guided_analytical_chunked: { | |||
469 | KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d "if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d " "kmp_sch_guided_analytical_chunked case\n", gtid); } | |||
470 | "kmp_sch_guided_analytical_chunked case\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d " "kmp_sch_guided_analytical_chunked case\n", gtid); } | |||
471 | gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d " "kmp_sch_guided_analytical_chunked case\n", gtid); }; | |||
472 | ||||
473 | if (nproc > 1) { | |||
474 | if ((2L * chunk + 1) * nproc >= tc) { | |||
475 | /* chunk size too large, switch to dynamic */ | |||
476 | schedule = kmp_sch_dynamic_chunked; | |||
477 | } else { | |||
478 | /* commonly used term: (2 nproc - 1)/(2 nproc) */ | |||
479 | DBL x; | |||
480 | ||||
481 | #if KMP_OS_WINDOWS0 && KMP_ARCH_X860 | |||
482 | /* Linux* OS already has 64-bit computation by default for long double, | |||
483 | and on Windows* OS on Intel(R) 64, /Qlong_double doesn't work. On | |||
484 | Windows* OS on IA-32 architecture, we need to set precision to 64-bit | |||
485 | instead of the default 53-bit. Even though long double doesn't work | |||
486 | on Windows* OS on Intel(R) 64, the resulting lack of precision is not | |||
487 | expected to impact the correctness of the algorithm, but this has not | |||
488 | been mathematically proven. */ | |||
489 | // save original FPCW and set precision to 64-bit, as | |||
490 | // Windows* OS on IA-32 architecture defaults to 53-bit | |||
491 | unsigned int oldFpcw = _control87(0, 0); | |||
492 | _control87(_PC_64, _MCW_PC); // 0,0x30000 | |||
493 | #endif | |||
494 | /* value used for comparison in solver for cross-over point */ | |||
495 | long double target = ((long double)chunk * 2 + 1) * nproc / tc; | |||
496 | ||||
497 | /* crossover point--chunk indexes equal to or greater than | |||
498 | this point switch to dynamic-style scheduling */ | |||
499 | UT cross; | |||
500 | ||||
501 | /* commonly used term: (2 nproc - 1)/(2 nproc) */ | |||
502 | x = (long double)1.0 - (long double)0.5 / nproc; | |||
503 | ||||
504 | #ifdef KMP_DEBUG1 | |||
505 | { // test natural alignment | |||
506 | struct _test_a { | |||
507 | char a; | |||
508 | union { | |||
509 | char b; | |||
510 | DBL d; | |||
511 | }; | |||
512 | } t; | |||
513 | ptrdiff_t natural_alignment = | |||
514 | (ptrdiff_t)&t.b - (ptrdiff_t)&t - (ptrdiff_t)1; | |||
515 | //__kmp_warn( " %llx %llx %lld", (long long)&t.d, (long long)&t, (long | |||
516 | // long)natural_alignment ); | |||
517 | KMP_DEBUG_ASSERT(if (!((((ptrdiff_t)&pr->u.p.parm3) & (natural_alignment )) == 0)) { __kmp_debug_assert("(((ptrdiff_t)&pr->u.p.parm3) & (natural_alignment)) == 0" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 518); } | |||
518 | (((ptrdiff_t)&pr->u.p.parm3) & (natural_alignment)) == 0)if (!((((ptrdiff_t)&pr->u.p.parm3) & (natural_alignment )) == 0)) { __kmp_debug_assert("(((ptrdiff_t)&pr->u.p.parm3) & (natural_alignment)) == 0" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 518); }; | |||
519 | } | |||
520 | #endif // KMP_DEBUG | |||
521 | ||||
522 | /* save the term in thread private dispatch structure */ | |||
523 | *(DBL *)&pr->u.p.parm3 = x; | |||
524 | ||||
525 | /* solve for the crossover point to the nearest integer i for which C_i | |||
526 | <= chunk */ | |||
527 | { | |||
528 | UT left, right, mid; | |||
529 | long double p; | |||
530 | ||||
531 | /* estimate initial upper and lower bound */ | |||
532 | ||||
533 | /* doesn't matter what value right is as long as it is positive, but | |||
534 | it affects performance of the solver */ | |||
535 | right = 229; | |||
536 | p = __kmp_pow<UT>(x, right); | |||
537 | if (p > target) { | |||
538 | do { | |||
539 | p *= p; | |||
540 | right <<= 1; | |||
541 | } while (p > target && right < (1 << 27)); | |||
542 | /* lower bound is previous (failed) estimate of upper bound */ | |||
543 | left = right >> 1; | |||
544 | } else { | |||
545 | left = 0; | |||
546 | } | |||
547 | ||||
548 | /* bisection root-finding method */ | |||
549 | while (left + 1 < right) { | |||
550 | mid = (left + right) / 2; | |||
551 | if (__kmp_pow<UT>(x, mid) > target) { | |||
552 | left = mid; | |||
553 | } else { | |||
554 | right = mid; | |||
555 | } | |||
556 | } // while | |||
557 | cross = right; | |||
558 | } | |||
559 | /* assert sanity of computed crossover point */ | |||
560 | KMP_ASSERT(cross && __kmp_pow<UT>(x, cross - 1) > target &&if (!(cross && __kmp_pow<UT>(x, cross - 1) > target && __kmp_pow<UT>(x, cross) <= target )) { __kmp_debug_assert("cross && __kmp_pow<UT>(x, cross - 1) > target && __kmp_pow<UT>(x, cross) <= target" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 561); } | |||
561 | __kmp_pow<UT>(x, cross) <= target)if (!(cross && __kmp_pow<UT>(x, cross - 1) > target && __kmp_pow<UT>(x, cross) <= target )) { __kmp_debug_assert("cross && __kmp_pow<UT>(x, cross - 1) > target && __kmp_pow<UT>(x, cross) <= target" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 561); }; | |||
562 | ||||
563 | /* save the crossover point in thread private dispatch structure */ | |||
564 | pr->u.p.parm2 = cross; | |||
565 | ||||
566 | // C75803 | |||
567 | #if ((KMP_OS_LINUX1 || KMP_OS_WINDOWS0) && KMP_ARCH_X860) && (!defined(KMP_I8)) | |||
568 | #define GUIDED_ANALYTICAL_WORKAROUND(x) (*(DBL *)&pr->u.p.parm3) | |||
569 | #else | |||
570 | #define GUIDED_ANALYTICAL_WORKAROUND(x) (x) | |||
571 | #endif | |||
572 | /* dynamic-style scheduling offset */ | |||
573 | pr->u.p.count = tc - __kmp_dispatch_guided_remaining( | |||
574 | tc, GUIDED_ANALYTICAL_WORKAROUND(x), cross) - | |||
575 | cross * chunk; | |||
576 | #if KMP_OS_WINDOWS0 && KMP_ARCH_X860 | |||
577 | // restore FPCW | |||
578 | _control87(oldFpcw, _MCW_PC); | |||
579 | #endif | |||
580 | } // if | |||
581 | } else { | |||
582 | KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d falling-through to "if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d falling-through to " "kmp_sch_static_greedy\n", gtid); } | |||
583 | "kmp_sch_static_greedy\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d falling-through to " "kmp_sch_static_greedy\n", gtid); } | |||
584 | gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d falling-through to " "kmp_sch_static_greedy\n", gtid); }; | |||
585 | schedule = kmp_sch_static_greedy; | |||
586 | /* team->t.t_nproc == 1: fall-through to kmp_sch_static_greedy */ | |||
587 | pr->u.p.parm1 = tc; | |||
588 | } // if | |||
589 | } // case | |||
590 | break; | |||
591 | case kmp_sch_static_greedy: | |||
592 | KD_TRACE(if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_greedy case\n" , gtid); } | |||
593 | 100,if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_greedy case\n" , gtid); } | |||
594 | ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_greedy case\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_greedy case\n" , gtid); } | |||
595 | gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_greedy case\n" , gtid); }; | |||
596 | pr->u.p.parm1 = (nproc > 1) ? (tc + nproc - 1) / nproc : tc; | |||
597 | break; | |||
598 | case kmp_sch_static_chunked: | |||
599 | case kmp_sch_dynamic_chunked: | |||
600 | if (pr->u.p.parm1 <= 0) { | |||
601 | pr->u.p.parm1 = KMP_DEFAULT_CHUNK1; | |||
602 | } | |||
603 | KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d "if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d " "kmp_sch_static_chunked/kmp_sch_dynamic_chunked cases\n", gtid ); } | |||
604 | "kmp_sch_static_chunked/kmp_sch_dynamic_chunked cases\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d " "kmp_sch_static_chunked/kmp_sch_dynamic_chunked cases\n", gtid ); } | |||
605 | gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d " "kmp_sch_static_chunked/kmp_sch_dynamic_chunked cases\n", gtid ); }; | |||
606 | break; | |||
607 | case kmp_sch_trapezoidal: { | |||
608 | /* TSS: trapezoid self-scheduling, minimum chunk_size = parm1 */ | |||
609 | ||||
610 | T parm1, parm2, parm3, parm4; | |||
611 | KD_TRACE(100,if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_trapezoidal case\n" , gtid); } | |||
612 | ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_trapezoidal case\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_trapezoidal case\n" , gtid); } | |||
613 | gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_trapezoidal case\n" , gtid); }; | |||
614 | ||||
615 | parm1 = chunk; | |||
616 | ||||
617 | /* F : size of the first cycle */ | |||
618 | parm2 = (tc / (2 * nproc)); | |||
619 | ||||
620 | if (parm2 < 1) { | |||
621 | parm2 = 1; | |||
622 | } | |||
623 | ||||
624 | /* L : size of the last cycle. Make sure the last cycle is not larger | |||
625 | than the first cycle. */ | |||
626 | if (parm1 < 1) { | |||
627 | parm1 = 1; | |||
628 | } else if (parm1 > parm2) { | |||
629 | parm1 = parm2; | |||
630 | } | |||
631 | ||||
632 | /* N : number of cycles */ | |||
633 | parm3 = (parm2 + parm1); | |||
634 | parm3 = (2 * tc + parm3 - 1) / parm3; | |||
635 | ||||
636 | if (parm3 < 2) { | |||
637 | parm3 = 2; | |||
638 | } | |||
639 | ||||
640 | /* sigma : decreasing incr of the trapezoid */ | |||
641 | parm4 = (parm3 - 1); | |||
642 | parm4 = (parm2 - parm1) / parm4; | |||
643 | ||||
644 | // pointless check, because parm4 >= 0 always | |||
645 | // if ( parm4 < 0 ) { | |||
646 | // parm4 = 0; | |||
647 | //} | |||
648 | ||||
649 | pr->u.p.parm1 = parm1; | |||
650 | pr->u.p.parm2 = parm2; | |||
651 | pr->u.p.parm3 = parm3; | |||
652 | pr->u.p.parm4 = parm4; | |||
653 | } // case | |||
654 | break; | |||
655 | ||||
656 | default: { | |||
657 | __kmp_fatal(KMP_MSG(UnknownSchedTypeDetected)__kmp_msg_format(kmp_i18n_msg_UnknownSchedTypeDetected), // Primary message | |||
658 | KMP_HNT(GetNewerLibrary)__kmp_msg_format(kmp_i18n_hnt_GetNewerLibrary), // Hint | |||
659 | __kmp_msg_null // Variadic argument list terminator | |||
660 | ); | |||
661 | } break; | |||
662 | } // switch | |||
663 | pr->schedule = schedule; | |||
664 | } | |||
665 | ||||
666 | #if KMP_USE_HIER_SCHED0 | |||
667 | template <typename T> | |||
668 | inline void __kmp_dispatch_init_hier_runtime(ident_t *loc, T lb, T ub, | |||
669 | typename traits_t<T>::signed_t st); | |||
670 | template <> | |||
671 | inline void | |||
672 | __kmp_dispatch_init_hier_runtime<kmp_int32>(ident_t *loc, kmp_int32 lb, | |||
673 | kmp_int32 ub, kmp_int32 st) { | |||
674 | __kmp_dispatch_init_hierarchy<kmp_int32>( | |||
675 | loc, __kmp_hier_scheds.size, __kmp_hier_scheds.layers, | |||
676 | __kmp_hier_scheds.scheds, __kmp_hier_scheds.small_chunks, lb, ub, st); | |||
677 | } | |||
678 | template <> | |||
679 | inline void | |||
680 | __kmp_dispatch_init_hier_runtime<kmp_uint32>(ident_t *loc, kmp_uint32 lb, | |||
681 | kmp_uint32 ub, kmp_int32 st) { | |||
682 | __kmp_dispatch_init_hierarchy<kmp_uint32>( | |||
683 | loc, __kmp_hier_scheds.size, __kmp_hier_scheds.layers, | |||
684 | __kmp_hier_scheds.scheds, __kmp_hier_scheds.small_chunks, lb, ub, st); | |||
685 | } | |||
686 | template <> | |||
687 | inline void | |||
688 | __kmp_dispatch_init_hier_runtime<kmp_int64>(ident_t *loc, kmp_int64 lb, | |||
689 | kmp_int64 ub, kmp_int64 st) { | |||
690 | __kmp_dispatch_init_hierarchy<kmp_int64>( | |||
691 | loc, __kmp_hier_scheds.size, __kmp_hier_scheds.layers, | |||
692 | __kmp_hier_scheds.scheds, __kmp_hier_scheds.large_chunks, lb, ub, st); | |||
693 | } | |||
694 | template <> | |||
695 | inline void | |||
696 | __kmp_dispatch_init_hier_runtime<kmp_uint64>(ident_t *loc, kmp_uint64 lb, | |||
697 | kmp_uint64 ub, kmp_int64 st) { | |||
698 | __kmp_dispatch_init_hierarchy<kmp_uint64>( | |||
699 | loc, __kmp_hier_scheds.size, __kmp_hier_scheds.layers, | |||
700 | __kmp_hier_scheds.scheds, __kmp_hier_scheds.large_chunks, lb, ub, st); | |||
701 | } | |||
702 | ||||
703 | // free all the hierarchy scheduling memory associated with the team | |||
704 | void __kmp_dispatch_free_hierarchies(kmp_team_t *team) { | |||
705 | int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2; | |||
706 | for (int i = 0; i < num_disp_buff; ++i) { | |||
707 | // type does not matter here so use kmp_int32 | |||
708 | auto sh = | |||
709 | reinterpret_cast<dispatch_shared_info_template<kmp_int32> volatile *>( | |||
710 | &team->t.t_disp_buffer[i]); | |||
711 | if (sh->hier) { | |||
712 | sh->hier->deallocate(); | |||
713 | __kmp_free(sh->hier)___kmp_free((sh->hier), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 713); | |||
714 | } | |||
715 | } | |||
716 | } | |||
717 | #endif | |||
718 | ||||
719 | // UT - unsigned flavor of T, ST - signed flavor of T, | |||
720 | // DBL - double if sizeof(T)==4, or long double if sizeof(T)==8 | |||
721 | template <typename T> | |||
722 | static void | |||
723 | __kmp_dispatch_init(ident_t *loc, int gtid, enum sched_type schedule, T lb, | |||
724 | T ub, typename traits_t<T>::signed_t st, | |||
725 | typename traits_t<T>::signed_t chunk, int push_ws) { | |||
726 | typedef typename traits_t<T>::unsigned_t UT; | |||
727 | ||||
728 | int active; | |||
729 | kmp_info_t *th; | |||
730 | kmp_team_t *team; | |||
731 | kmp_uint32 my_buffer_index; | |||
732 | dispatch_private_info_template<T> *pr; | |||
733 | dispatch_shared_info_template<T> volatile *sh; | |||
734 | ||||
735 | KMP_BUILD_ASSERT(sizeof(dispatch_private_info_template<T>) ==static_assert(sizeof(dispatch_private_info_template<T>) == sizeof(dispatch_private_info), "Build condition error") | |||
736 | sizeof(dispatch_private_info))static_assert(sizeof(dispatch_private_info_template<T>) == sizeof(dispatch_private_info), "Build condition error"); | |||
737 | KMP_BUILD_ASSERT(sizeof(dispatch_shared_info_template<UT>) ==static_assert(sizeof(dispatch_shared_info_template<UT>) == sizeof(dispatch_shared_info), "Build condition error") | |||
738 | sizeof(dispatch_shared_info))static_assert(sizeof(dispatch_shared_info_template<UT>) == sizeof(dispatch_shared_info), "Build condition error"); | |||
739 | ||||
740 | if (!TCR_4(__kmp_init_parallel)(__kmp_init_parallel)) | |||
741 | __kmp_parallel_initialize(); | |||
742 | ||||
743 | #if INCLUDE_SSC_MARKS(1 && 1) | |||
744 | SSC_MARK_DISPATCH_INIT()__asm__ __volatile__("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 " ::"i"(0xd696) : "%ebx"); | |||
745 | #endif | |||
746 | #ifdef KMP_DEBUG1 | |||
747 | typedef typename traits_t<T>::signed_t ST; | |||
748 | { | |||
749 | char *buff; | |||
750 | // create format specifiers before the debug output | |||
751 | buff = __kmp_str_format("__kmp_dispatch_init: T#%%d called: schedule:%%d " | |||
752 | "chunk:%%%s lb:%%%s ub:%%%s st:%%%s\n", | |||
753 | traits_t<ST>::spec, traits_t<T>::spec, | |||
754 | traits_t<T>::spec, traits_t<ST>::spec); | |||
755 | KD_TRACE(10, (buff, gtid, schedule, chunk, lb, ub, st))if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, schedule , chunk, lb, ub, st); }; | |||
756 | __kmp_str_free(&buff); | |||
757 | } | |||
758 | #endif | |||
759 | /* setup data */ | |||
760 | th = __kmp_threads[gtid]; | |||
761 | team = th->th.th_team; | |||
762 | active = !team->t.t_serialized; | |||
763 | th->th.th_ident = loc; | |||
764 | ||||
765 | // Any half-decent optimizer will remove this test when the blocks are empty | |||
766 | // since the macros expand to nothing | |||
767 | // when statistics are disabled. | |||
768 | if (schedule == __kmp_static) { | |||
769 | KMP_COUNT_BLOCK(OMP_LOOP_STATIC)((void)0); | |||
770 | } else { | |||
771 | KMP_COUNT_BLOCK(OMP_LOOP_DYNAMIC)((void)0); | |||
772 | } | |||
773 | ||||
774 | #if KMP_USE_HIER_SCHED0 | |||
775 | // Initialize the scheduling hierarchy if requested in OMP_SCHEDULE envirable | |||
776 | // Hierarchical scheduling does not work with ordered, so if ordered is | |||
777 | // detected, then revert back to threaded scheduling. | |||
778 | bool ordered; | |||
779 | enum sched_type my_sched = schedule; | |||
780 | my_buffer_index = th->th.th_dispatch->th_disp_index; | |||
781 | pr = reinterpret_cast<dispatch_private_info_template<T> *>( | |||
782 | &th->th.th_dispatch | |||
783 | ->th_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]); | |||
784 | my_sched = SCHEDULE_WITHOUT_MODIFIERS(my_sched)(enum sched_type)( (my_sched) & ~(kmp_sch_modifier_nonmonotonic | kmp_sch_modifier_monotonic)); | |||
785 | if ((my_sched >= kmp_nm_lower) && (my_sched < kmp_nm_upper)) | |||
786 | my_sched = | |||
787 | (enum sched_type)(((int)my_sched) - (kmp_nm_lower - kmp_sch_lower)); | |||
788 | ordered = (kmp_ord_lower & my_sched); | |||
789 | if (pr->flags.use_hier) { | |||
790 | if (ordered) { | |||
791 | KD_TRACE(100, ("__kmp_dispatch_init: T#%d ordered loop detected. "if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init: T#%d ordered loop detected. " "Disabling hierarchical scheduling.\n", gtid); } | |||
792 | "Disabling hierarchical scheduling.\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init: T#%d ordered loop detected. " "Disabling hierarchical scheduling.\n", gtid); } | |||
793 | gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init: T#%d ordered loop detected. " "Disabling hierarchical scheduling.\n", gtid); }; | |||
794 | pr->flags.use_hier = FALSE0; | |||
795 | } | |||
796 | } | |||
797 | if (schedule == kmp_sch_runtime && __kmp_hier_scheds.size > 0) { | |||
798 | // Don't use hierarchical for ordered parallel loops and don't | |||
799 | // use the runtime hierarchy if one was specified in the program | |||
800 | if (!ordered && !pr->flags.use_hier) | |||
801 | __kmp_dispatch_init_hier_runtime<T>(loc, lb, ub, st); | |||
802 | } | |||
803 | #endif // KMP_USE_HIER_SCHED | |||
804 | ||||
805 | #if USE_ITT_BUILD1 | |||
806 | kmp_uint64 cur_chunk = chunk; | |||
807 | int itt_need_metadata_reporting = __itt_metadata_add_ptr__kmp_itt_metadata_add_ptr__3_0 && | |||
808 | __kmp_forkjoin_frames_mode == 3 && | |||
809 | KMP_MASTER_GTID(gtid)(__kmp_tid_from_gtid((gtid)) == 0) && | |||
810 | #if OMP_40_ENABLED(50 >= 40) | |||
811 | th->th.th_teams_microtask == NULL__null && | |||
812 | #endif | |||
813 | team->t.t_active_level == 1; | |||
814 | #endif | |||
815 | if (!active) { | |||
816 | pr = reinterpret_cast<dispatch_private_info_template<T> *>( | |||
817 | th->th.th_dispatch->th_disp_buffer); /* top of the stack */ | |||
818 | } else { | |||
819 | KMP_DEBUG_ASSERT(th->th.th_dispatch ==if (!(th->th.th_dispatch == &th->th.th_team->t.t_dispatch [th->th.th_info.ds.ds_tid])) { __kmp_debug_assert("th->th.th_dispatch == &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 820); } | |||
820 | &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid])if (!(th->th.th_dispatch == &th->th.th_team->t.t_dispatch [th->th.th_info.ds.ds_tid])) { __kmp_debug_assert("th->th.th_dispatch == &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 820); }; | |||
821 | ||||
822 | my_buffer_index = th->th.th_dispatch->th_disp_index++; | |||
823 | ||||
824 | /* What happens when number of threads changes, need to resize buffer? */ | |||
825 | pr = reinterpret_cast<dispatch_private_info_template<T> *>( | |||
826 | &th->th.th_dispatch | |||
827 | ->th_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]); | |||
828 | sh = reinterpret_cast<dispatch_shared_info_template<T> volatile *>( | |||
829 | &team->t.t_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]); | |||
830 | KD_TRACE(10, ("__kmp_dispatch_init: T#%d my_buffer_index:%d\n", gtid,if (kmp_d_debug >= 10) { __kmp_debug_printf ("__kmp_dispatch_init: T#%d my_buffer_index:%d\n" , gtid, my_buffer_index); } | |||
831 | my_buffer_index))if (kmp_d_debug >= 10) { __kmp_debug_printf ("__kmp_dispatch_init: T#%d my_buffer_index:%d\n" , gtid, my_buffer_index); }; | |||
832 | } | |||
833 | ||||
834 | __kmp_dispatch_init_algorithm(loc, gtid, pr, schedule, lb, ub, st, | |||
835 | #if USE_ITT_BUILD1 | |||
836 | &cur_chunk, | |||
837 | #endif | |||
838 | chunk, (T)th->th.th_team_nproc, | |||
839 | (T)th->th.th_info.ds.ds_tid); | |||
840 | if (active) { | |||
841 | if (pr->flags.ordered == 0) { | |||
842 | th->th.th_dispatch->th_deo_fcn = __kmp_dispatch_deo_error; | |||
843 | th->th.th_dispatch->th_dxo_fcn = __kmp_dispatch_dxo_error; | |||
844 | } else { | |||
845 | th->th.th_dispatch->th_deo_fcn = __kmp_dispatch_deo<UT>; | |||
846 | th->th.th_dispatch->th_dxo_fcn = __kmp_dispatch_dxo<UT>; | |||
847 | } | |||
848 | } | |||
849 | ||||
850 | if (active) { | |||
851 | /* The name of this buffer should be my_buffer_index when it's free to use | |||
852 | * it */ | |||
853 | ||||
854 | KD_TRACE(100, ("__kmp_dispatch_init: T#%d before wait: my_buffer_index:%d "if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init: T#%d before wait: my_buffer_index:%d " "sh->buffer_index:%d\n", gtid, my_buffer_index, sh->buffer_index ); } | |||
855 | "sh->buffer_index:%d\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init: T#%d before wait: my_buffer_index:%d " "sh->buffer_index:%d\n", gtid, my_buffer_index, sh->buffer_index ); } | |||
856 | gtid, my_buffer_index, sh->buffer_index))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init: T#%d before wait: my_buffer_index:%d " "sh->buffer_index:%d\n", gtid, my_buffer_index, sh->buffer_index ); }; | |||
857 | __kmp_wait_yield<kmp_uint32>(&sh->buffer_index, my_buffer_index, | |||
858 | __kmp_eq<kmp_uint32> USE_ITT_BUILD_ARG(NULL), __null); | |||
859 | // Note: KMP_WAIT_YIELD() cannot be used there: buffer index and | |||
860 | // my_buffer_index are *always* 32-bit integers. | |||
861 | KMP_MB(); /* is this necessary? */ | |||
862 | KD_TRACE(100, ("__kmp_dispatch_init: T#%d after wait: my_buffer_index:%d "if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init: T#%d after wait: my_buffer_index:%d " "sh->buffer_index:%d\n", gtid, my_buffer_index, sh->buffer_index ); } | |||
863 | "sh->buffer_index:%d\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init: T#%d after wait: my_buffer_index:%d " "sh->buffer_index:%d\n", gtid, my_buffer_index, sh->buffer_index ); } | |||
864 | gtid, my_buffer_index, sh->buffer_index))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init: T#%d after wait: my_buffer_index:%d " "sh->buffer_index:%d\n", gtid, my_buffer_index, sh->buffer_index ); }; | |||
865 | ||||
866 | th->th.th_dispatch->th_dispatch_pr_current = (dispatch_private_info_t *)pr; | |||
867 | th->th.th_dispatch->th_dispatch_sh_current = | |||
868 | CCAST(dispatch_shared_info_t *, (volatile dispatch_shared_info_t *)sh)const_cast<dispatch_shared_info_t *>((volatile dispatch_shared_info_t *)sh); | |||
869 | #if USE_ITT_BUILD1 | |||
870 | if (pr->flags.ordered) { | |||
871 | __kmp_itt_ordered_init(gtid); | |||
872 | } | |||
873 | // Report loop metadata | |||
874 | if (itt_need_metadata_reporting) { | |||
875 | // Only report metadata by master of active team at level 1 | |||
876 | kmp_uint64 schedtype = 0; | |||
877 | switch (schedule) { | |||
878 | case kmp_sch_static_chunked: | |||
879 | case kmp_sch_static_balanced: // Chunk is calculated in the switch above | |||
880 | break; | |||
881 | case kmp_sch_static_greedy: | |||
882 | cur_chunk = pr->u.p.parm1; | |||
883 | break; | |||
884 | case kmp_sch_dynamic_chunked: | |||
885 | schedtype = 1; | |||
886 | break; | |||
887 | case kmp_sch_guided_iterative_chunked: | |||
888 | case kmp_sch_guided_analytical_chunked: | |||
889 | #if OMP_45_ENABLED(50 >= 45) | |||
890 | case kmp_sch_guided_simd: | |||
891 | #endif | |||
892 | schedtype = 2; | |||
893 | break; | |||
894 | default: | |||
895 | // Should we put this case under "static"? | |||
896 | // case kmp_sch_static_steal: | |||
897 | schedtype = 3; | |||
898 | break; | |||
899 | } | |||
900 | __kmp_itt_metadata_loop(loc, schedtype, pr->u.p.tc, cur_chunk); | |||
901 | } | |||
902 | #if KMP_USE_HIER_SCHED0 | |||
903 | if (pr->flags.use_hier) { | |||
904 | pr->u.p.count = 0; | |||
905 | pr->u.p.ub = pr->u.p.lb = pr->u.p.st = pr->u.p.tc = 0; | |||
906 | } | |||
907 | #endif // KMP_USER_HIER_SCHED | |||
908 | #endif /* USE_ITT_BUILD */ | |||
909 | } | |||
910 | ||||
911 | #ifdef KMP_DEBUG1 | |||
912 | { | |||
913 | char *buff; | |||
914 | // create format specifiers before the debug output | |||
915 | buff = __kmp_str_format( | |||
916 | "__kmp_dispatch_init: T#%%d returning: schedule:%%d ordered:%%%s " | |||
917 | "lb:%%%s ub:%%%s" | |||
918 | " st:%%%s tc:%%%s count:%%%s\n\tordered_lower:%%%s ordered_upper:%%%s" | |||
919 | " parm1:%%%s parm2:%%%s parm3:%%%s parm4:%%%s\n", | |||
920 | traits_t<UT>::spec, traits_t<T>::spec, traits_t<T>::spec, | |||
921 | traits_t<ST>::spec, traits_t<UT>::spec, traits_t<UT>::spec, | |||
922 | traits_t<UT>::spec, traits_t<UT>::spec, traits_t<T>::spec, | |||
923 | traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec); | |||
924 | KD_TRACE(10, (buff, gtid, pr->schedule, pr->flags.ordered, pr->u.p.lb,if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, pr ->schedule, pr->flags.ordered, pr->u.p.lb, pr->u. p.ub, pr->u.p.st, pr->u.p.tc, pr->u.p.count, pr-> u.p.ordered_lower, pr->u.p.ordered_upper, pr->u.p.parm1 , pr->u.p.parm2, pr->u.p.parm3, pr->u.p.parm4); } | |||
925 | pr->u.p.ub, pr->u.p.st, pr->u.p.tc, pr->u.p.count,if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, pr ->schedule, pr->flags.ordered, pr->u.p.lb, pr->u. p.ub, pr->u.p.st, pr->u.p.tc, pr->u.p.count, pr-> u.p.ordered_lower, pr->u.p.ordered_upper, pr->u.p.parm1 , pr->u.p.parm2, pr->u.p.parm3, pr->u.p.parm4); } | |||
926 | pr->u.p.ordered_lower, pr->u.p.ordered_upper, pr->u.p.parm1,if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, pr ->schedule, pr->flags.ordered, pr->u.p.lb, pr->u. p.ub, pr->u.p.st, pr->u.p.tc, pr->u.p.count, pr-> u.p.ordered_lower, pr->u.p.ordered_upper, pr->u.p.parm1 , pr->u.p.parm2, pr->u.p.parm3, pr->u.p.parm4); } | |||
927 | pr->u.p.parm2, pr->u.p.parm3, pr->u.p.parm4))if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, pr ->schedule, pr->flags.ordered, pr->u.p.lb, pr->u. p.ub, pr->u.p.st, pr->u.p.tc, pr->u.p.count, pr-> u.p.ordered_lower, pr->u.p.ordered_upper, pr->u.p.parm1 , pr->u.p.parm2, pr->u.p.parm3, pr->u.p.parm4); }; | |||
928 | __kmp_str_free(&buff); | |||
929 | } | |||
930 | #endif | |||
931 | #if (KMP_STATIC_STEAL_ENABLED1) | |||
932 | // It cannot be guaranteed that after execution of a loop with some other | |||
933 | // schedule kind all the parm3 variables will contain the same value. Even if | |||
934 | // all parm3 will be the same, it still exists a bad case like using 0 and 1 | |||
935 | // rather than program life-time increment. So the dedicated variable is | |||
936 | // required. The 'static_steal_counter' is used. | |||
937 | if (schedule == kmp_sch_static_steal) { | |||
938 | // Other threads will inspect this variable when searching for a victim. | |||
939 | // This is a flag showing that other threads may steal from this thread | |||
940 | // since then. | |||
941 | volatile T *p = &pr->u.p.static_steal_counter; | |||
942 | *p = *p + 1; | |||
943 | } | |||
944 | #endif // ( KMP_STATIC_STEAL_ENABLED ) | |||
945 | ||||
946 | #if OMPT_SUPPORT1 && OMPT_OPTIONAL1 | |||
947 | if (ompt_enabled.ompt_callback_work) { | |||
948 | ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL__null); | |||
949 | ompt_task_info_t *task_info = __ompt_get_task_info_object(0); | |||
950 | ompt_callbacks.ompt_callback(ompt_callback_work)ompt_callback_work_callback( | |||
951 | ompt_work_loop, ompt_scope_begin, &(team_info->parallel_data), | |||
952 | &(task_info->task_data), pr->u.p.tc, OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid)); | |||
953 | } | |||
954 | #endif | |||
955 | KMP_PUSH_PARTITIONED_TIMER(OMP_loop_dynamic)((void)0); | |||
956 | } | |||
957 | ||||
958 | /* For ordered loops, either __kmp_dispatch_finish() should be called after | |||
959 | * every iteration, or __kmp_dispatch_finish_chunk() should be called after | |||
960 | * every chunk of iterations. If the ordered section(s) were not executed | |||
961 | * for this iteration (or every iteration in this chunk), we need to set the | |||
962 | * ordered iteration counters so that the next thread can proceed. */ | |||
963 | template <typename UT> | |||
964 | static void __kmp_dispatch_finish(int gtid, ident_t *loc) { | |||
965 | typedef typename traits_t<UT>::signed_t ST; | |||
966 | kmp_info_t *th = __kmp_threads[gtid]; | |||
967 | ||||
968 | KD_TRACE(100, ("__kmp_dispatch_finish: T#%d called\n", gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_finish: T#%d called\n" , gtid); }; | |||
969 | if (!th->th.th_team->t.t_serialized) { | |||
970 | ||||
971 | dispatch_private_info_template<UT> *pr = | |||
972 | reinterpret_cast<dispatch_private_info_template<UT> *>( | |||
973 | th->th.th_dispatch->th_dispatch_pr_current); | |||
974 | dispatch_shared_info_template<UT> volatile *sh = | |||
975 | reinterpret_cast<dispatch_shared_info_template<UT> volatile *>( | |||
976 | th->th.th_dispatch->th_dispatch_sh_current); | |||
977 | KMP_DEBUG_ASSERT(pr)if (!(pr)) { __kmp_debug_assert("pr", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 977); }; | |||
978 | KMP_DEBUG_ASSERT(sh)if (!(sh)) { __kmp_debug_assert("sh", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 978); }; | |||
979 | KMP_DEBUG_ASSERT(th->th.th_dispatch ==if (!(th->th.th_dispatch == &th->th.th_team->t.t_dispatch [th->th.th_info.ds.ds_tid])) { __kmp_debug_assert("th->th.th_dispatch == &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 980); } | |||
980 | &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid])if (!(th->th.th_dispatch == &th->th.th_team->t.t_dispatch [th->th.th_info.ds.ds_tid])) { __kmp_debug_assert("th->th.th_dispatch == &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 980); }; | |||
981 | ||||
982 | if (pr->ordered_bumped) { | |||
983 | KD_TRACE(if (kmp_d_debug >= 1000) { __kmp_debug_printf ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n" , gtid); } | |||
984 | 1000,if (kmp_d_debug >= 1000) { __kmp_debug_printf ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n" , gtid); } | |||
985 | ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n",if (kmp_d_debug >= 1000) { __kmp_debug_printf ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n" , gtid); } | |||
986 | gtid))if (kmp_d_debug >= 1000) { __kmp_debug_printf ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n" , gtid); }; | |||
987 | pr->ordered_bumped = 0; | |||
988 | } else { | |||
989 | UT lower = pr->u.p.ordered_lower; | |||
990 | ||||
991 | #ifdef KMP_DEBUG1 | |||
992 | { | |||
993 | char *buff; | |||
994 | // create format specifiers before the debug output | |||
995 | buff = __kmp_str_format("__kmp_dispatch_finish: T#%%d before wait: " | |||
996 | "ordered_iteration:%%%s lower:%%%s\n", | |||
997 | traits_t<UT>::spec, traits_t<UT>::spec); | |||
998 | KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower))if (kmp_d_debug >= 1000) { __kmp_debug_printf (buff, gtid, sh->u.s.ordered_iteration, lower); }; | |||
999 | __kmp_str_free(&buff); | |||
1000 | } | |||
1001 | #endif | |||
1002 | ||||
1003 | __kmp_wait_yield<UT>(&sh->u.s.ordered_iteration, lower, | |||
1004 | __kmp_ge<UT> USE_ITT_BUILD_ARG(NULL), __null); | |||
1005 | KMP_MB(); /* is this necessary? */ | |||
1006 | #ifdef KMP_DEBUG1 | |||
1007 | { | |||
1008 | char *buff; | |||
1009 | // create format specifiers before the debug output | |||
1010 | buff = __kmp_str_format("__kmp_dispatch_finish: T#%%d after wait: " | |||
1011 | "ordered_iteration:%%%s lower:%%%s\n", | |||
1012 | traits_t<UT>::spec, traits_t<UT>::spec); | |||
1013 | KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower))if (kmp_d_debug >= 1000) { __kmp_debug_printf (buff, gtid, sh->u.s.ordered_iteration, lower); }; | |||
1014 | __kmp_str_free(&buff); | |||
1015 | } | |||
1016 | #endif | |||
1017 | ||||
1018 | test_then_inc<ST>((volatile ST *)&sh->u.s.ordered_iteration); | |||
1019 | } // if | |||
1020 | } // if | |||
1021 | KD_TRACE(100, ("__kmp_dispatch_finish: T#%d returned\n", gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_finish: T#%d returned\n" , gtid); }; | |||
1022 | } | |||
1023 | ||||
1024 | #ifdef KMP_GOMP_COMPAT | |||
1025 | ||||
1026 | template <typename UT> | |||
1027 | static void __kmp_dispatch_finish_chunk(int gtid, ident_t *loc) { | |||
1028 | typedef typename traits_t<UT>::signed_t ST; | |||
1029 | kmp_info_t *th = __kmp_threads[gtid]; | |||
1030 | ||||
1031 | KD_TRACE(100, ("__kmp_dispatch_finish_chunk: T#%d called\n", gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_finish_chunk: T#%d called\n" , gtid); }; | |||
1032 | if (!th->th.th_team->t.t_serialized) { | |||
1033 | // int cid; | |||
1034 | dispatch_private_info_template<UT> *pr = | |||
1035 | reinterpret_cast<dispatch_private_info_template<UT> *>( | |||
1036 | th->th.th_dispatch->th_dispatch_pr_current); | |||
1037 | dispatch_shared_info_template<UT> volatile *sh = | |||
1038 | reinterpret_cast<dispatch_shared_info_template<UT> volatile *>( | |||
1039 | th->th.th_dispatch->th_dispatch_sh_current); | |||
1040 | KMP_DEBUG_ASSERT(pr)if (!(pr)) { __kmp_debug_assert("pr", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 1040); }; | |||
1041 | KMP_DEBUG_ASSERT(sh)if (!(sh)) { __kmp_debug_assert("sh", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 1041); }; | |||
1042 | KMP_DEBUG_ASSERT(th->th.th_dispatch ==if (!(th->th.th_dispatch == &th->th.th_team->t.t_dispatch [th->th.th_info.ds.ds_tid])) { __kmp_debug_assert("th->th.th_dispatch == &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 1043); } | |||
1043 | &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid])if (!(th->th.th_dispatch == &th->th.th_team->t.t_dispatch [th->th.th_info.ds.ds_tid])) { __kmp_debug_assert("th->th.th_dispatch == &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 1043); }; | |||
1044 | ||||
1045 | // for (cid = 0; cid < KMP_MAX_ORDERED; ++cid) { | |||
1046 | UT lower = pr->u.p.ordered_lower; | |||
1047 | UT upper = pr->u.p.ordered_upper; | |||
1048 | UT inc = upper - lower + 1; | |||
1049 | ||||
1050 | if (pr->ordered_bumped == inc) { | |||
1051 | KD_TRACE(if (kmp_d_debug >= 1000) { __kmp_debug_printf ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n" , gtid); } | |||
1052 | 1000,if (kmp_d_debug >= 1000) { __kmp_debug_printf ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n" , gtid); } | |||
1053 | ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n",if (kmp_d_debug >= 1000) { __kmp_debug_printf ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n" , gtid); } | |||
1054 | gtid))if (kmp_d_debug >= 1000) { __kmp_debug_printf ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n" , gtid); }; | |||
1055 | pr->ordered_bumped = 0; | |||
1056 | } else { | |||
1057 | inc -= pr->ordered_bumped; | |||
1058 | ||||
1059 | #ifdef KMP_DEBUG1 | |||
1060 | { | |||
1061 | char *buff; | |||
1062 | // create format specifiers before the debug output | |||
1063 | buff = __kmp_str_format( | |||
1064 | "__kmp_dispatch_finish_chunk: T#%%d before wait: " | |||
1065 | "ordered_iteration:%%%s lower:%%%s upper:%%%s\n", | |||
1066 | traits_t<UT>::spec, traits_t<UT>::spec, traits_t<UT>::spec); | |||
1067 | KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower, upper))if (kmp_d_debug >= 1000) { __kmp_debug_printf (buff, gtid, sh->u.s.ordered_iteration, lower, upper); }; | |||
1068 | __kmp_str_free(&buff); | |||
1069 | } | |||
1070 | #endif | |||
1071 | ||||
1072 | __kmp_wait_yield<UT>(&sh->u.s.ordered_iteration, lower, | |||
1073 | __kmp_ge<UT> USE_ITT_BUILD_ARG(NULL), __null); | |||
1074 | ||||
1075 | KMP_MB(); /* is this necessary? */ | |||
1076 | KD_TRACE(1000, ("__kmp_dispatch_finish_chunk: T#%d resetting "if (kmp_d_debug >= 1000) { __kmp_debug_printf ("__kmp_dispatch_finish_chunk: T#%d resetting " "ordered_bumped to zero\n", gtid); } | |||
1077 | "ordered_bumped to zero\n",if (kmp_d_debug >= 1000) { __kmp_debug_printf ("__kmp_dispatch_finish_chunk: T#%d resetting " "ordered_bumped to zero\n", gtid); } | |||
1078 | gtid))if (kmp_d_debug >= 1000) { __kmp_debug_printf ("__kmp_dispatch_finish_chunk: T#%d resetting " "ordered_bumped to zero\n", gtid); }; | |||
1079 | pr->ordered_bumped = 0; | |||
1080 | //!!!!! TODO check if the inc should be unsigned, or signed??? | |||
1081 | #ifdef KMP_DEBUG1 | |||
1082 | { | |||
1083 | char *buff; | |||
1084 | // create format specifiers before the debug output | |||
1085 | buff = __kmp_str_format( | |||
1086 | "__kmp_dispatch_finish_chunk: T#%%d after wait: " | |||
1087 | "ordered_iteration:%%%s inc:%%%s lower:%%%s upper:%%%s\n", | |||
1088 | traits_t<UT>::spec, traits_t<UT>::spec, traits_t<UT>::spec, | |||
1089 | traits_t<UT>::spec); | |||
1090 | KD_TRACE(1000,if (kmp_d_debug >= 1000) { __kmp_debug_printf (buff, gtid, sh->u.s.ordered_iteration, inc, lower, upper); } | |||
1091 | (buff, gtid, sh->u.s.ordered_iteration, inc, lower, upper))if (kmp_d_debug >= 1000) { __kmp_debug_printf (buff, gtid, sh->u.s.ordered_iteration, inc, lower, upper); }; | |||
1092 | __kmp_str_free(&buff); | |||
1093 | } | |||
1094 | #endif | |||
1095 | ||||
1096 | test_then_add<ST>((volatile ST *)&sh->u.s.ordered_iteration, inc); | |||
1097 | } | |||
1098 | // } | |||
1099 | } | |||
1100 | KD_TRACE(100, ("__kmp_dispatch_finish_chunk: T#%d returned\n", gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_finish_chunk: T#%d returned\n" , gtid); }; | |||
1101 | } | |||
1102 | ||||
1103 | #endif /* KMP_GOMP_COMPAT */ | |||
1104 | ||||
1105 | template <typename T> | |||
1106 | int __kmp_dispatch_next_algorithm(int gtid, | |||
1107 | dispatch_private_info_template<T> *pr, | |||
1108 | dispatch_shared_info_template<T> volatile *sh, | |||
1109 | kmp_int32 *p_last, T *p_lb, T *p_ub, | |||
1110 | typename traits_t<T>::signed_t *p_st, T nproc, | |||
1111 | T tid) { | |||
1112 | typedef typename traits_t<T>::unsigned_t UT; | |||
1113 | typedef typename traits_t<T>::signed_t ST; | |||
1114 | typedef typename traits_t<T>::floating_t DBL; | |||
1115 | int status = 0; | |||
1116 | kmp_int32 last = 0; | |||
1117 | T start; | |||
1118 | ST incr; | |||
1119 | UT limit, trip, init; | |||
1120 | kmp_info_t *th = __kmp_threads[gtid]; | |||
1121 | kmp_team_t *team = th->th.th_team; | |||
1122 | ||||
1123 | KMP_DEBUG_ASSERT(th->th.th_dispatch ==if (!(th->th.th_dispatch == &th->th.th_team->t.t_dispatch [th->th.th_info.ds.ds_tid])) { __kmp_debug_assert("th->th.th_dispatch == &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 1124); } | |||
1124 | &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid])if (!(th->th.th_dispatch == &th->th.th_team->t.t_dispatch [th->th.th_info.ds.ds_tid])) { __kmp_debug_assert("th->th.th_dispatch == &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 1124); }; | |||
1125 | KMP_DEBUG_ASSERT(pr)if (!(pr)) { __kmp_debug_assert("pr", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 1125); }; | |||
1126 | KMP_DEBUG_ASSERT(sh)if (!(sh)) { __kmp_debug_assert("sh", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 1126); }; | |||
1127 | KMP_DEBUG_ASSERT(tid >= 0 && tid < nproc)if (!(tid >= 0 && tid < nproc)) { __kmp_debug_assert ("tid >= 0 && tid < nproc", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 1127); }; | |||
1128 | #ifdef KMP_DEBUG1 | |||
1129 | { | |||
1130 | char *buff; | |||
1131 | // create format specifiers before the debug output | |||
1132 | buff = | |||
1133 | __kmp_str_format("__kmp_dispatch_next_algorithm: T#%%d called pr:%%p " | |||
1134 | "sh:%%p nproc:%%%s tid:%%%s\n", | |||
1135 | traits_t<T>::spec, traits_t<T>::spec); | |||
1136 | KD_TRACE(10, (buff, gtid, pr, sh, nproc, tid))if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, pr , sh, nproc, tid); }; | |||
1137 | __kmp_str_free(&buff); | |||
1138 | } | |||
1139 | #endif | |||
1140 | ||||
1141 | // zero trip count | |||
1142 | if (pr->u.p.tc == 0) { | |||
1143 | KD_TRACE(10,if (kmp_d_debug >= 10) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d early exit trip count is " "zero status:%d\n", gtid, status); } | |||
1144 | ("__kmp_dispatch_next_algorithm: T#%d early exit trip count is "if (kmp_d_debug >= 10) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d early exit trip count is " "zero status:%d\n", gtid, status); } | |||
1145 | "zero status:%d\n",if (kmp_d_debug >= 10) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d early exit trip count is " "zero status:%d\n", gtid, status); } | |||
1146 | gtid, status))if (kmp_d_debug >= 10) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d early exit trip count is " "zero status:%d\n", gtid, status); }; | |||
1147 | return 0; | |||
1148 | } | |||
1149 | ||||
1150 | switch (pr->schedule) { | |||
1151 | #if (KMP_STATIC_STEAL_ENABLED1) | |||
1152 | case kmp_sch_static_steal: { | |||
1153 | T chunk = pr->u.p.parm1; | |||
1154 | ||||
1155 | KD_TRACE(100,if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_static_steal case\n" , gtid); } | |||
1156 | ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_static_steal case\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_static_steal case\n" , gtid); } | |||
1157 | gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_static_steal case\n" , gtid); }; | |||
1158 | ||||
1159 | trip = pr->u.p.tc - 1; | |||
1160 | ||||
1161 | if (traits_t<T>::type_size > 4) { | |||
1162 | // use lock for 8-byte and CAS for 4-byte induction | |||
1163 | // variable. TODO (optional): check and use 16-byte CAS | |||
1164 | kmp_lock_t *lck = th->th.th_dispatch->th_steal_lock; | |||
1165 | KMP_DEBUG_ASSERT(lck != NULL)if (!(lck != __null)) { __kmp_debug_assert("lck != __null", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 1165); }; | |||
1166 | if (pr->u.p.count < (UT)pr->u.p.ub) { | |||
1167 | __kmp_acquire_lock(lck, gtid); | |||
1168 | // try to get own chunk of iterations | |||
1169 | init = (pr->u.p.count)++; | |||
1170 | status = (init < (UT)pr->u.p.ub); | |||
1171 | __kmp_release_lock(lck, gtid); | |||
1172 | } else { | |||
1173 | status = 0; // no own chunks | |||
1174 | } | |||
1175 | if (!status) { // try to steal | |||
1176 | kmp_info_t **other_threads = team->t.t_threads; | |||
1177 | int while_limit = nproc; // nproc attempts to find a victim | |||
1178 | int while_index = 0; | |||
1179 | // TODO: algorithm of searching for a victim | |||
1180 | // should be cleaned up and measured | |||
1181 | while ((!status) && (while_limit != ++while_index)) { | |||
1182 | T remaining; | |||
1183 | T victimIdx = pr->u.p.parm4; | |||
1184 | T oldVictimIdx = victimIdx ? victimIdx - 1 : nproc - 1; | |||
1185 | dispatch_private_info_template<T> *victim = | |||
1186 | reinterpret_cast<dispatch_private_info_template<T> *>( | |||
1187 | other_threads[victimIdx] | |||
1188 | ->th.th_dispatch->th_dispatch_pr_current); | |||
1189 | while ((victim == NULL__null || victim == pr || | |||
1190 | (*(volatile T *)&victim->u.p.static_steal_counter != | |||
1191 | *(volatile T *)&pr->u.p.static_steal_counter)) && | |||
1192 | oldVictimIdx != victimIdx) { | |||
1193 | victimIdx = (victimIdx + 1) % nproc; | |||
1194 | victim = reinterpret_cast<dispatch_private_info_template<T> *>( | |||
1195 | other_threads[victimIdx] | |||
1196 | ->th.th_dispatch->th_dispatch_pr_current); | |||
1197 | } | |||
1198 | if (!victim || (*(volatile T *)&victim->u.p.static_steal_counter != | |||
1199 | *(volatile T *)&pr->u.p.static_steal_counter)) { | |||
1200 | continue; // try once more (nproc attempts in total) | |||
1201 | // no victim is ready yet to participate in stealing | |||
1202 | // because all victims are still in kmp_init_dispatch | |||
1203 | } | |||
1204 | if (victim->u.p.count + 2 > (UT)victim->u.p.ub) { | |||
1205 | pr->u.p.parm4 = (victimIdx + 1) % nproc; // shift start tid | |||
1206 | continue; // not enough chunks to steal, goto next victim | |||
1207 | } | |||
1208 | ||||
1209 | lck = other_threads[victimIdx]->th.th_dispatch->th_steal_lock; | |||
1210 | KMP_ASSERT(lck != NULL)if (!(lck != __null)) { __kmp_debug_assert("lck != NULL", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 1210); }; | |||
1211 | __kmp_acquire_lock(lck, gtid); | |||
1212 | limit = victim->u.p.ub; // keep initial ub | |||
1213 | if (victim->u.p.count >= limit || | |||
1214 | (remaining = limit - victim->u.p.count) < 2) { | |||
1215 | __kmp_release_lock(lck, gtid); | |||
1216 | pr->u.p.parm4 = (victimIdx + 1) % nproc; // next victim | |||
1217 | continue; // not enough chunks to steal | |||
1218 | } | |||
1219 | // stealing succeeded, reduce victim's ub by 1/4 of undone chunks or | |||
1220 | // by 1 | |||
1221 | if (remaining > 3) { | |||
1222 | // steal 1/4 of remaining | |||
1223 | KMP_COUNT_DEVELOPER_VALUE(FOR_static_steal_stolen, remaining >> 2)((void)0); | |||
1224 | init = (victim->u.p.ub -= (remaining >> 2)); | |||
1225 | } else { | |||
1226 | // steal 1 chunk of 2 or 3 remaining | |||
1227 | KMP_COUNT_DEVELOPER_VALUE(FOR_static_steal_stolen, 1)((void)0); | |||
1228 | init = (victim->u.p.ub -= 1); | |||
1229 | } | |||
1230 | __kmp_release_lock(lck, gtid); | |||
1231 | ||||
1232 | KMP_DEBUG_ASSERT(init + 1 <= limit)if (!(init + 1 <= limit)) { __kmp_debug_assert("init + 1 <= limit" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 1232); }; | |||
1233 | pr->u.p.parm4 = victimIdx; // remember victim to steal from | |||
1234 | status = 1; | |||
1235 | while_index = 0; | |||
1236 | // now update own count and ub with stolen range but init chunk | |||
1237 | __kmp_acquire_lock(th->th.th_dispatch->th_steal_lock, gtid); | |||
1238 | pr->u.p.count = init + 1; | |||
1239 | pr->u.p.ub = limit; | |||
1240 | __kmp_release_lock(th->th.th_dispatch->th_steal_lock, gtid); | |||
1241 | } // while (search for victim) | |||
1242 | } // if (try to find victim and steal) | |||
1243 | } else { | |||
1244 | // 4-byte induction variable, use 8-byte CAS for pair (count, ub) | |||
1245 | typedef union { | |||
1246 | struct { | |||
1247 | UT count; | |||
1248 | T ub; | |||
1249 | } p; | |||
1250 | kmp_int64 b; | |||
1251 | } union_i4; | |||
1252 | // All operations on 'count' or 'ub' must be combined atomically | |||
1253 | // together. | |||
1254 | { | |||
1255 | union_i4 vold, vnew; | |||
1256 | vold.b = *(volatile kmp_int64 *)(&pr->u.p.count); | |||
1257 | vnew = vold; | |||
1258 | vnew.p.count++; | |||
1259 | while (!KMP_COMPARE_AND_STORE_ACQ64(__sync_bool_compare_and_swap((volatile kmp_uint64 *)((volatile kmp_int64 *)&pr->u.p.count), (kmp_uint64)(*(kmp_int64 *) & vold.b), (kmp_uint64)(*(kmp_int64 *) & vnew.b)) | |||
1260 | (volatile kmp_int64 *)&pr->u.p.count,__sync_bool_compare_and_swap((volatile kmp_uint64 *)((volatile kmp_int64 *)&pr->u.p.count), (kmp_uint64)(*(kmp_int64 *) & vold.b), (kmp_uint64)(*(kmp_int64 *) & vnew.b)) | |||
1261 | *VOLATILE_CAST(kmp_int64 *) & vold.b,__sync_bool_compare_and_swap((volatile kmp_uint64 *)((volatile kmp_int64 *)&pr->u.p.count), (kmp_uint64)(*(kmp_int64 *) & vold.b), (kmp_uint64)(*(kmp_int64 *) & vnew.b)) | |||
1262 | *VOLATILE_CAST(kmp_int64 *) & vnew.b)__sync_bool_compare_and_swap((volatile kmp_uint64 *)((volatile kmp_int64 *)&pr->u.p.count), (kmp_uint64)(*(kmp_int64 *) & vold.b), (kmp_uint64)(*(kmp_int64 *) & vnew.b))) { | |||
1263 | KMP_CPU_PAUSE()__kmp_x86_pause(); | |||
1264 | vold.b = *(volatile kmp_int64 *)(&pr->u.p.count); | |||
1265 | vnew = vold; | |||
1266 | vnew.p.count++; | |||
1267 | } | |||
1268 | vnew = vold; | |||
1269 | init = vnew.p.count; | |||
1270 | status = (init < (UT)vnew.p.ub); | |||
1271 | } | |||
1272 | ||||
1273 | if (!status) { | |||
1274 | kmp_info_t **other_threads = team->t.t_threads; | |||
1275 | int while_limit = nproc; // nproc attempts to find a victim | |||
1276 | int while_index = 0; | |||
1277 | ||||
1278 | // TODO: algorithm of searching for a victim | |||
1279 | // should be cleaned up and measured | |||
1280 | while ((!status) && (while_limit != ++while_index)) { | |||
1281 | union_i4 vold, vnew; | |||
1282 | kmp_int32 remaining; | |||
1283 | T victimIdx = pr->u.p.parm4; | |||
1284 | T oldVictimIdx = victimIdx ? victimIdx - 1 : nproc - 1; | |||
1285 | dispatch_private_info_template<T> *victim = | |||
1286 | reinterpret_cast<dispatch_private_info_template<T> *>( | |||
1287 | other_threads[victimIdx] | |||
1288 | ->th.th_dispatch->th_dispatch_pr_current); | |||
1289 | while ((victim == NULL__null || victim == pr || | |||
1290 | (*(volatile T *)&victim->u.p.static_steal_counter != | |||
1291 | *(volatile T *)&pr->u.p.static_steal_counter)) && | |||
1292 | oldVictimIdx != victimIdx) { | |||
1293 | victimIdx = (victimIdx + 1) % nproc; | |||
1294 | victim = reinterpret_cast<dispatch_private_info_template<T> *>( | |||
1295 | other_threads[victimIdx] | |||
1296 | ->th.th_dispatch->th_dispatch_pr_current); | |||
1297 | } | |||
1298 | if (!victim || (*(volatile T *)&victim->u.p.static_steal_counter != | |||
1299 | *(volatile T *)&pr->u.p.static_steal_counter)) { | |||
1300 | continue; // try once more (nproc attempts in total) | |||
1301 | // no victim is ready yet to participate in stealing | |||
1302 | // because all victims are still in kmp_init_dispatch | |||
1303 | } | |||
1304 | pr->u.p.parm4 = victimIdx; // new victim found | |||
1305 | while (1) { // CAS loop if victim has enough chunks to steal | |||
1306 | vold.b = *(volatile kmp_int64 *)(&victim->u.p.count); | |||
1307 | vnew = vold; | |||
1308 | ||||
1309 | KMP_DEBUG_ASSERT((vnew.p.ub - 1) * (UT)chunk <= trip)if (!((vnew.p.ub - 1) * (UT)chunk <= trip)) { __kmp_debug_assert ("(vnew.p.ub - 1) * (UT)chunk <= trip", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 1309); }; | |||
1310 | if (vnew.p.count >= (UT)vnew.p.ub || | |||
1311 | (remaining = vnew.p.ub - vnew.p.count) < 2) { | |||
1312 | pr->u.p.parm4 = (victimIdx + 1) % nproc; // shift start victim id | |||
1313 | break; // not enough chunks to steal, goto next victim | |||
1314 | } | |||
1315 | if (remaining > 3) { | |||
1316 | vnew.p.ub -= (remaining >> 2); // try to steal 1/4 of remaining | |||
1317 | } else { | |||
1318 | vnew.p.ub -= 1; // steal 1 chunk of 2 or 3 remaining | |||
1319 | } | |||
1320 | KMP_DEBUG_ASSERT((vnew.p.ub - 1) * (UT)chunk <= trip)if (!((vnew.p.ub - 1) * (UT)chunk <= trip)) { __kmp_debug_assert ("(vnew.p.ub - 1) * (UT)chunk <= trip", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 1320); }; | |||
1321 | // TODO: Should this be acquire or release? | |||
1322 | if (KMP_COMPARE_AND_STORE_ACQ64(__sync_bool_compare_and_swap((volatile kmp_uint64 *)((volatile kmp_int64 *)&victim->u.p.count), (kmp_uint64)(*(kmp_int64 *) & vold.b), (kmp_uint64)(*(kmp_int64 *) & vnew.b)) | |||
1323 | (volatile kmp_int64 *)&victim->u.p.count,__sync_bool_compare_and_swap((volatile kmp_uint64 *)((volatile kmp_int64 *)&victim->u.p.count), (kmp_uint64)(*(kmp_int64 *) & vold.b), (kmp_uint64)(*(kmp_int64 *) & vnew.b)) | |||
1324 | *VOLATILE_CAST(kmp_int64 *) & vold.b,__sync_bool_compare_and_swap((volatile kmp_uint64 *)((volatile kmp_int64 *)&victim->u.p.count), (kmp_uint64)(*(kmp_int64 *) & vold.b), (kmp_uint64)(*(kmp_int64 *) & vnew.b)) | |||
1325 | *VOLATILE_CAST(kmp_int64 *) & vnew.b)__sync_bool_compare_and_swap((volatile kmp_uint64 *)((volatile kmp_int64 *)&victim->u.p.count), (kmp_uint64)(*(kmp_int64 *) & vold.b), (kmp_uint64)(*(kmp_int64 *) & vnew.b))) { | |||
1326 | // stealing succeeded | |||
1327 | KMP_COUNT_DEVELOPER_VALUE(FOR_static_steal_stolen,((void)0) | |||
1328 | vold.p.ub - vnew.p.ub)((void)0); | |||
1329 | status = 1; | |||
1330 | while_index = 0; | |||
1331 | // now update own count and ub | |||
1332 | init = vnew.p.ub; | |||
1333 | vold.p.count = init + 1; | |||
1334 | #if KMP_ARCH_X860 | |||
1335 | KMP_XCHG_FIXED64((volatile kmp_int64 *)(&pr->u.p.count), vold.b)__sync_lock_test_and_set((volatile kmp_uint64 *)((volatile kmp_int64 *)(&pr->u.p.count)), (kmp_uint64)(vold.b)); | |||
1336 | #else | |||
1337 | *(volatile kmp_int64 *)(&pr->u.p.count) = vold.b; | |||
1338 | #endif | |||
1339 | break; | |||
1340 | } // if (check CAS result) | |||
1341 | KMP_CPU_PAUSE()__kmp_x86_pause(); // CAS failed, repeat attempt | |||
1342 | } // while (try to steal from particular victim) | |||
1343 | } // while (search for victim) | |||
1344 | } // if (try to find victim and steal) | |||
1345 | } // if (4-byte induction variable) | |||
1346 | if (!status) { | |||
1347 | *p_lb = 0; | |||
1348 | *p_ub = 0; | |||
1349 | if (p_st != NULL__null) | |||
1350 | *p_st = 0; | |||
1351 | } else { | |||
1352 | start = pr->u.p.parm2; | |||
1353 | init *= chunk; | |||
1354 | limit = chunk + init - 1; | |||
1355 | incr = pr->u.p.st; | |||
1356 | KMP_COUNT_DEVELOPER_VALUE(FOR_static_steal_chunks, 1)((void)0); | |||
1357 | ||||
1358 | KMP_DEBUG_ASSERT(init <= trip)if (!(init <= trip)) { __kmp_debug_assert("init <= trip" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 1358); }; | |||
1359 | if ((last = (limit >= trip)) != 0) | |||
1360 | limit = trip; | |||
1361 | if (p_st != NULL__null) | |||
1362 | *p_st = incr; | |||
1363 | ||||
1364 | if (incr == 1) { | |||
1365 | *p_lb = start + init; | |||
1366 | *p_ub = start + limit; | |||
1367 | } else { | |||
1368 | *p_lb = start + init * incr; | |||
1369 | *p_ub = start + limit * incr; | |||
1370 | } | |||
1371 | ||||
1372 | if (pr->flags.ordered) { | |||
1373 | pr->u.p.ordered_lower = init; | |||
1374 | pr->u.p.ordered_upper = limit; | |||
1375 | } // if | |||
1376 | } // if | |||
1377 | break; | |||
1378 | } // case | |||
1379 | #endif // ( KMP_STATIC_STEAL_ENABLED ) | |||
1380 | case kmp_sch_static_balanced: { | |||
1381 | KD_TRACE(if (kmp_d_debug >= 10) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_static_balanced case\n" , gtid); } | |||
1382 | 10,if (kmp_d_debug >= 10) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_static_balanced case\n" , gtid); } | |||
1383 | ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_static_balanced case\n",if (kmp_d_debug >= 10) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_static_balanced case\n" , gtid); } | |||
1384 | gtid))if (kmp_d_debug >= 10) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_static_balanced case\n" , gtid); }; | |||
1385 | /* check if thread has any iteration to do */ | |||
1386 | if ((status = !pr->u.p.count) != 0) { | |||
1387 | pr->u.p.count = 1; | |||
1388 | *p_lb = pr->u.p.lb; | |||
1389 | *p_ub = pr->u.p.ub; | |||
1390 | last = pr->u.p.parm1; | |||
1391 | if (p_st != NULL__null) | |||
1392 | *p_st = pr->u.p.st; | |||
1393 | } else { /* no iterations to do */ | |||
1394 | pr->u.p.lb = pr->u.p.ub + pr->u.p.st; | |||
1395 | } | |||
1396 | } // case | |||
1397 | break; | |||
1398 | case kmp_sch_static_greedy: /* original code for kmp_sch_static_greedy was | |||
1399 | merged here */ | |||
1400 | case kmp_sch_static_chunked: { | |||
1401 | T parm1; | |||
1402 | ||||
1403 | KD_TRACE(100, ("__kmp_dispatch_next_algorithm: T#%d "if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d " "kmp_sch_static_[affinity|chunked] case\n", gtid); } | |||
1404 | "kmp_sch_static_[affinity|chunked] case\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d " "kmp_sch_static_[affinity|chunked] case\n", gtid); } | |||
1405 | gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d " "kmp_sch_static_[affinity|chunked] case\n", gtid); }; | |||
1406 | parm1 = pr->u.p.parm1; | |||
1407 | ||||
1408 | trip = pr->u.p.tc - 1; | |||
1409 | init = parm1 * (pr->u.p.count + tid); | |||
1410 | ||||
1411 | if ((status = (init <= trip)) != 0) { | |||
1412 | start = pr->u.p.lb; | |||
1413 | incr = pr->u.p.st; | |||
1414 | limit = parm1 + init - 1; | |||
1415 | ||||
1416 | if ((last = (limit >= trip)) != 0) | |||
1417 | limit = trip; | |||
1418 | ||||
1419 | if (p_st != NULL__null) | |||
1420 | *p_st = incr; | |||
1421 | ||||
1422 | pr->u.p.count += nproc; | |||
1423 | ||||
1424 | if (incr == 1) { | |||
1425 | *p_lb = start + init; | |||
1426 | *p_ub = start + limit; | |||
1427 | } else { | |||
1428 | *p_lb = start + init * incr; | |||
1429 | *p_ub = start + limit * incr; | |||
1430 | } | |||
1431 | ||||
1432 | if (pr->flags.ordered) { | |||
1433 | pr->u.p.ordered_lower = init; | |||
1434 | pr->u.p.ordered_upper = limit; | |||
1435 | } // if | |||
1436 | } // if | |||
1437 | } // case | |||
1438 | break; | |||
1439 | ||||
1440 | case kmp_sch_dynamic_chunked: { | |||
1441 | T chunk = pr->u.p.parm1; | |||
1442 | ||||
1443 | KD_TRACE(if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_dynamic_chunked case\n" , gtid); } | |||
1444 | 100,if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_dynamic_chunked case\n" , gtid); } | |||
1445 | ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_dynamic_chunked case\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_dynamic_chunked case\n" , gtid); } | |||
1446 | gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_dynamic_chunked case\n" , gtid); }; | |||
1447 | ||||
1448 | init = chunk * test_then_inc_acq<ST>((volatile ST *)&sh->u.s.iteration); | |||
1449 | trip = pr->u.p.tc - 1; | |||
1450 | ||||
1451 | if ((status = (init <= trip)) == 0) { | |||
1452 | *p_lb = 0; | |||
1453 | *p_ub = 0; | |||
1454 | if (p_st != NULL__null) | |||
1455 | *p_st = 0; | |||
1456 | } else { | |||
1457 | start = pr->u.p.lb; | |||
1458 | limit = chunk + init - 1; | |||
1459 | incr = pr->u.p.st; | |||
1460 | ||||
1461 | if ((last = (limit >= trip)) != 0) | |||
1462 | limit = trip; | |||
1463 | ||||
1464 | if (p_st != NULL__null) | |||
1465 | *p_st = incr; | |||
1466 | ||||
1467 | if (incr == 1) { | |||
1468 | *p_lb = start + init; | |||
1469 | *p_ub = start + limit; | |||
1470 | } else { | |||
1471 | *p_lb = start + init * incr; | |||
1472 | *p_ub = start + limit * incr; | |||
1473 | } | |||
1474 | ||||
1475 | if (pr->flags.ordered) { | |||
1476 | pr->u.p.ordered_lower = init; | |||
1477 | pr->u.p.ordered_upper = limit; | |||
1478 | } // if | |||
1479 | } // if | |||
1480 | } // case | |||
1481 | break; | |||
1482 | ||||
1483 | case kmp_sch_guided_iterative_chunked: { | |||
1484 | T chunkspec = pr->u.p.parm1; | |||
1485 | KD_TRACE(100, ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_guided_chunked "if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_guided_chunked " "iterative case\n", gtid); } | |||
1486 | "iterative case\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_guided_chunked " "iterative case\n", gtid); } | |||
1487 | gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_guided_chunked " "iterative case\n", gtid); }; | |||
1488 | trip = pr->u.p.tc; | |||
1489 | // Start atomic part of calculations | |||
1490 | while (1) { | |||
1491 | ST remaining; // signed, because can be < 0 | |||
1492 | init = sh->u.s.iteration; // shared value | |||
1493 | remaining = trip - init; | |||
1494 | if (remaining <= 0) { // AC: need to compare with 0 first | |||
1495 | // nothing to do, don't try atomic op | |||
1496 | status = 0; | |||
1497 | break; | |||
1498 | } | |||
1499 | if ((T)remaining < | |||
1500 | pr->u.p.parm2) { // compare with K*nproc*(chunk+1), K=2 by default | |||
1501 | // use dynamic-style schedule | |||
1502 | // atomically increment iterations, get old value | |||
1503 | init = test_then_add<ST>(RCAST(volatile ST *, &sh->u.s.iteration)reinterpret_cast<volatile ST *>(&sh->u.s.iteration ), | |||
1504 | (ST)chunkspec); | |||
1505 | remaining = trip - init; | |||
1506 | if (remaining <= 0) { | |||
1507 | status = 0; // all iterations got by other threads | |||
1508 | } else { | |||
1509 | // got some iterations to work on | |||
1510 | status = 1; | |||
1511 | if ((T)remaining > chunkspec) { | |||
1512 | limit = init + chunkspec - 1; | |||
1513 | } else { | |||
1514 | last = 1; // the last chunk | |||
1515 | limit = init + remaining - 1; | |||
1516 | } // if | |||
1517 | } // if | |||
1518 | break; | |||
1519 | } // if | |||
1520 | limit = init + | |||
1521 | (UT)(remaining * *(double *)&pr->u.p.parm3); // divide by K*nproc | |||
1522 | if (compare_and_swap<ST>(RCAST(volatile ST *, &sh->u.s.iteration)reinterpret_cast<volatile ST *>(&sh->u.s.iteration ), | |||
1523 | (ST)init, (ST)limit)) { | |||
1524 | // CAS was successful, chunk obtained | |||
1525 | status = 1; | |||
1526 | --limit; | |||
1527 | break; | |||
1528 | } // if | |||
1529 | } // while | |||
1530 | if (status != 0) { | |||
1531 | start = pr->u.p.lb; | |||
1532 | incr = pr->u.p.st; | |||
1533 | if (p_st != NULL__null) | |||
1534 | *p_st = incr; | |||
1535 | *p_lb = start + init * incr; | |||
1536 | *p_ub = start + limit * incr; | |||
1537 | if (pr->flags.ordered) { | |||
1538 | pr->u.p.ordered_lower = init; | |||
1539 | pr->u.p.ordered_upper = limit; | |||
1540 | } // if | |||
1541 | } else { | |||
1542 | *p_lb = 0; | |||
1543 | *p_ub = 0; | |||
1544 | if (p_st != NULL__null) | |||
1545 | *p_st = 0; | |||
1546 | } // if | |||
1547 | } // case | |||
1548 | break; | |||
1549 | ||||
1550 | #if OMP_45_ENABLED(50 >= 45) | |||
1551 | case kmp_sch_guided_simd: { | |||
1552 | // same as iterative but curr-chunk adjusted to be multiple of given | |||
1553 | // chunk | |||
1554 | T chunk = pr->u.p.parm1; | |||
1555 | KD_TRACE(100,if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_guided_simd case\n" , gtid); } | |||
1556 | ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_guided_simd case\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_guided_simd case\n" , gtid); } | |||
1557 | gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_guided_simd case\n" , gtid); }; | |||
1558 | trip = pr->u.p.tc; | |||
1559 | // Start atomic part of calculations | |||
1560 | while (1) { | |||
1561 | ST remaining; // signed, because can be < 0 | |||
1562 | init = sh->u.s.iteration; // shared value | |||
1563 | remaining = trip - init; | |||
1564 | if (remaining <= 0) { // AC: need to compare with 0 first | |||
1565 | status = 0; // nothing to do, don't try atomic op | |||
1566 | break; | |||
1567 | } | |||
1568 | KMP_DEBUG_ASSERT(init % chunk == 0)if (!(init % chunk == 0)) { __kmp_debug_assert("init % chunk == 0" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 1568); }; | |||
1569 | // compare with K*nproc*(chunk+1), K=2 by default | |||
1570 | if ((T)remaining < pr->u.p.parm2) { | |||
1571 | // use dynamic-style schedule | |||
1572 | // atomically increment iterations, get old value | |||
1573 | init = test_then_add<ST>(RCAST(volatile ST *, &sh->u.s.iteration)reinterpret_cast<volatile ST *>(&sh->u.s.iteration ), | |||
1574 | (ST)chunk); | |||
1575 | remaining = trip - init; | |||
1576 | if (remaining <= 0) { | |||
1577 | status = 0; // all iterations got by other threads | |||
1578 | } else { | |||
1579 | // got some iterations to work on | |||
1580 | status = 1; | |||
1581 | if ((T)remaining > chunk) { | |||
1582 | limit = init + chunk - 1; | |||
1583 | } else { | |||
1584 | last = 1; // the last chunk | |||
1585 | limit = init + remaining - 1; | |||
1586 | } // if | |||
1587 | } // if | |||
1588 | break; | |||
1589 | } // if | |||
1590 | // divide by K*nproc | |||
1591 | UT span = remaining * (*(double *)&pr->u.p.parm3); | |||
1592 | UT rem = span % chunk; | |||
1593 | if (rem) // adjust so that span%chunk == 0 | |||
1594 | span += chunk - rem; | |||
1595 | limit = init + span; | |||
1596 | if (compare_and_swap<ST>(RCAST(volatile ST *, &sh->u.s.iteration)reinterpret_cast<volatile ST *>(&sh->u.s.iteration ), | |||
1597 | (ST)init, (ST)limit)) { | |||
1598 | // CAS was successful, chunk obtained | |||
1599 | status = 1; | |||
1600 | --limit; | |||
1601 | break; | |||
1602 | } // if | |||
1603 | } // while | |||
1604 | if (status != 0) { | |||
1605 | start = pr->u.p.lb; | |||
1606 | incr = pr->u.p.st; | |||
1607 | if (p_st != NULL__null) | |||
1608 | *p_st = incr; | |||
1609 | *p_lb = start + init * incr; | |||
1610 | *p_ub = start + limit * incr; | |||
1611 | if (pr->flags.ordered) { | |||
1612 | pr->u.p.ordered_lower = init; | |||
1613 | pr->u.p.ordered_upper = limit; | |||
1614 | } // if | |||
1615 | } else { | |||
1616 | *p_lb = 0; | |||
1617 | *p_ub = 0; | |||
1618 | if (p_st != NULL__null) | |||
1619 | *p_st = 0; | |||
1620 | } // if | |||
1621 | } // case | |||
1622 | break; | |||
1623 | #endif // OMP_45_ENABLED | |||
1624 | ||||
1625 | case kmp_sch_guided_analytical_chunked: { | |||
1626 | T chunkspec = pr->u.p.parm1; | |||
1627 | UT chunkIdx; | |||
1628 | #if KMP_OS_WINDOWS0 && KMP_ARCH_X860 | |||
1629 | /* for storing original FPCW value for Windows* OS on | |||
1630 | IA-32 architecture 8-byte version */ | |||
1631 | unsigned int oldFpcw; | |||
1632 | unsigned int fpcwSet = 0; | |||
1633 | #endif | |||
1634 | KD_TRACE(100, ("__kmp_dispatch_next_algorithm: T#%d "if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d " "kmp_sch_guided_analytical_chunked case\n", gtid); } | |||
1635 | "kmp_sch_guided_analytical_chunked case\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d " "kmp_sch_guided_analytical_chunked case\n", gtid); } | |||
1636 | gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d " "kmp_sch_guided_analytical_chunked case\n", gtid); }; | |||
1637 | ||||
1638 | trip = pr->u.p.tc; | |||
1639 | ||||
1640 | KMP_DEBUG_ASSERT(nproc > 1)if (!(nproc > 1)) { __kmp_debug_assert("nproc > 1", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 1640); }; | |||
1641 | KMP_DEBUG_ASSERT((2UL * chunkspec + 1) * (UT)nproc < trip)if (!((2UL * chunkspec + 1) * (UT)nproc < trip)) { __kmp_debug_assert ("(2UL * chunkspec + 1) * (UT)nproc < trip", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 1641); }; | |||
1642 | ||||
1643 | while (1) { /* this while loop is a safeguard against unexpected zero | |||
1644 | chunk sizes */ | |||
1645 | chunkIdx = test_then_inc_acq<ST>((volatile ST *)&sh->u.s.iteration); | |||
1646 | if (chunkIdx >= (UT)pr->u.p.parm2) { | |||
1647 | --trip; | |||
1648 | /* use dynamic-style scheduling */ | |||
1649 | init = chunkIdx * chunkspec + pr->u.p.count; | |||
1650 | /* need to verify init > 0 in case of overflow in the above | |||
1651 | * calculation */ | |||
1652 | if ((status = (init > 0 && init <= trip)) != 0) { | |||
1653 | limit = init + chunkspec - 1; | |||
1654 | ||||
1655 | if ((last = (limit >= trip)) != 0) | |||
1656 | limit = trip; | |||
1657 | } | |||
1658 | break; | |||
1659 | } else { | |||
1660 | /* use exponential-style scheduling */ | |||
1661 | /* The following check is to workaround the lack of long double precision on | |||
1662 | Windows* OS. | |||
1663 | This check works around the possible effect that init != 0 for chunkIdx == 0. | |||
1664 | */ | |||
1665 | #if KMP_OS_WINDOWS0 && KMP_ARCH_X860 | |||
1666 | /* If we haven't already done so, save original | |||
1667 | FPCW and set precision to 64-bit, as Windows* OS | |||
1668 | on IA-32 architecture defaults to 53-bit */ | |||
1669 | if (!fpcwSet) { | |||
1670 | oldFpcw = _control87(0, 0); | |||
1671 | _control87(_PC_64, _MCW_PC); | |||
1672 | fpcwSet = 0x30000; | |||
1673 | } | |||
1674 | #endif | |||
1675 | if (chunkIdx) { | |||
1676 | init = __kmp_dispatch_guided_remaining<T>( | |||
1677 | trip, *(DBL *)&pr->u.p.parm3, chunkIdx); | |||
1678 | KMP_DEBUG_ASSERT(init)if (!(init)) { __kmp_debug_assert("init", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 1678); }; | |||
1679 | init = trip - init; | |||
1680 | } else | |||
1681 | init = 0; | |||
1682 | limit = trip - __kmp_dispatch_guided_remaining<T>( | |||
1683 | trip, *(DBL *)&pr->u.p.parm3, chunkIdx + 1); | |||
1684 | KMP_ASSERT(init <= limit)if (!(init <= limit)) { __kmp_debug_assert("init <= limit" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 1684); }; | |||
1685 | if (init < limit) { | |||
1686 | KMP_DEBUG_ASSERT(limit <= trip)if (!(limit <= trip)) { __kmp_debug_assert("limit <= trip" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 1686); }; | |||
1687 | --limit; | |||
1688 | status = 1; | |||
1689 | break; | |||
1690 | } // if | |||
1691 | } // if | |||
1692 | } // while (1) | |||
1693 | #if KMP_OS_WINDOWS0 && KMP_ARCH_X860 | |||
1694 | /* restore FPCW if necessary | |||
1695 | AC: check fpcwSet flag first because oldFpcw can be uninitialized here | |||
1696 | */ | |||
1697 | if (fpcwSet && (oldFpcw & fpcwSet)) | |||
1698 | _control87(oldFpcw, _MCW_PC); | |||
1699 | #endif | |||
1700 | if (status != 0) { | |||
1701 | start = pr->u.p.lb; | |||
1702 | incr = pr->u.p.st; | |||
1703 | if (p_st != NULL__null) | |||
1704 | *p_st = incr; | |||
1705 | *p_lb = start + init * incr; | |||
1706 | *p_ub = start + limit * incr; | |||
1707 | if (pr->flags.ordered) { | |||
1708 | pr->u.p.ordered_lower = init; | |||
1709 | pr->u.p.ordered_upper = limit; | |||
1710 | } | |||
1711 | } else { | |||
1712 | *p_lb = 0; | |||
1713 | *p_ub = 0; | |||
1714 | if (p_st != NULL__null) | |||
1715 | *p_st = 0; | |||
1716 | } | |||
1717 | } // case | |||
1718 | break; | |||
1719 | ||||
1720 | case kmp_sch_trapezoidal: { | |||
1721 | UT index; | |||
1722 | T parm2 = pr->u.p.parm2; | |||
1723 | T parm3 = pr->u.p.parm3; | |||
1724 | T parm4 = pr->u.p.parm4; | |||
1725 | KD_TRACE(100,if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_trapezoidal case\n" , gtid); } | |||
1726 | ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_trapezoidal case\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_trapezoidal case\n" , gtid); } | |||
1727 | gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_trapezoidal case\n" , gtid); }; | |||
1728 | ||||
1729 | index = test_then_inc<ST>((volatile ST *)&sh->u.s.iteration); | |||
1730 | ||||
1731 | init = (index * ((2 * parm2) - (index - 1) * parm4)) / 2; | |||
1732 | trip = pr->u.p.tc - 1; | |||
1733 | ||||
1734 | if ((status = ((T)index < parm3 && init <= trip)) == 0) { | |||
1735 | *p_lb = 0; | |||
1736 | *p_ub = 0; | |||
1737 | if (p_st != NULL__null) | |||
1738 | *p_st = 0; | |||
1739 | } else { | |||
1740 | start = pr->u.p.lb; | |||
1741 | limit = ((index + 1) * (2 * parm2 - index * parm4)) / 2 - 1; | |||
1742 | incr = pr->u.p.st; | |||
1743 | ||||
1744 | if ((last = (limit >= trip)) != 0) | |||
1745 | limit = trip; | |||
1746 | ||||
1747 | if (p_st != NULL__null) | |||
1748 | *p_st = incr; | |||
1749 | ||||
1750 | if (incr == 1) { | |||
1751 | *p_lb = start + init; | |||
1752 | *p_ub = start + limit; | |||
1753 | } else { | |||
1754 | *p_lb = start + init * incr; | |||
1755 | *p_ub = start + limit * incr; | |||
1756 | } | |||
1757 | ||||
1758 | if (pr->flags.ordered) { | |||
1759 | pr->u.p.ordered_lower = init; | |||
1760 | pr->u.p.ordered_upper = limit; | |||
1761 | } // if | |||
1762 | } // if | |||
1763 | } // case | |||
1764 | break; | |||
1765 | default: { | |||
1766 | status = 0; // to avoid complaints on uninitialized variable use | |||
1767 | __kmp_fatal(KMP_MSG(UnknownSchedTypeDetected)__kmp_msg_format(kmp_i18n_msg_UnknownSchedTypeDetected), // Primary message | |||
1768 | KMP_HNT(GetNewerLibrary)__kmp_msg_format(kmp_i18n_hnt_GetNewerLibrary), // Hint | |||
1769 | __kmp_msg_null // Variadic argument list terminator | |||
1770 | ); | |||
1771 | } break; | |||
1772 | } // switch | |||
1773 | if (p_last) | |||
1774 | *p_last = last; | |||
1775 | #ifdef KMP_DEBUG1 | |||
1776 | if (pr->flags.ordered) { | |||
1777 | char *buff; | |||
1778 | // create format specifiers before the debug output | |||
1779 | buff = __kmp_str_format("__kmp_dispatch_next_algorithm: T#%%d " | |||
1780 | "ordered_lower:%%%s ordered_upper:%%%s\n", | |||
1781 | traits_t<UT>::spec, traits_t<UT>::spec); | |||
1782 | KD_TRACE(1000, (buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper))if (kmp_d_debug >= 1000) { __kmp_debug_printf (buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper); }; | |||
1783 | __kmp_str_free(&buff); | |||
1784 | } | |||
1785 | { | |||
1786 | char *buff; | |||
1787 | // create format specifiers before the debug output | |||
1788 | buff = __kmp_str_format( | |||
1789 | "__kmp_dispatch_next_algorithm: T#%%d exit status:%%d p_last:%%d " | |||
1790 | "p_lb:%%%s p_ub:%%%s p_st:%%%s\n", | |||
1791 | traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec); | |||
1792 | KD_TRACE(10, (buff, gtid, status, *p_last, *p_lb, *p_ub, *p_st))if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, status , *p_last, *p_lb, *p_ub, *p_st); }; | |||
1793 | __kmp_str_free(&buff); | |||
1794 | } | |||
1795 | #endif | |||
1796 | return status; | |||
1797 | } | |||
1798 | ||||
/* Define a macro for exiting __kmp_dispatch_next(). If status is 0 (no more
   work), then tell OMPT the loop is over. In some cases kmp_dispatch_fini()
   is not called. */
#if OMPT_SUPPORT && OMPT_OPTIONAL
#define OMPT_LOOP_END                                                          \
  if (status == 0) {                                                           \
    if (ompt_enabled.ompt_callback_work) {                                     \
      ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);              \
      ompt_task_info_t *task_info = __ompt_get_task_info_object(0);            \
      ompt_callbacks.ompt_callback(ompt_callback_work)(                        \
          ompt_work_loop, ompt_scope_end, &(team_info->parallel_data),         \
          &(task_info->task_data), 0, codeptr);                                \
    }                                                                          \
  }
// TODO: implement count
#else
#define OMPT_LOOP_END // no-op
#endif
1817 | ||||
/* Fold the just-dispatched chunk's iteration count into the loop statistics.
   Expects the enclosing scope to provide status, p_lb, p_ub and pr. */
#if KMP_STATS_ENABLED
#define KMP_STATS_LOOP_END                                                     \
  {                                                                            \
    kmp_int64 u, l, t, i;                                                      \
    l = (kmp_int64)(*p_lb);                                                    \
    u = (kmp_int64)(*p_ub);                                                    \
    i = (kmp_int64)(pr->u.p.st);                                               \
    if (status == 0) {                                                         \
      t = 0;                                                                   \
      KMP_POP_PARTITIONED_TIMER();                                             \
    } else if (i == 1) {                                                       \
      if (u >= l)                                                              \
        t = u - l + 1;                                                         \
      else                                                                     \
        t = 0;                                                                 \
    } else if (i < 0) {                                                        \
      if (l >= u)                                                              \
        t = (l - u) / (-i) + 1;                                                \
      else                                                                     \
        t = 0;                                                                 \
    } else {                                                                   \
      if (u >= l)                                                              \
        t = (u - l) / i + 1;                                                   \
      else                                                                     \
        t = 0;                                                                 \
    }                                                                          \
    KMP_COUNT_VALUE(OMP_loop_dynamic_iterations, t);                           \
  }
#else
#define KMP_STATS_LOOP_END /* Nothing */
#endif
1849 | ||||
1850 | template <typename T> | |||
1851 | static int __kmp_dispatch_next(ident_t *loc, int gtid, kmp_int32 *p_last, | |||
1852 | T *p_lb, T *p_ub, | |||
1853 | typename traits_t<T>::signed_t *p_st | |||
1854 | #if OMPT_SUPPORT1 && OMPT_OPTIONAL1 | |||
1855 | , | |||
1856 | void *codeptr | |||
1857 | #endif | |||
1858 | ) { | |||
1859 | ||||
1860 | typedef typename traits_t<T>::unsigned_t UT; | |||
1861 | typedef typename traits_t<T>::signed_t ST; | |||
1862 | // This is potentially slightly misleading, schedule(runtime) will appear here | |||
1863 | // even if the actual runtme schedule is static. (Which points out a | |||
1864 | // disadavantage of schedule(runtime): even when static scheduling is used it | |||
1865 | // costs more than a compile time choice to use static scheduling would.) | |||
1866 | KMP_TIME_PARTITIONED_BLOCK(OMP_loop_dynamic_scheduling)((void)0); | |||
1867 | ||||
1868 | int status; | |||
1869 | dispatch_private_info_template<T> *pr; | |||
1870 | kmp_info_t *th = __kmp_threads[gtid]; | |||
1871 | kmp_team_t *team = th->th.th_team; | |||
1872 | ||||
1873 | KMP_DEBUG_ASSERT(p_lb && p_ub && p_st)if (!(p_lb && p_ub && p_st)) { __kmp_debug_assert ("p_lb && p_ub && p_st", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 1873); }; // AC: these cannot be NULL | |||
1874 | KD_TRACE(if (kmp_d_debug >= 1000) { __kmp_debug_printf ("__kmp_dispatch_next: T#%d called p_lb:%p p_ub:%p p_st:%p p_last: %p\n" , gtid, p_lb, p_ub, p_st, p_last); } | |||
1875 | 1000,if (kmp_d_debug >= 1000) { __kmp_debug_printf ("__kmp_dispatch_next: T#%d called p_lb:%p p_ub:%p p_st:%p p_last: %p\n" , gtid, p_lb, p_ub, p_st, p_last); } | |||
1876 | ("__kmp_dispatch_next: T#%d called p_lb:%p p_ub:%p p_st:%p p_last: %p\n",if (kmp_d_debug >= 1000) { __kmp_debug_printf ("__kmp_dispatch_next: T#%d called p_lb:%p p_ub:%p p_st:%p p_last: %p\n" , gtid, p_lb, p_ub, p_st, p_last); } | |||
1877 | gtid, p_lb, p_ub, p_st, p_last))if (kmp_d_debug >= 1000) { __kmp_debug_printf ("__kmp_dispatch_next: T#%d called p_lb:%p p_ub:%p p_st:%p p_last: %p\n" , gtid, p_lb, p_ub, p_st, p_last); }; | |||
1878 | ||||
1879 | if (team->t.t_serialized) { | |||
1880 | /* NOTE: serialize this dispatch becase we are not at the active level */ | |||
1881 | pr = reinterpret_cast<dispatch_private_info_template<T> *>( | |||
1882 | th->th.th_dispatch->th_disp_buffer); /* top of the stack */ | |||
1883 | KMP_DEBUG_ASSERT(pr)if (!(pr)) { __kmp_debug_assert("pr", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 1883); }; | |||
1884 | ||||
1885 | if ((status = (pr->u.p.tc != 0)) == 0) { | |||
1886 | *p_lb = 0; | |||
1887 | *p_ub = 0; | |||
1888 | // if ( p_last != NULL ) | |||
1889 | // *p_last = 0; | |||
1890 | if (p_st != NULL__null) | |||
1891 | *p_st = 0; | |||
1892 | if (__kmp_env_consistency_check) { | |||
1893 | if (pr->pushed_ws != ct_none) { | |||
1894 | pr->pushed_ws = __kmp_pop_workshare(gtid, pr->pushed_ws, loc); | |||
1895 | } | |||
1896 | } | |||
1897 | } else if (pr->flags.nomerge) { | |||
1898 | kmp_int32 last; | |||
1899 | T start; | |||
1900 | UT limit, trip, init; | |||
1901 | ST incr; | |||
1902 | T chunk = pr->u.p.parm1; | |||
1903 | ||||
1904 | KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_dynamic_chunked case\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next: T#%d kmp_sch_dynamic_chunked case\n" , gtid); } | |||
1905 | gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next: T#%d kmp_sch_dynamic_chunked case\n" , gtid); }; | |||
1906 | ||||
1907 | init = chunk * pr->u.p.count++; | |||
1908 | trip = pr->u.p.tc - 1; | |||
1909 | ||||
1910 | if ((status = (init <= trip)) == 0) { | |||
1911 | *p_lb = 0; | |||
1912 | *p_ub = 0; | |||
1913 | // if ( p_last != NULL ) | |||
1914 | // *p_last = 0; | |||
1915 | if (p_st != NULL__null) | |||
1916 | *p_st = 0; | |||
1917 | if (__kmp_env_consistency_check) { | |||
1918 | if (pr->pushed_ws != ct_none) { | |||
1919 | pr->pushed_ws = __kmp_pop_workshare(gtid, pr->pushed_ws, loc); | |||
1920 | } | |||
1921 | } | |||
1922 | } else { | |||
1923 | start = pr->u.p.lb; | |||
1924 | limit = chunk + init - 1; | |||
1925 | incr = pr->u.p.st; | |||
1926 | ||||
1927 | if ((last = (limit >= trip)) != 0) { | |||
1928 | limit = trip; | |||
1929 | #if KMP_OS_WINDOWS0 | |||
1930 | pr->u.p.last_upper = pr->u.p.ub; | |||
1931 | #endif /* KMP_OS_WINDOWS */ | |||
1932 | } | |||
1933 | if (p_last != NULL__null) | |||
1934 | *p_last = last; | |||
1935 | if (p_st != NULL__null) | |||
1936 | *p_st = incr; | |||
1937 | if (incr == 1) { | |||
1938 | *p_lb = start + init; | |||
1939 | *p_ub = start + limit; | |||
1940 | } else { | |||
1941 | *p_lb = start + init * incr; | |||
1942 | *p_ub = start + limit * incr; | |||
1943 | } | |||
1944 | ||||
1945 | if (pr->flags.ordered) { | |||
1946 | pr->u.p.ordered_lower = init; | |||
1947 | pr->u.p.ordered_upper = limit; | |||
1948 | #ifdef KMP_DEBUG1 | |||
1949 | { | |||
1950 | char *buff; | |||
1951 | // create format specifiers before the debug output | |||
1952 | buff = __kmp_str_format("__kmp_dispatch_next: T#%%d " | |||
1953 | "ordered_lower:%%%s ordered_upper:%%%s\n", | |||
1954 | traits_t<UT>::spec, traits_t<UT>::spec); | |||
1955 | KD_TRACE(1000, (buff, gtid, pr->u.p.ordered_lower,if (kmp_d_debug >= 1000) { __kmp_debug_printf (buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper); } | |||
1956 | pr->u.p.ordered_upper))if (kmp_d_debug >= 1000) { __kmp_debug_printf (buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper); }; | |||
1957 | __kmp_str_free(&buff); | |||
1958 | } | |||
1959 | #endif | |||
1960 | } // if | |||
1961 | } // if | |||
1962 | } else { | |||
1963 | pr->u.p.tc = 0; | |||
1964 | *p_lb = pr->u.p.lb; | |||
1965 | *p_ub = pr->u.p.ub; | |||
1966 | #if KMP_OS_WINDOWS0 | |||
1967 | pr->u.p.last_upper = *p_ub; | |||
1968 | #endif /* KMP_OS_WINDOWS */ | |||
1969 | if (p_last != NULL__null) | |||
1970 | *p_last = TRUE(!0); | |||
1971 | if (p_st != NULL__null) | |||
1972 | *p_st = pr->u.p.st; | |||
1973 | } // if | |||
1974 | #ifdef KMP_DEBUG1 | |||
1975 | { | |||
1976 | char *buff; | |||
1977 | // create format specifiers before the debug output | |||
1978 | buff = __kmp_str_format( | |||
1979 | "__kmp_dispatch_next: T#%%d serialized case: p_lb:%%%s " | |||
1980 | "p_ub:%%%s p_st:%%%s p_last:%%p %%d returning:%%d\n", | |||
1981 | traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec); | |||
1982 | KD_TRACE(10, (buff, gtid, *p_lb, *p_ub, *p_st, p_last, *p_last, status))if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, * p_lb, *p_ub, *p_st, p_last, *p_last, status); }; | |||
1983 | __kmp_str_free(&buff); | |||
1984 | } | |||
1985 | #endif | |||
1986 | #if INCLUDE_SSC_MARKS(1 && 1) | |||
1987 | SSC_MARK_DISPATCH_NEXT()__asm__ __volatile__("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 " ::"i"(0xd697) : "%ebx"); | |||
1988 | #endif | |||
1989 | OMPT_LOOP_ENDif (status == 0) { if (ompt_enabled.ompt_callback_work) { ompt_team_info_t *team_info = __ompt_get_teaminfo(0, __null); ompt_task_info_t *task_info = __ompt_get_task_info_object(0); ompt_callbacks. ompt_callback_work_callback( ompt_work_loop, ompt_scope_end, & (team_info->parallel_data), &(task_info->task_data) , 0, codeptr); } }; | |||
1990 | KMP_STATS_LOOP_END; | |||
1991 | return status; | |||
1992 | } else { | |||
1993 | kmp_int32 last = 0; | |||
1994 | dispatch_shared_info_template<T> volatile *sh; | |||
1995 | ||||
1996 | KMP_DEBUG_ASSERT(th->th.th_dispatch ==if (!(th->th.th_dispatch == &th->th.th_team->t.t_dispatch [th->th.th_info.ds.ds_tid])) { __kmp_debug_assert("th->th.th_dispatch == &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 1997); } | |||
1997 | &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid])if (!(th->th.th_dispatch == &th->th.th_team->t.t_dispatch [th->th.th_info.ds.ds_tid])) { __kmp_debug_assert("th->th.th_dispatch == &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 1997); }; | |||
1998 | ||||
1999 | pr = reinterpret_cast<dispatch_private_info_template<T> *>( | |||
2000 | th->th.th_dispatch->th_dispatch_pr_current); | |||
2001 | KMP_DEBUG_ASSERT(pr)if (!(pr)) { __kmp_debug_assert("pr", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 2001); }; | |||
2002 | sh = reinterpret_cast<dispatch_shared_info_template<T> volatile *>( | |||
2003 | th->th.th_dispatch->th_dispatch_sh_current); | |||
2004 | KMP_DEBUG_ASSERT(sh)if (!(sh)) { __kmp_debug_assert("sh", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 2004); }; | |||
2005 | ||||
2006 | #if KMP_USE_HIER_SCHED0 | |||
2007 | if (pr->flags.use_hier) | |||
2008 | status = sh->hier->next(loc, gtid, pr, &last, p_lb, p_ub, p_st); | |||
2009 | else | |||
2010 | #endif // KMP_USE_HIER_SCHED | |||
2011 | status = __kmp_dispatch_next_algorithm<T>(gtid, pr, sh, &last, p_lb, p_ub, | |||
2012 | p_st, th->th.th_team_nproc, | |||
2013 | th->th.th_info.ds.ds_tid); | |||
2014 | // status == 0: no more iterations to execute | |||
2015 | if (status == 0) { | |||
2016 | UT num_done; | |||
2017 | ||||
2018 | num_done = test_then_inc<ST>((volatile ST *)&sh->u.s.num_done); | |||
2019 | #ifdef KMP_DEBUG1 | |||
2020 | { | |||
2021 | char *buff; | |||
2022 | // create format specifiers before the debug output | |||
2023 | buff = __kmp_str_format( | |||
2024 | "__kmp_dispatch_next: T#%%d increment num_done:%%%s\n", | |||
2025 | traits_t<UT>::spec); | |||
2026 | KD_TRACE(10, (buff, gtid, sh->u.s.num_done))if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, sh ->u.s.num_done); }; | |||
2027 | __kmp_str_free(&buff); | |||
2028 | } | |||
2029 | #endif | |||
2030 | ||||
2031 | #if KMP_USE_HIER_SCHED0 | |||
2032 | pr->flags.use_hier = FALSE0; | |||
2033 | #endif | |||
2034 | if ((ST)num_done == th->th.th_team_nproc - 1) { | |||
2035 | #if (KMP_STATIC_STEAL_ENABLED1) | |||
2036 | if (pr->schedule == kmp_sch_static_steal && | |||
2037 | traits_t<T>::type_size > 4) { | |||
2038 | int i; | |||
2039 | kmp_info_t **other_threads = team->t.t_threads; | |||
2040 | // loop complete, safe to destroy locks used for stealing | |||
2041 | for (i = 0; i < th->th.th_team_nproc; ++i) { | |||
2042 | kmp_lock_t *lck = other_threads[i]->th.th_dispatch->th_steal_lock; | |||
2043 | KMP_ASSERT(lck != NULL)if (!(lck != __null)) { __kmp_debug_assert("lck != NULL", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 2043); }; | |||
2044 | __kmp_destroy_lock(lck); | |||
2045 | __kmp_free(lck)___kmp_free((lck), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 2045); | |||
2046 | other_threads[i]->th.th_dispatch->th_steal_lock = NULL__null; | |||
2047 | } | |||
2048 | } | |||
2049 | #endif | |||
2050 | /* NOTE: release this buffer to be reused */ | |||
2051 | ||||
2052 | KMP_MB(); /* Flush all pending memory write invalidates. */ | |||
2053 | ||||
2054 | sh->u.s.num_done = 0; | |||
2055 | sh->u.s.iteration = 0; | |||
2056 | ||||
2057 | /* TODO replace with general release procedure? */ | |||
2058 | if (pr->flags.ordered) { | |||
2059 | sh->u.s.ordered_iteration = 0; | |||
2060 | } | |||
2061 | ||||
2062 | KMP_MB(); /* Flush all pending memory write invalidates. */ | |||
2063 | ||||
2064 | sh->buffer_index += __kmp_dispatch_num_buffers; | |||
2065 | KD_TRACE(100, ("__kmp_dispatch_next: T#%d change buffer_index:%d\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next: T#%d change buffer_index:%d\n" , gtid, sh->buffer_index); } | |||
2066 | gtid, sh->buffer_index))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next: T#%d change buffer_index:%d\n" , gtid, sh->buffer_index); }; | |||
2067 | ||||
2068 | KMP_MB(); /* Flush all pending memory write invalidates. */ | |||
2069 | ||||
2070 | } // if | |||
2071 | if (__kmp_env_consistency_check) { | |||
2072 | if (pr->pushed_ws != ct_none) { | |||
2073 | pr->pushed_ws = __kmp_pop_workshare(gtid, pr->pushed_ws, loc); | |||
2074 | } | |||
2075 | } | |||
2076 | ||||
2077 | th->th.th_dispatch->th_deo_fcn = NULL__null; | |||
2078 | th->th.th_dispatch->th_dxo_fcn = NULL__null; | |||
2079 | th->th.th_dispatch->th_dispatch_sh_current = NULL__null; | |||
2080 | th->th.th_dispatch->th_dispatch_pr_current = NULL__null; | |||
2081 | } // if (status == 0) | |||
2082 | #if KMP_OS_WINDOWS0 | |||
2083 | else if (last) { | |||
2084 | pr->u.p.last_upper = pr->u.p.ub; | |||
2085 | } | |||
2086 | #endif /* KMP_OS_WINDOWS */ | |||
2087 | if (p_last != NULL__null && status != 0) | |||
2088 | *p_last = last; | |||
2089 | } // if | |||
2090 | ||||
2091 | #ifdef KMP_DEBUG1 | |||
2092 | { | |||
2093 | char *buff; | |||
2094 | // create format specifiers before the debug output | |||
2095 | buff = __kmp_str_format( | |||
2096 | "__kmp_dispatch_next: T#%%d normal case: " | |||
2097 | "p_lb:%%%s p_ub:%%%s p_st:%%%s p_last:%%p (%%d) returning:%%d\n", | |||
2098 | traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec); | |||
2099 | KD_TRACE(10, (buff, gtid, *p_lb, *p_ub, p_st ? *p_st : 0, p_last,if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, * p_lb, *p_ub, p_st ? *p_st : 0, p_last, (p_last ? *p_last : 0) , status); } | |||
2100 | (p_last ? *p_last : 0), status))if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, * p_lb, *p_ub, p_st ? *p_st : 0, p_last, (p_last ? *p_last : 0) , status); }; | |||
2101 | __kmp_str_free(&buff); | |||
2102 | } | |||
2103 | #endif | |||
2104 | #if INCLUDE_SSC_MARKS(1 && 1) | |||
2105 | SSC_MARK_DISPATCH_NEXT()__asm__ __volatile__("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 " ::"i"(0xd697) : "%ebx"); | |||
2106 | #endif | |||
2107 | OMPT_LOOP_ENDif (status == 0) { if (ompt_enabled.ompt_callback_work) { ompt_team_info_t *team_info = __ompt_get_teaminfo(0, __null); ompt_task_info_t *task_info = __ompt_get_task_info_object(0); ompt_callbacks. ompt_callback_work_callback( ompt_work_loop, ompt_scope_end, & (team_info->parallel_data), &(task_info->task_data) , 0, codeptr); } }; | |||
2108 | KMP_STATS_LOOP_END; | |||
2109 | return status; | |||
2110 | } | |||
2111 | ||||
2112 | template <typename T> | |||
2113 | static void __kmp_dist_get_bounds(ident_t *loc, kmp_int32 gtid, | |||
2114 | kmp_int32 *plastiter, T *plower, T *pupper, | |||
2115 | typename traits_t<T>::signed_t incr) { | |||
2116 | typedef typename traits_t<T>::unsigned_t UT; | |||
2117 | kmp_uint32 team_id; | |||
2118 | kmp_uint32 nteams; | |||
2119 | UT trip_count; | |||
2120 | kmp_team_t *team; | |||
2121 | kmp_info_t *th; | |||
2122 | ||||
2123 | KMP_DEBUG_ASSERT(plastiter && plower && pupper)if (!(plastiter && plower && pupper)) { __kmp_debug_assert ("plastiter && plower && pupper", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 2123); }; | |||
2124 | KE_TRACE(10, ("__kmpc_dist_get_bounds called (%d)\n", gtid))if (kmp_e_debug >= 10) { __kmp_debug_printf ("__kmpc_dist_get_bounds called (%d)\n" , gtid); }; | |||
2125 | #ifdef KMP_DEBUG1 | |||
2126 | typedef typename traits_t<T>::signed_t ST; | |||
2127 | { | |||
2128 | char *buff; | |||
2129 | // create format specifiers before the debug output | |||
2130 | buff = __kmp_str_format("__kmpc_dist_get_bounds: T#%%d liter=%%d " | |||
2131 | "iter=(%%%s, %%%s, %%%s) signed?<%s>\n", | |||
2132 | traits_t<T>::spec, traits_t<T>::spec, | |||
2133 | traits_t<ST>::spec, traits_t<T>::spec); | |||
2134 | KD_TRACE(100, (buff, gtid, *plastiter, *plower, *pupper, incr))if (kmp_d_debug >= 100) { __kmp_debug_printf (buff, gtid, * plastiter, *plower, *pupper, incr); }; | |||
2135 | __kmp_str_free(&buff); | |||
2136 | } | |||
2137 | #endif | |||
2138 | ||||
2139 | if (__kmp_env_consistency_check) { | |||
2140 | if (incr == 0) { | |||
2141 | __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, | |||
2142 | loc); | |||
2143 | } | |||
2144 | if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) { | |||
2145 | // The loop is illegal. | |||
2146 | // Some zero-trip loops maintained by compiler, e.g.: | |||
2147 | // for(i=10;i<0;++i) // lower >= upper - run-time check | |||
2148 | // for(i=0;i>10;--i) // lower <= upper - run-time check | |||
2149 | // for(i=0;i>10;++i) // incr > 0 - compile-time check | |||
2150 | // for(i=10;i<0;--i) // incr < 0 - compile-time check | |||
2151 | // Compiler does not check the following illegal loops: | |||
2152 | // for(i=0;i<10;i+=incr) // where incr<0 | |||
2153 | // for(i=10;i>0;i-=incr) // where incr<0 | |||
2154 | __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc); | |||
2155 | } | |||
2156 | } | |||
2157 | th = __kmp_threads[gtid]; | |||
2158 | team = th->th.th_team; | |||
2159 | #if OMP_40_ENABLED(50 >= 40) | |||
2160 | KMP_DEBUG_ASSERT(th->th.th_teams_microtask)if (!(th->th.th_teams_microtask)) { __kmp_debug_assert("th->th.th_teams_microtask" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 2160); }; // we are in the teams construct | |||
2161 | nteams = th->th.th_teams_size.nteams; | |||
2162 | #endif | |||
2163 | team_id = team->t.t_master_tid; | |||
2164 | KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc)if (!(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc )) { __kmp_debug_assert("nteams == (kmp_uint32)team->t.t_parent->t.t_nproc" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 2164); }; | |||
2165 | ||||
2166 | // compute global trip count | |||
2167 | if (incr == 1) { | |||
2168 | trip_count = *pupper - *plower + 1; | |||
2169 | } else if (incr == -1) { | |||
2170 | trip_count = *plower - *pupper + 1; | |||
2171 | } else if (incr > 0) { | |||
2172 | // upper-lower can exceed the limit of signed type | |||
2173 | trip_count = (UT)(*pupper - *plower) / incr + 1; | |||
2174 | } else { | |||
2175 | trip_count = (UT)(*plower - *pupper) / (-incr) + 1; | |||
2176 | } | |||
2177 | ||||
2178 | if (trip_count <= nteams) { | |||
2179 | KMP_DEBUG_ASSERT(if (!(__kmp_static == kmp_sch_static_greedy || __kmp_static == kmp_sch_static_balanced)) { __kmp_debug_assert("__kmp_static == kmp_sch_static_greedy || __kmp_static == kmp_sch_static_balanced" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 2182); } | |||
2180 | __kmp_static == kmp_sch_static_greedy ||if (!(__kmp_static == kmp_sch_static_greedy || __kmp_static == kmp_sch_static_balanced)) { __kmp_debug_assert("__kmp_static == kmp_sch_static_greedy || __kmp_static == kmp_sch_static_balanced" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 2182); } | |||
2181 | __kmp_static ==if (!(__kmp_static == kmp_sch_static_greedy || __kmp_static == kmp_sch_static_balanced)) { __kmp_debug_assert("__kmp_static == kmp_sch_static_greedy || __kmp_static == kmp_sch_static_balanced" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 2182); } | |||
2182 | kmp_sch_static_balanced)if (!(__kmp_static == kmp_sch_static_greedy || __kmp_static == kmp_sch_static_balanced)) { __kmp_debug_assert("__kmp_static == kmp_sch_static_greedy || __kmp_static == kmp_sch_static_balanced" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 2182); }; // Unknown static scheduling type. | |||
2183 | // only some teams get single iteration, others get nothing | |||
2184 | if (team_id < trip_count) { | |||
2185 | *pupper = *plower = *plower + team_id * incr; | |||
2186 | } else { | |||
2187 | *plower = *pupper + incr; // zero-trip loop | |||
2188 | } | |||
2189 | if (plastiter != NULL__null) | |||
2190 | *plastiter = (team_id == trip_count - 1); | |||
2191 | } else { | |||
2192 | if (__kmp_static == kmp_sch_static_balanced) { | |||
2193 | UT chunk = trip_count / nteams; | |||
2194 | UT extras = trip_count % nteams; | |||
2195 | *plower += | |||
2196 | incr * (team_id * chunk + (team_id < extras ? team_id : extras)); | |||
2197 | *pupper = *plower + chunk * incr - (team_id < extras ? 0 : incr); | |||
2198 | if (plastiter != NULL__null) | |||
2199 | *plastiter = (team_id == nteams - 1); | |||
2200 | } else { | |||
2201 | T chunk_inc_count = | |||
2202 | (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr; | |||
2203 | T upper = *pupper; | |||
2204 | KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy)if (!(__kmp_static == kmp_sch_static_greedy)) { __kmp_debug_assert ("__kmp_static == kmp_sch_static_greedy", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 2204); }; | |||
2205 | // Unknown static scheduling type. | |||
2206 | *plower += team_id * chunk_inc_count; | |||
2207 | *pupper = *plower + chunk_inc_count - incr; | |||
2208 | // Check/correct bounds if needed | |||
2209 | if (incr > 0) { | |||
2210 | if (*pupper < *plower) | |||
2211 | *pupper = traits_t<T>::max_value; | |||
2212 | if (plastiter != NULL__null) | |||
2213 | *plastiter = *plower <= upper && *pupper > upper - incr; | |||
2214 | if (*pupper > upper) | |||
2215 | *pupper = upper; // tracker C73258 | |||
2216 | } else { | |||
2217 | if (*pupper > *plower) | |||
2218 | *pupper = traits_t<T>::min_value; | |||
2219 | if (plastiter != NULL__null) | |||
2220 | *plastiter = *plower >= upper && *pupper < upper - incr; | |||
2221 | if (*pupper < upper) | |||
2222 | *pupper = upper; // tracker C73258 | |||
2223 | } | |||
2224 | } | |||
2225 | } | |||
2226 | } | |||
2227 | ||||
2228 | //----------------------------------------------------------------------------- | |||
2229 | // Dispatch routines | |||
2230 | // Transfer call to template< type T > | |||
2231 | // __kmp_dispatch_init( ident_t *loc, int gtid, enum sched_type schedule, | |||
2232 | // T lb, T ub, ST st, ST chunk ) | |||
2233 | extern "C" { | |||
2234 | ||||
2235 | /*! | |||
2236 | @ingroup WORK_SHARING | |||
2237 | @{ | |||
2238 | @param loc Source location | |||
2239 | @param gtid Global thread id | |||
2240 | @param schedule Schedule type | |||
2241 | @param lb Lower bound | |||
2242 | @param ub Upper bound | |||
2243 | @param st Step (or increment if you prefer) | |||
2244 | @param chunk The chunk size to block with | |||
2245 | ||||
2246 | This function prepares the runtime to start a dynamically scheduled for loop, | |||
2247 | saving the loop arguments. | |||
2248 | These functions are all identical apart from the types of the arguments. | |||
2249 | */ | |||
2250 | ||||
2251 | void __kmpc_dispatch_init_4(ident_t *loc, kmp_int32 gtid, | |||
2252 | enum sched_type schedule, kmp_int32 lb, | |||
2253 | kmp_int32 ub, kmp_int32 st, kmp_int32 chunk) { | |||
2254 | KMP_DEBUG_ASSERT(__kmp_init_serial)if (!(__kmp_init_serial)) { __kmp_debug_assert("__kmp_init_serial" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 2254); }; | |||
2255 | #if OMPT_SUPPORT1 && OMPT_OPTIONAL1 | |||
2256 | OMPT_STORE_RETURN_ADDRESS(gtid)if (ompt_enabled.enabled && gtid >= 0 && __kmp_threads [gtid] && !__kmp_threads[gtid]->th.ompt_thread_info .return_address) __kmp_threads[gtid]->th.ompt_thread_info. return_address = __builtin_return_address(0); | |||
2257 | #endif | |||
2258 | __kmp_dispatch_init<kmp_int32>(loc, gtid, schedule, lb, ub, st, chunk, true); | |||
2259 | } | |||
2260 | /*! | |||
2261 | See @ref __kmpc_dispatch_init_4 | |||
2262 | */ | |||
2263 | void __kmpc_dispatch_init_4u(ident_t *loc, kmp_int32 gtid, | |||
2264 | enum sched_type schedule, kmp_uint32 lb, | |||
2265 | kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk) { | |||
2266 | KMP_DEBUG_ASSERT(__kmp_init_serial)if (!(__kmp_init_serial)) { __kmp_debug_assert("__kmp_init_serial" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 2266); }; | |||
2267 | #if OMPT_SUPPORT1 && OMPT_OPTIONAL1 | |||
2268 | OMPT_STORE_RETURN_ADDRESS(gtid)if (ompt_enabled.enabled && gtid >= 0 && __kmp_threads [gtid] && !__kmp_threads[gtid]->th.ompt_thread_info .return_address) __kmp_threads[gtid]->th.ompt_thread_info. return_address = __builtin_return_address(0); | |||
2269 | #endif | |||
2270 | __kmp_dispatch_init<kmp_uint32>(loc, gtid, schedule, lb, ub, st, chunk, true); | |||
2271 | } | |||
2272 | ||||
2273 | /*! | |||
2274 | See @ref __kmpc_dispatch_init_4 | |||
2275 | */ | |||
2276 | void __kmpc_dispatch_init_8(ident_t *loc, kmp_int32 gtid, | |||
2277 | enum sched_type schedule, kmp_int64 lb, | |||
2278 | kmp_int64 ub, kmp_int64 st, kmp_int64 chunk) { | |||
2279 | KMP_DEBUG_ASSERT(__kmp_init_serial)if (!(__kmp_init_serial)) { __kmp_debug_assert("__kmp_init_serial" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 2279); }; | |||
2280 | #if OMPT_SUPPORT1 && OMPT_OPTIONAL1 | |||
2281 | OMPT_STORE_RETURN_ADDRESS(gtid)if (ompt_enabled.enabled && gtid >= 0 && __kmp_threads [gtid] && !__kmp_threads[gtid]->th.ompt_thread_info .return_address) __kmp_threads[gtid]->th.ompt_thread_info. return_address = __builtin_return_address(0); | |||
2282 | #endif | |||
2283 | __kmp_dispatch_init<kmp_int64>(loc, gtid, schedule, lb, ub, st, chunk, true); | |||
2284 | } | |||
2285 | ||||
2286 | /*! | |||
2287 | See @ref __kmpc_dispatch_init_4 | |||
2288 | */ | |||
2289 | void __kmpc_dispatch_init_8u(ident_t *loc, kmp_int32 gtid, | |||
2290 | enum sched_type schedule, kmp_uint64 lb, | |||
2291 | kmp_uint64 ub, kmp_int64 st, kmp_int64 chunk) { | |||
2292 | KMP_DEBUG_ASSERT(__kmp_init_serial)if (!(__kmp_init_serial)) { __kmp_debug_assert("__kmp_init_serial" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 2292); }; | |||
2293 | #if OMPT_SUPPORT1 && OMPT_OPTIONAL1 | |||
2294 | OMPT_STORE_RETURN_ADDRESS(gtid)if (ompt_enabled.enabled && gtid >= 0 && __kmp_threads [gtid] && !__kmp_threads[gtid]->th.ompt_thread_info .return_address) __kmp_threads[gtid]->th.ompt_thread_info. return_address = __builtin_return_address(0); | |||
2295 | #endif | |||
2296 | __kmp_dispatch_init<kmp_uint64>(loc, gtid, schedule, lb, ub, st, chunk, true); | |||
2297 | } | |||
2298 | ||||
2299 | /*! | |||
2300 | See @ref __kmpc_dispatch_init_4 | |||
2301 | ||||
2302 | Difference from __kmpc_dispatch_init set of functions is these functions | |||
2303 | are called for composite distribute parallel for construct. Thus before | |||
2304 | regular iterations dispatching we need to calc per-team iteration space. | |||
2305 | ||||
2306 | These functions are all identical apart from the types of the arguments. | |||
2307 | */ | |||
2308 | void __kmpc_dist_dispatch_init_4(ident_t *loc, kmp_int32 gtid, | |||
2309 | enum sched_type schedule, kmp_int32 *p_last, | |||
2310 | kmp_int32 lb, kmp_int32 ub, kmp_int32 st, | |||
2311 | kmp_int32 chunk) { | |||
2312 | KMP_DEBUG_ASSERT(__kmp_init_serial)if (!(__kmp_init_serial)) { __kmp_debug_assert("__kmp_init_serial" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 2312); }; | |||
2313 | #if OMPT_SUPPORT1 && OMPT_OPTIONAL1 | |||
2314 | OMPT_STORE_RETURN_ADDRESS(gtid)if (ompt_enabled.enabled && gtid >= 0 && __kmp_threads [gtid] && !__kmp_threads[gtid]->th.ompt_thread_info .return_address) __kmp_threads[gtid]->th.ompt_thread_info. return_address = __builtin_return_address(0); | |||
2315 | #endif | |||
2316 | __kmp_dist_get_bounds<kmp_int32>(loc, gtid, p_last, &lb, &ub, st); | |||
2317 | __kmp_dispatch_init<kmp_int32>(loc, gtid, schedule, lb, ub, st, chunk, true); | |||
2318 | } | |||
2319 | ||||
2320 | void __kmpc_dist_dispatch_init_4u(ident_t *loc, kmp_int32 gtid, | |||
2321 | enum sched_type schedule, kmp_int32 *p_last, | |||
2322 | kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st, | |||
2323 | kmp_int32 chunk) { | |||
2324 | KMP_DEBUG_ASSERT(__kmp_init_serial)if (!(__kmp_init_serial)) { __kmp_debug_assert("__kmp_init_serial" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 2324); }; | |||
2325 | #if OMPT_SUPPORT1 && OMPT_OPTIONAL1 | |||
2326 | OMPT_STORE_RETURN_ADDRESS(gtid)if (ompt_enabled.enabled && gtid >= 0 && __kmp_threads [gtid] && !__kmp_threads[gtid]->th.ompt_thread_info .return_address) __kmp_threads[gtid]->th.ompt_thread_info. return_address = __builtin_return_address(0); | |||
2327 | #endif | |||
2328 | __kmp_dist_get_bounds<kmp_uint32>(loc, gtid, p_last, &lb, &ub, st); | |||
2329 | __kmp_dispatch_init<kmp_uint32>(loc, gtid, schedule, lb, ub, st, chunk, true); | |||
2330 | } | |||
2331 | ||||
2332 | void __kmpc_dist_dispatch_init_8(ident_t *loc, kmp_int32 gtid, | |||
2333 | enum sched_type schedule, kmp_int32 *p_last, | |||
2334 | kmp_int64 lb, kmp_int64 ub, kmp_int64 st, | |||
2335 | kmp_int64 chunk) { | |||
2336 | KMP_DEBUG_ASSERT(__kmp_init_serial)if (!(__kmp_init_serial)) { __kmp_debug_assert("__kmp_init_serial" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 2336); }; | |||
2337 | #if OMPT_SUPPORT1 && OMPT_OPTIONAL1 | |||
2338 | OMPT_STORE_RETURN_ADDRESS(gtid)if (ompt_enabled.enabled && gtid >= 0 && __kmp_threads [gtid] && !__kmp_threads[gtid]->th.ompt_thread_info .return_address) __kmp_threads[gtid]->th.ompt_thread_info. return_address = __builtin_return_address(0); | |||
2339 | #endif | |||
2340 | __kmp_dist_get_bounds<kmp_int64>(loc, gtid, p_last, &lb, &ub, st); | |||
2341 | __kmp_dispatch_init<kmp_int64>(loc, gtid, schedule, lb, ub, st, chunk, true); | |||
2342 | } | |||
2343 | ||||
2344 | void __kmpc_dist_dispatch_init_8u(ident_t *loc, kmp_int32 gtid, | |||
2345 | enum sched_type schedule, kmp_int32 *p_last, | |||
2346 | kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st, | |||
2347 | kmp_int64 chunk) { | |||
2348 | KMP_DEBUG_ASSERT(__kmp_init_serial)if (!(__kmp_init_serial)) { __kmp_debug_assert("__kmp_init_serial" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 2348); }; | |||
2349 | #if OMPT_SUPPORT1 && OMPT_OPTIONAL1 | |||
2350 | OMPT_STORE_RETURN_ADDRESS(gtid)if (ompt_enabled.enabled && gtid >= 0 && __kmp_threads [gtid] && !__kmp_threads[gtid]->th.ompt_thread_info .return_address) __kmp_threads[gtid]->th.ompt_thread_info. return_address = __builtin_return_address(0); | |||
2351 | #endif | |||
2352 | __kmp_dist_get_bounds<kmp_uint64>(loc, gtid, p_last, &lb, &ub, st); | |||
2353 | __kmp_dispatch_init<kmp_uint64>(loc, gtid, schedule, lb, ub, st, chunk, true); | |||
2354 | } | |||
2355 | ||||
2356 | /*! | |||
2357 | @param loc Source code location | |||
2358 | @param gtid Global thread id | |||
2359 | @param p_last Pointer to a flag set to one if this is the last chunk or zero | |||
2360 | otherwise | |||
2361 | @param p_lb Pointer to the lower bound for the next chunk of work | |||
2362 | @param p_ub Pointer to the upper bound for the next chunk of work | |||
2363 | @param p_st Pointer to the stride for the next chunk of work | |||
2364 | @return one if there is work to be done, zero otherwise | |||
2365 | ||||
2366 | Get the next dynamically allocated chunk of work for this thread. | |||
2367 | If there is no more work, then the lb,ub and stride need not be modified. | |||
2368 | */ | |||
2369 | int __kmpc_dispatch_next_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, | |||
2370 | kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st) { | |||
2371 | #if OMPT_SUPPORT1 && OMPT_OPTIONAL1 | |||
2372 | OMPT_STORE_RETURN_ADDRESS(gtid)if (ompt_enabled.enabled && gtid >= 0 && __kmp_threads [gtid] && !__kmp_threads[gtid]->th.ompt_thread_info .return_address) __kmp_threads[gtid]->th.ompt_thread_info. return_address = __builtin_return_address(0); | |||
2373 | #endif | |||
2374 | return __kmp_dispatch_next<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st | |||
2375 | #if OMPT_SUPPORT1 && OMPT_OPTIONAL1 | |||
2376 | , | |||
2377 | OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid) | |||
2378 | #endif | |||
2379 | ); | |||
2380 | } | |||
2381 | ||||
2382 | /*! | |||
2383 | See @ref __kmpc_dispatch_next_4 | |||
2384 | */ | |||
2385 | int __kmpc_dispatch_next_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, | |||
2386 | kmp_uint32 *p_lb, kmp_uint32 *p_ub, | |||
2387 | kmp_int32 *p_st) { | |||
2388 | #if OMPT_SUPPORT1 && OMPT_OPTIONAL1 | |||
2389 | OMPT_STORE_RETURN_ADDRESS(gtid)if (ompt_enabled.enabled && gtid >= 0 && __kmp_threads [gtid] && !__kmp_threads[gtid]->th.ompt_thread_info .return_address) __kmp_threads[gtid]->th.ompt_thread_info. return_address = __builtin_return_address(0); | |||
2390 | #endif | |||
2391 | return __kmp_dispatch_next<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st | |||
2392 | #if OMPT_SUPPORT1 && OMPT_OPTIONAL1 | |||
2393 | , | |||
2394 | OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid) | |||
2395 | #endif | |||
2396 | ); | |||
2397 | } | |||
2398 | ||||
2399 | /*! | |||
2400 | See @ref __kmpc_dispatch_next_4 | |||
2401 | */ | |||
2402 | int __kmpc_dispatch_next_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, | |||
2403 | kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st) { | |||
2404 | #if OMPT_SUPPORT1 && OMPT_OPTIONAL1 | |||
2405 | OMPT_STORE_RETURN_ADDRESS(gtid)if (ompt_enabled.enabled && gtid >= 0 && __kmp_threads [gtid] && !__kmp_threads[gtid]->th.ompt_thread_info .return_address) __kmp_threads[gtid]->th.ompt_thread_info. return_address = __builtin_return_address(0); | |||
2406 | #endif | |||
2407 | return __kmp_dispatch_next<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st | |||
2408 | #if OMPT_SUPPORT1 && OMPT_OPTIONAL1 | |||
2409 | , | |||
2410 | OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid) | |||
2411 | #endif | |||
2412 | ); | |||
2413 | } | |||
2414 | ||||
2415 | /*! | |||
2416 | See @ref __kmpc_dispatch_next_4 | |||
2417 | */ | |||
2418 | int __kmpc_dispatch_next_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, | |||
2419 | kmp_uint64 *p_lb, kmp_uint64 *p_ub, | |||
2420 | kmp_int64 *p_st) { | |||
2421 | #if OMPT_SUPPORT1 && OMPT_OPTIONAL1 | |||
2422 | OMPT_STORE_RETURN_ADDRESS(gtid)if (ompt_enabled.enabled && gtid >= 0 && __kmp_threads [gtid] && !__kmp_threads[gtid]->th.ompt_thread_info .return_address) __kmp_threads[gtid]->th.ompt_thread_info. return_address = __builtin_return_address(0); | |||
2423 | #endif | |||
2424 | return __kmp_dispatch_next<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st | |||
2425 | #if OMPT_SUPPORT1 && OMPT_OPTIONAL1 | |||
2426 | , | |||
2427 | OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid) | |||
2428 | #endif | |||
2429 | ); | |||
2430 | } | |||
2431 | ||||
2432 | /*! | |||
2433 | @param loc Source code location | |||
2434 | @param gtid Global thread id | |||
2435 | ||||
2436 | Mark the end of a dynamic loop. | |||
2437 | */ | |||
2438 | void __kmpc_dispatch_fini_4(ident_t *loc, kmp_int32 gtid) { | |||
2439 | __kmp_dispatch_finish<kmp_uint32>(gtid, loc); | |||
2440 | } | |||
2441 | ||||
2442 | /*! | |||
2443 | See @ref __kmpc_dispatch_fini_4 | |||
2444 | */ | |||
2445 | void __kmpc_dispatch_fini_8(ident_t *loc, kmp_int32 gtid) { | |||
2446 | __kmp_dispatch_finish<kmp_uint64>(gtid, loc); | |||
2447 | } | |||
2448 | ||||
2449 | /*! | |||
2450 | See @ref __kmpc_dispatch_fini_4 | |||
2451 | */ | |||
2452 | void __kmpc_dispatch_fini_4u(ident_t *loc, kmp_int32 gtid) { | |||
2453 | __kmp_dispatch_finish<kmp_uint32>(gtid, loc); | |||
2454 | } | |||
2455 | ||||
2456 | /*! | |||
2457 | See @ref __kmpc_dispatch_fini_4 | |||
2458 | */ | |||
2459 | void __kmpc_dispatch_fini_8u(ident_t *loc, kmp_int32 gtid) { | |||
2460 | __kmp_dispatch_finish<kmp_uint64>(gtid, loc); | |||
2461 | } | |||
2462 | /*! @} */ | |||
2463 | ||||
2464 | //----------------------------------------------------------------------------- | |||
2465 | // Non-template routines from kmp_dispatch.cpp used in other sources | |||
2466 | ||||
2467 | kmp_uint32 __kmp_eq_4(kmp_uint32 value, kmp_uint32 checker) { | |||
2468 | return value == checker; | |||
2469 | } | |||
2470 | ||||
2471 | kmp_uint32 __kmp_neq_4(kmp_uint32 value, kmp_uint32 checker) { | |||
2472 | return value != checker; | |||
2473 | } | |||
2474 | ||||
2475 | kmp_uint32 __kmp_lt_4(kmp_uint32 value, kmp_uint32 checker) { | |||
2476 | return value < checker; | |||
2477 | } | |||
2478 | ||||
2479 | kmp_uint32 __kmp_ge_4(kmp_uint32 value, kmp_uint32 checker) { | |||
2480 | return value >= checker; | |||
2481 | } | |||
2482 | ||||
2483 | kmp_uint32 __kmp_le_4(kmp_uint32 value, kmp_uint32 checker) { | |||
2484 | return value <= checker; | |||
2485 | } | |||
2486 | ||||
2487 | kmp_uint32 | |||
2488 | __kmp_wait_yield_4(volatile kmp_uint32 *spinner, kmp_uint32 checker, | |||
2489 | kmp_uint32 (*pred)(kmp_uint32, kmp_uint32), | |||
2490 | void *obj // Higher-level synchronization object, or NULL. | |||
2491 | ) { | |||
2492 | // note: we may not belong to a team at this point | |||
2493 | volatile kmp_uint32 *spin = spinner; | |||
2494 | kmp_uint32 check = checker; | |||
2495 | kmp_uint32 spins; | |||
2496 | kmp_uint32 (*f)(kmp_uint32, kmp_uint32) = pred; | |||
2497 | kmp_uint32 r; | |||
2498 | ||||
2499 | KMP_FSYNC_SPIN_INIT(obj, CCAST(kmp_uint32 *, spin))int sync_iters = 0; if (__kmp_itt_fsync_prepare_ptr__3_0) { if (obj == __null) { obj = const_cast<kmp_uint32 *>(spin) ; } } __asm__ __volatile__("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 " ::"i"(0x4376) : "%ebx"); | |||
2500 | KMP_INIT_YIELD(spins){ (spins) = __kmp_yield_init; }; | |||
2501 | // main wait spin loop | |||
2502 | while (!f(r = TCR_4(*spin)(*spin), check)) { | |||
2503 | KMP_FSYNC_SPIN_PREPARE(obj)do { if (__kmp_itt_fsync_prepare_ptr__3_0 && sync_iters < __kmp_itt_prepare_delay) { ++sync_iters; if (sync_iters >= __kmp_itt_prepare_delay) { (!__kmp_itt_fsync_prepare_ptr__3_0 ) ? (void)0 : __kmp_itt_fsync_prepare_ptr__3_0((void *)((void *)obj)); } } } while (0); | |||
2504 | /* GEH - remove this since it was accidentally introduced when kmp_wait was | |||
2505 | split. It causes problems with infinite recursion because of exit lock */ | |||
2506 | /* if ( TCR_4(__kmp_global.g.g_done) && __kmp_global.g.g_abort) | |||
2507 | __kmp_abort_thread(); */ | |||
2508 | ||||
2509 | /* if we have waited a bit, or are oversubscribed, yield */ | |||
2510 | /* pause is in the following code */ | |||
2511 | KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc){ __kmp_x86_pause(); __kmp_yield(((__kmp_nth) > __kmp_avail_proc )); }; | |||
2512 | KMP_YIELD_SPIN(spins){ __kmp_x86_pause(); (spins) -= 2; if (!(spins)) { __kmp_yield (1); (spins) = __kmp_yield_next; } }; | |||
2513 | } | |||
2514 | KMP_FSYNC_SPIN_ACQUIRED(obj)do { __asm__ __volatile__("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 " ::"i"(0x4377) : "%ebx"); if (sync_iters >= __kmp_itt_prepare_delay ) { (!__kmp_itt_fsync_acquired_ptr__3_0) ? (void)0 : __kmp_itt_fsync_acquired_ptr__3_0 ((void *)((void *)obj)); } } while (0); | |||
2515 | return r; | |||
2516 | } | |||
2517 | ||||
2518 | void __kmp_wait_yield_4_ptr( | |||
2519 | void *spinner, kmp_uint32 checker, kmp_uint32 (*pred)(void *, kmp_uint32), | |||
2520 | void *obj // Higher-level synchronization object, or NULL. | |||
2521 | ) { | |||
2522 | // note: we may not belong to a team at this point | |||
2523 | void *spin = spinner; | |||
2524 | kmp_uint32 check = checker; | |||
2525 | kmp_uint32 spins; | |||
2526 | kmp_uint32 (*f)(void *, kmp_uint32) = pred; | |||
2527 | ||||
2528 | KMP_FSYNC_SPIN_INIT(obj, spin)int sync_iters = 0; if (__kmp_itt_fsync_prepare_ptr__3_0) { if (obj == __null) { obj = spin; } } __asm__ __volatile__("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 " ::"i"(0x4376) : "%ebx"); | |||
2529 | KMP_INIT_YIELD(spins){ (spins) = __kmp_yield_init; }; | |||
2530 | // main wait spin loop | |||
2531 | while (!f(spin, check)) { | |||
2532 | KMP_FSYNC_SPIN_PREPARE(obj)do { if (__kmp_itt_fsync_prepare_ptr__3_0 && sync_iters < __kmp_itt_prepare_delay) { ++sync_iters; if (sync_iters >= __kmp_itt_prepare_delay) { (!__kmp_itt_fsync_prepare_ptr__3_0 ) ? (void)0 : __kmp_itt_fsync_prepare_ptr__3_0((void *)((void *)obj)); } } } while (0); | |||
2533 | /* if we have waited a bit, or are oversubscribed, yield */ | |||
2534 | /* pause is in the following code */ | |||
2535 | KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc){ __kmp_x86_pause(); __kmp_yield(((__kmp_nth) > __kmp_avail_proc )); }; | |||
2536 | KMP_YIELD_SPIN(spins){ __kmp_x86_pause(); (spins) -= 2; if (!(spins)) { __kmp_yield (1); (spins) = __kmp_yield_next; } }; | |||
2537 | } | |||
2538 | KMP_FSYNC_SPIN_ACQUIRED(obj)do { __asm__ __volatile__("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 " ::"i"(0x4377) : "%ebx"); if (sync_iters >= __kmp_itt_prepare_delay ) { (!__kmp_itt_fsync_acquired_ptr__3_0) ? (void)0 : __kmp_itt_fsync_acquired_ptr__3_0 ((void *)((void *)obj)); } } while (0); | |||
2539 | } | |||
2540 | ||||
2541 | } // extern "C" | |||
2542 | ||||
2543 | #ifdef KMP_GOMP_COMPAT | |||
2544 | ||||
2545 | void __kmp_aux_dispatch_init_4(ident_t *loc, kmp_int32 gtid, | |||
2546 | enum sched_type schedule, kmp_int32 lb, | |||
2547 | kmp_int32 ub, kmp_int32 st, kmp_int32 chunk, | |||
2548 | int push_ws) { | |||
2549 | __kmp_dispatch_init<kmp_int32>(loc, gtid, schedule, lb, ub, st, chunk, | |||
2550 | push_ws); | |||
2551 | } | |||
2552 | ||||
2553 | void __kmp_aux_dispatch_init_4u(ident_t *loc, kmp_int32 gtid, | |||
2554 | enum sched_type schedule, kmp_uint32 lb, | |||
2555 | kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk, | |||
2556 | int push_ws) { | |||
2557 | __kmp_dispatch_init<kmp_uint32>(loc, gtid, schedule, lb, ub, st, chunk, | |||
2558 | push_ws); | |||
2559 | } | |||
2560 | ||||
2561 | void __kmp_aux_dispatch_init_8(ident_t *loc, kmp_int32 gtid, | |||
2562 | enum sched_type schedule, kmp_int64 lb, | |||
2563 | kmp_int64 ub, kmp_int64 st, kmp_int64 chunk, | |||
2564 | int push_ws) { | |||
2565 | __kmp_dispatch_init<kmp_int64>(loc, gtid, schedule, lb, ub, st, chunk, | |||
2566 | push_ws); | |||
2567 | } | |||
2568 | ||||
2569 | void __kmp_aux_dispatch_init_8u(ident_t *loc, kmp_int32 gtid, | |||
2570 | enum sched_type schedule, kmp_uint64 lb, | |||
2571 | kmp_uint64 ub, kmp_int64 st, kmp_int64 chunk, | |||
2572 | int push_ws) { | |||
2573 | __kmp_dispatch_init<kmp_uint64>(loc, gtid, schedule, lb, ub, st, chunk, | |||
2574 | push_ws); | |||
2575 | } | |||
2576 | ||||
2577 | void __kmp_aux_dispatch_fini_chunk_4(ident_t *loc, kmp_int32 gtid) { | |||
2578 | __kmp_dispatch_finish_chunk<kmp_uint32>(gtid, loc); | |||
2579 | } | |||
2580 | ||||
2581 | void __kmp_aux_dispatch_fini_chunk_8(ident_t *loc, kmp_int32 gtid) { | |||
2582 | __kmp_dispatch_finish_chunk<kmp_uint64>(gtid, loc); | |||
2583 | } | |||
2584 | ||||
2585 | void __kmp_aux_dispatch_fini_chunk_4u(ident_t *loc, kmp_int32 gtid) { | |||
2586 | __kmp_dispatch_finish_chunk<kmp_uint32>(gtid, loc); | |||
2587 | } | |||
2588 | ||||
2589 | void __kmp_aux_dispatch_fini_chunk_8u(ident_t *loc, kmp_int32 gtid) { | |||
2590 | __kmp_dispatch_finish_chunk<kmp_uint64>(gtid, loc); | |||
2591 | } | |||
2592 | ||||
2593 | #endif /* KMP_GOMP_COMPAT */ | |||
2594 | ||||
2595 | /* ------------------------------------------------------------------------ */ |