File: | projects/openmp/runtime/src/kmp_dispatch.cpp |
Warning: | line 1982, column 7 Dereference of null pointer (loaded from variable 'p_last') |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | /* | |||||||
2 | * kmp_dispatch.cpp: dynamic scheduling - iteration initialization and dispatch. | |||||||
3 | */ | |||||||
4 | ||||||||
5 | //===----------------------------------------------------------------------===// | |||||||
6 | // | |||||||
7 | // The LLVM Compiler Infrastructure | |||||||
8 | // | |||||||
9 | // This file is dual licensed under the MIT and the University of Illinois Open | |||||||
10 | // Source Licenses. See LICENSE.txt for details. | |||||||
11 | // | |||||||
12 | //===----------------------------------------------------------------------===// | |||||||
13 | ||||||||
14 | /* Dynamic scheduling initialization and dispatch. | |||||||
15 | * | |||||||
16 | * NOTE: __kmp_nth is a constant inside of any dispatch loop, however | |||||||
17 | * it may change values between parallel regions. __kmp_max_nth | |||||||
18 | * is the largest value __kmp_nth may take, 1 is the smallest. | |||||||
19 | */ | |||||||
20 | ||||||||
21 | #include "kmp.h" | |||||||
22 | #include "kmp_error.h" | |||||||
23 | #include "kmp_i18n.h" | |||||||
24 | #include "kmp_itt.h" | |||||||
25 | #include "kmp_stats.h" | |||||||
26 | #include "kmp_str.h" | |||||||
27 | #if KMP_OS_WINDOWS0 && KMP_ARCH_X860 | |||||||
28 | #include <float.h> | |||||||
29 | #endif | |||||||
30 | #include "kmp_lock.h" | |||||||
31 | #include "kmp_dispatch.h" | |||||||
32 | #if KMP_USE_HIER_SCHED0 | |||||||
33 | #include "kmp_dispatch_hier.h" | |||||||
34 | #endif | |||||||
35 | ||||||||
36 | #if OMPT_SUPPORT1 | |||||||
37 | #include "ompt-specific.h" | |||||||
38 | #endif | |||||||
39 | ||||||||
40 | /* ------------------------------------------------------------------------ */ | |||||||
41 | /* ------------------------------------------------------------------------ */ | |||||||
42 | ||||||||
43 | void __kmp_dispatch_deo_error(int *gtid_ref, int *cid_ref, ident_t *loc_ref) { | |||||||
44 | kmp_info_t *th; | |||||||
45 | ||||||||
46 | KMP_DEBUG_ASSERT(gtid_ref)if (!(gtid_ref)) { __kmp_debug_assert("gtid_ref", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 46); }; | |||||||
47 | ||||||||
48 | if (__kmp_env_consistency_check) { | |||||||
49 | th = __kmp_threads[*gtid_ref]; | |||||||
50 | if (th->th.th_root->r.r_active && | |||||||
51 | (th->th.th_dispatch->th_dispatch_pr_current->pushed_ws != ct_none)) { | |||||||
52 | #if KMP_USE_DYNAMIC_LOCK1 | |||||||
53 | __kmp_push_sync(*gtid_ref, ct_ordered_in_pdo, loc_ref, NULL__null, 0); | |||||||
54 | #else | |||||||
55 | __kmp_push_sync(*gtid_ref, ct_ordered_in_pdo, loc_ref, NULL__null); | |||||||
56 | #endif | |||||||
57 | } | |||||||
58 | } | |||||||
59 | } | |||||||
60 | ||||||||
61 | void __kmp_dispatch_dxo_error(int *gtid_ref, int *cid_ref, ident_t *loc_ref) { | |||||||
62 | kmp_info_t *th; | |||||||
63 | ||||||||
64 | if (__kmp_env_consistency_check) { | |||||||
65 | th = __kmp_threads[*gtid_ref]; | |||||||
66 | if (th->th.th_dispatch->th_dispatch_pr_current->pushed_ws != ct_none) { | |||||||
67 | __kmp_pop_sync(*gtid_ref, ct_ordered_in_pdo, loc_ref); | |||||||
68 | } | |||||||
69 | } | |||||||
70 | } | |||||||
71 | ||||||||
72 | // Initialize a dispatch_private_info_template<T> buffer for a particular | |||||||
73 | // type of schedule,chunk. The loop description is found in lb (lower bound), | |||||||
74 | // ub (upper bound), and st (stride). nproc is the number of threads relevant | |||||||
75 | // to the scheduling (often the number of threads in a team, but not always if | |||||||
76 | // hierarchical scheduling is used). tid is the id of the thread calling | |||||||
77 | // the function within the group of nproc threads. It will have a value | |||||||
78 | // between 0 and nproc - 1. This is often just the thread id within a team, but | |||||||
79 | // is not necessarily the case when using hierarchical scheduling. | |||||||
80 | // loc is the source file location of the corresponding loop | |||||||
81 | // gtid is the global thread id | |||||||
82 | template <typename T> | |||||||
83 | void __kmp_dispatch_init_algorithm(ident_t *loc, int gtid, | |||||||
84 | dispatch_private_info_template<T> *pr, | |||||||
85 | enum sched_type schedule, T lb, T ub, | |||||||
86 | typename traits_t<T>::signed_t st, | |||||||
87 | #if USE_ITT_BUILD1 | |||||||
88 | kmp_uint64 *cur_chunk, | |||||||
89 | #endif | |||||||
90 | typename traits_t<T>::signed_t chunk, | |||||||
91 | T nproc, T tid) { | |||||||
92 | typedef typename traits_t<T>::unsigned_t UT; | |||||||
93 | typedef typename traits_t<T>::floating_t DBL; | |||||||
94 | ||||||||
95 | int active; | |||||||
96 | T tc; | |||||||
97 | kmp_info_t *th; | |||||||
98 | kmp_team_t *team; | |||||||
99 | ||||||||
100 | #ifdef KMP_DEBUG1 | |||||||
101 | typedef typename traits_t<T>::signed_t ST; | |||||||
102 | { | |||||||
103 | char *buff; | |||||||
104 | // create format specifiers before the debug output | |||||||
105 | buff = __kmp_str_format("__kmp_dispatch_init_algorithm: T#%%d called " | |||||||
106 | "pr:%%p lb:%%%s ub:%%%s st:%%%s " | |||||||
107 | "schedule:%%d chunk:%%%s nproc:%%%s tid:%%%s\n", | |||||||
108 | traits_t<T>::spec, traits_t<T>::spec, | |||||||
109 | traits_t<ST>::spec, traits_t<ST>::spec, | |||||||
110 | traits_t<T>::spec, traits_t<T>::spec); | |||||||
111 | KD_TRACE(10, (buff, gtid, pr, lb, ub, st, schedule, chunk, nproc, tid))if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, pr , lb, ub, st, schedule, chunk, nproc, tid); }; | |||||||
112 | __kmp_str_free(&buff); | |||||||
113 | } | |||||||
114 | #endif | |||||||
115 | /* setup data */ | |||||||
116 | th = __kmp_threads[gtid]; | |||||||
117 | team = th->th.th_team; | |||||||
118 | active = !team->t.t_serialized; | |||||||
119 | ||||||||
120 | #if USE_ITT_BUILD1 | |||||||
121 | int itt_need_metadata_reporting = __itt_metadata_add_ptr__kmp_itt_metadata_add_ptr__3_0 && | |||||||
122 | __kmp_forkjoin_frames_mode == 3 && | |||||||
123 | KMP_MASTER_GTID(gtid)(__kmp_tid_from_gtid((gtid)) == 0) && | |||||||
124 | #if OMP_40_ENABLED(50 >= 40) | |||||||
125 | th->th.th_teams_microtask == NULL__null && | |||||||
126 | #endif | |||||||
127 | team->t.t_active_level == 1; | |||||||
128 | #endif | |||||||
129 | #if (KMP_STATIC_STEAL_ENABLED1) | |||||||
130 | if (SCHEDULE_HAS_NONMONOTONIC(schedule)(((schedule)&kmp_sch_modifier_nonmonotonic) != 0)) | |||||||
131 | // AC: we now have only one implementation of stealing, so use it | |||||||
132 | schedule = kmp_sch_static_steal; | |||||||
133 | else | |||||||
134 | #endif | |||||||
135 | schedule = SCHEDULE_WITHOUT_MODIFIERS(schedule)(enum sched_type)( (schedule) & ~(kmp_sch_modifier_nonmonotonic | kmp_sch_modifier_monotonic)); | |||||||
136 | ||||||||
137 | /* Pick up the nomerge/ordered bits from the scheduling type */ | |||||||
138 | if ((schedule >= kmp_nm_lower) && (schedule < kmp_nm_upper)) { | |||||||
139 | pr->flags.nomerge = TRUE(!0); | |||||||
140 | schedule = | |||||||
141 | (enum sched_type)(((int)schedule) - (kmp_nm_lower - kmp_sch_lower)); | |||||||
142 | } else { | |||||||
143 | pr->flags.nomerge = FALSE0; | |||||||
144 | } | |||||||
145 | pr->type_size = traits_t<T>::type_size; // remember the size of variables | |||||||
146 | if (kmp_ord_lower & schedule) { | |||||||
147 | pr->flags.ordered = TRUE(!0); | |||||||
148 | schedule = | |||||||
149 | (enum sched_type)(((int)schedule) - (kmp_ord_lower - kmp_sch_lower)); | |||||||
150 | } else { | |||||||
151 | pr->flags.ordered = FALSE0; | |||||||
152 | } | |||||||
153 | ||||||||
154 | if (schedule == kmp_sch_static) { | |||||||
155 | schedule = __kmp_static; | |||||||
156 | } else { | |||||||
157 | if (schedule == kmp_sch_runtime) { | |||||||
158 | // Use the scheduling specified by OMP_SCHEDULE (or __kmp_sch_default if | |||||||
159 | // not specified) | |||||||
160 | schedule = team->t.t_sched.r_sched_type; | |||||||
161 | // Detail the schedule if needed (global controls are differentiated | |||||||
162 | // appropriately) | |||||||
163 | if (schedule == kmp_sch_guided_chunked) { | |||||||
164 | schedule = __kmp_guided; | |||||||
165 | } else if (schedule == kmp_sch_static) { | |||||||
166 | schedule = __kmp_static; | |||||||
167 | } | |||||||
168 | // Use the chunk size specified by OMP_SCHEDULE (or default if not | |||||||
169 | // specified) | |||||||
170 | chunk = team->t.t_sched.chunk; | |||||||
171 | #if USE_ITT_BUILD1 | |||||||
172 | if (cur_chunk) | |||||||
173 | *cur_chunk = chunk; | |||||||
174 | #endif | |||||||
175 | #ifdef KMP_DEBUG1 | |||||||
176 | { | |||||||
177 | char *buff; | |||||||
178 | // create format specifiers before the debug output | |||||||
179 | buff = __kmp_str_format("__kmp_dispatch_init_algorithm: T#%%d new: " | |||||||
180 | "schedule:%%d chunk:%%%s\n", | |||||||
181 | traits_t<ST>::spec); | |||||||
182 | KD_TRACE(10, (buff, gtid, schedule, chunk))if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, schedule , chunk); }; | |||||||
183 | __kmp_str_free(&buff); | |||||||
184 | } | |||||||
185 | #endif | |||||||
186 | } else { | |||||||
187 | if (schedule == kmp_sch_guided_chunked) { | |||||||
188 | schedule = __kmp_guided; | |||||||
189 | } | |||||||
190 | if (chunk <= 0) { | |||||||
191 | chunk = KMP_DEFAULT_CHUNK1; | |||||||
192 | } | |||||||
193 | } | |||||||
194 | ||||||||
195 | if (schedule == kmp_sch_auto) { | |||||||
196 | // mapping and differentiation: in the __kmp_do_serial_initialize() | |||||||
197 | schedule = __kmp_auto; | |||||||
198 | #ifdef KMP_DEBUG1 | |||||||
199 | { | |||||||
200 | char *buff; | |||||||
201 | // create format specifiers before the debug output | |||||||
202 | buff = __kmp_str_format( | |||||||
203 | "__kmp_dispatch_init_algorithm: kmp_sch_auto: T#%%d new: " | |||||||
204 | "schedule:%%d chunk:%%%s\n", | |||||||
205 | traits_t<ST>::spec); | |||||||
206 | KD_TRACE(10, (buff, gtid, schedule, chunk))if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, schedule , chunk); }; | |||||||
207 | __kmp_str_free(&buff); | |||||||
208 | } | |||||||
209 | #endif | |||||||
210 | } | |||||||
211 | ||||||||
212 | /* guided analytical not safe for too many threads */ | |||||||
213 | if (schedule == kmp_sch_guided_analytical_chunked && nproc > 1 << 20) { | |||||||
214 | schedule = kmp_sch_guided_iterative_chunked; | |||||||
215 | KMP_WARNING(DispatchManyThreads)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_DispatchManyThreads ), __kmp_msg_null); | |||||||
216 | } | |||||||
217 | #if OMP_45_ENABLED(50 >= 45) | |||||||
218 | if (schedule == kmp_sch_runtime_simd) { | |||||||
219 | // compiler provides simd_width in the chunk parameter | |||||||
220 | schedule = team->t.t_sched.r_sched_type; | |||||||
221 | // Detail the schedule if needed (global controls are differentiated | |||||||
222 | // appropriately) | |||||||
223 | if (schedule == kmp_sch_static || schedule == kmp_sch_auto || | |||||||
224 | schedule == __kmp_static) { | |||||||
225 | schedule = kmp_sch_static_balanced_chunked; | |||||||
226 | } else { | |||||||
227 | if (schedule == kmp_sch_guided_chunked || schedule == __kmp_guided) { | |||||||
228 | schedule = kmp_sch_guided_simd; | |||||||
229 | } | |||||||
230 | chunk = team->t.t_sched.chunk * chunk; | |||||||
231 | } | |||||||
232 | #if USE_ITT_BUILD1 | |||||||
233 | if (cur_chunk) | |||||||
234 | *cur_chunk = chunk; | |||||||
235 | #endif | |||||||
236 | #ifdef KMP_DEBUG1 | |||||||
237 | { | |||||||
238 | char *buff; | |||||||
239 | // create format specifiers before the debug output | |||||||
240 | buff = __kmp_str_format("__kmp_dispatch_init: T#%%d new: schedule:%%d" | |||||||
241 | " chunk:%%%s\n", | |||||||
242 | traits_t<ST>::spec); | |||||||
243 | KD_TRACE(10, (buff, gtid, schedule, chunk))if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, schedule , chunk); }; | |||||||
244 | __kmp_str_free(&buff); | |||||||
245 | } | |||||||
246 | #endif | |||||||
247 | } | |||||||
248 | #endif // OMP_45_ENABLED | |||||||
249 | pr->u.p.parm1 = chunk; | |||||||
250 | } | |||||||
251 | KMP_ASSERT2((kmp_sch_lower < schedule && schedule < kmp_sch_upper),if (!((kmp_sch_lower < schedule && schedule < kmp_sch_upper ))) { __kmp_debug_assert(("unknown scheduling type"), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 252); } | |||||||
252 | "unknown scheduling type")if (!((kmp_sch_lower < schedule && schedule < kmp_sch_upper ))) { __kmp_debug_assert(("unknown scheduling type"), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 252); }; | |||||||
253 | ||||||||
254 | pr->u.p.count = 0; | |||||||
255 | ||||||||
256 | if (__kmp_env_consistency_check) { | |||||||
257 | if (st == 0) { | |||||||
258 | __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, | |||||||
259 | (pr->flags.ordered ? ct_pdo_ordered : ct_pdo), loc); | |||||||
260 | } | |||||||
261 | } | |||||||
262 | // compute trip count | |||||||
263 | if (st == 1) { // most common case | |||||||
264 | if (ub >= lb) { | |||||||
265 | tc = ub - lb + 1; | |||||||
266 | } else { // ub < lb | |||||||
267 | tc = 0; // zero-trip | |||||||
268 | } | |||||||
269 | } else if (st < 0) { | |||||||
270 | if (lb >= ub) { | |||||||
271 | // AC: cast to unsigned is needed for loops like (i=2B; i>-2B; i-=1B), | |||||||
272 | // where the division needs to be unsigned regardless of the result type | |||||||
273 | tc = (UT)(lb - ub) / (-st) + 1; | |||||||
274 | } else { // lb < ub | |||||||
275 | tc = 0; // zero-trip | |||||||
276 | } | |||||||
277 | } else { // st > 0 | |||||||
278 | if (ub >= lb) { | |||||||
279 | // AC: cast to unsigned is needed for loops like (i=-2B; i<2B; i+=1B), | |||||||
280 | // where the division needs to be unsigned regardless of the result type | |||||||
281 | tc = (UT)(ub - lb) / st + 1; | |||||||
282 | } else { // ub < lb | |||||||
283 | tc = 0; // zero-trip | |||||||
284 | } | |||||||
285 | } | |||||||
286 | ||||||||
287 | pr->u.p.lb = lb; | |||||||
288 | pr->u.p.ub = ub; | |||||||
289 | pr->u.p.st = st; | |||||||
290 | pr->u.p.tc = tc; | |||||||
291 | ||||||||
292 | #if KMP_OS_WINDOWS0 | |||||||
293 | pr->u.p.last_upper = ub + st; | |||||||
294 | #endif /* KMP_OS_WINDOWS */ | |||||||
295 | ||||||||
296 | /* NOTE: only the active parallel region(s) has active ordered sections */ | |||||||
297 | ||||||||
298 | if (active) { | |||||||
299 | if (pr->flags.ordered) { | |||||||
300 | pr->ordered_bumped = 0; | |||||||
301 | pr->u.p.ordered_lower = 1; | |||||||
302 | pr->u.p.ordered_upper = 0; | |||||||
303 | } | |||||||
304 | } | |||||||
305 | ||||||||
306 | switch (schedule) { | |||||||
307 | #if (KMP_STATIC_STEAL_ENABLED1) | |||||||
308 | case kmp_sch_static_steal: { | |||||||
309 | T ntc, init; | |||||||
310 | ||||||||
311 | KD_TRACE(100,if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_steal case\n" , gtid); } | |||||||
312 | ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_steal case\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_steal case\n" , gtid); } | |||||||
313 | gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_steal case\n" , gtid); }; | |||||||
314 | ||||||||
315 | ntc = (tc % chunk ? 1 : 0) + tc / chunk; | |||||||
316 | if (nproc > 1 && ntc >= nproc) { | |||||||
317 | KMP_COUNT_BLOCK(OMP_LOOP_STATIC_STEAL)((void)0); | |||||||
318 | T id = tid; | |||||||
319 | T small_chunk, extras; | |||||||
320 | ||||||||
321 | small_chunk = ntc / nproc; | |||||||
322 | extras = ntc % nproc; | |||||||
323 | ||||||||
324 | init = id * small_chunk + (id < extras ? id : extras); | |||||||
325 | pr->u.p.count = init; | |||||||
326 | pr->u.p.ub = init + small_chunk + (id < extras ? 1 : 0); | |||||||
327 | ||||||||
328 | pr->u.p.parm2 = lb; | |||||||
329 | // pr->pfields.parm3 = 0; // it's not used in static_steal | |||||||
330 | pr->u.p.parm4 = (id + 1) % nproc; // remember neighbour tid | |||||||
331 | pr->u.p.st = st; | |||||||
332 | if (traits_t<T>::type_size > 4) { | |||||||
333 | // AC: TODO: check if 16-byte CAS available and use it to | |||||||
334 | // improve performance (probably wait for explicit request | |||||||
335 | // before spending time on this). | |||||||
336 | // For now use dynamically allocated per-thread lock, | |||||||
337 | // free memory in __kmp_dispatch_next when status==0. | |||||||
338 | KMP_DEBUG_ASSERT(th->th.th_dispatch->th_steal_lock == NULL)if (!(th->th.th_dispatch->th_steal_lock == __null)) { __kmp_debug_assert ("th->th.th_dispatch->th_steal_lock == __null", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 338); }; | |||||||
339 | th->th.th_dispatch->th_steal_lock = | |||||||
340 | (kmp_lock_t *)__kmp_allocate(sizeof(kmp_lock_t))___kmp_allocate((sizeof(kmp_lock_t)), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 340); | |||||||
341 | __kmp_init_lock(th->th.th_dispatch->th_steal_lock); | |||||||
342 | } | |||||||
343 | break; | |||||||
344 | } else { | |||||||
345 | KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d falling-through to "if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d falling-through to " "kmp_sch_static_balanced\n", gtid); } | |||||||
346 | "kmp_sch_static_balanced\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d falling-through to " "kmp_sch_static_balanced\n", gtid); } | |||||||
347 | gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d falling-through to " "kmp_sch_static_balanced\n", gtid); }; | |||||||
348 | schedule = kmp_sch_static_balanced; | |||||||
349 | /* too few iterations: fall-through to kmp_sch_static_balanced */ | |||||||
350 | } // if | |||||||
351 | /* FALL-THROUGH to static balanced */ | |||||||
352 | } // case | |||||||
353 | #endif | |||||||
354 | case kmp_sch_static_balanced: { | |||||||
355 | T init, limit; | |||||||
356 | ||||||||
357 | KD_TRACE(if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_balanced case\n" , gtid); } | |||||||
358 | 100,if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_balanced case\n" , gtid); } | |||||||
359 | ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_balanced case\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_balanced case\n" , gtid); } | |||||||
360 | gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_balanced case\n" , gtid); }; | |||||||
361 | ||||||||
362 | if (nproc > 1) { | |||||||
363 | T id = tid; | |||||||
364 | ||||||||
365 | if (tc < nproc) { | |||||||
366 | if (id < tc) { | |||||||
367 | init = id; | |||||||
368 | limit = id; | |||||||
369 | pr->u.p.parm1 = (id == tc - 1); /* parm1 stores *plastiter */ | |||||||
370 | } else { | |||||||
371 | pr->u.p.count = 1; /* means no more chunks to execute */ | |||||||
372 | pr->u.p.parm1 = FALSE0; | |||||||
373 | break; | |||||||
374 | } | |||||||
375 | } else { | |||||||
376 | T small_chunk = tc / nproc; | |||||||
377 | T extras = tc % nproc; | |||||||
378 | init = id * small_chunk + (id < extras ? id : extras); | |||||||
379 | limit = init + small_chunk - (id < extras ? 0 : 1); | |||||||
380 | pr->u.p.parm1 = (id == nproc - 1); | |||||||
381 | } | |||||||
382 | } else { | |||||||
383 | if (tc > 0) { | |||||||
384 | init = 0; | |||||||
385 | limit = tc - 1; | |||||||
386 | pr->u.p.parm1 = TRUE(!0); | |||||||
387 | } else { | |||||||
388 | // zero trip count | |||||||
389 | pr->u.p.count = 1; /* means no more chunks to execute */ | |||||||
390 | pr->u.p.parm1 = FALSE0; | |||||||
391 | break; | |||||||
392 | } | |||||||
393 | } | |||||||
394 | #if USE_ITT_BUILD1 | |||||||
395 | // Calculate chunk for metadata report | |||||||
396 | if (itt_need_metadata_reporting) | |||||||
397 | if (cur_chunk) | |||||||
398 | *cur_chunk = limit - init + 1; | |||||||
399 | #endif | |||||||
400 | if (st == 1) { | |||||||
401 | pr->u.p.lb = lb + init; | |||||||
402 | pr->u.p.ub = lb + limit; | |||||||
403 | } else { | |||||||
404 | // calculated upper bound, "ub" is user-defined upper bound | |||||||
405 | T ub_tmp = lb + limit * st; | |||||||
406 | pr->u.p.lb = lb + init * st; | |||||||
407 | // adjust upper bound to "ub" if needed, so that MS lastprivate will match | |||||||
408 | // it exactly | |||||||
409 | if (st > 0) { | |||||||
410 | pr->u.p.ub = (ub_tmp + st > ub ? ub : ub_tmp); | |||||||
411 | } else { | |||||||
412 | pr->u.p.ub = (ub_tmp + st < ub ? ub : ub_tmp); | |||||||
413 | } | |||||||
414 | } | |||||||
415 | if (pr->flags.ordered) { | |||||||
416 | pr->u.p.ordered_lower = init; | |||||||
417 | pr->u.p.ordered_upper = limit; | |||||||
418 | } | |||||||
419 | break; | |||||||
420 | } // case | |||||||
421 | #if OMP_45_ENABLED(50 >= 45) | |||||||
422 | case kmp_sch_static_balanced_chunked: { | |||||||
423 | // similar to balanced, but chunk adjusted to multiple of simd width | |||||||
424 | T nth = nproc; | |||||||
425 | KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d runtime(simd:static)"if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d runtime(simd:static)" " -> falling-through to static_greedy\n", gtid); } | |||||||
426 | " -> falling-through to static_greedy\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d runtime(simd:static)" " -> falling-through to static_greedy\n", gtid); } | |||||||
427 | gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d runtime(simd:static)" " -> falling-through to static_greedy\n", gtid); }; | |||||||
428 | schedule = kmp_sch_static_greedy; | |||||||
429 | if (nth > 1) | |||||||
430 | pr->u.p.parm1 = ((tc + nth - 1) / nth + chunk - 1) & ~(chunk - 1); | |||||||
431 | else | |||||||
432 | pr->u.p.parm1 = tc; | |||||||
433 | break; | |||||||
434 | } // case | |||||||
435 | case kmp_sch_guided_simd: | |||||||
436 | #endif // OMP_45_ENABLED | |||||||
437 | case kmp_sch_guided_iterative_chunked: { | |||||||
438 | KD_TRACE(if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_guided_iterative_chunked" " case\n", gtid); } | |||||||
439 | 100,if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_guided_iterative_chunked" " case\n", gtid); } | |||||||
440 | ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_guided_iterative_chunked"if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_guided_iterative_chunked" " case\n", gtid); } | |||||||
441 | " case\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_guided_iterative_chunked" " case\n", gtid); } | |||||||
442 | gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_guided_iterative_chunked" " case\n", gtid); }; | |||||||
443 | ||||||||
444 | if (nproc > 1) { | |||||||
445 | if ((2L * chunk + 1) * nproc >= tc) { | |||||||
446 | /* chunk size too large, switch to dynamic */ | |||||||
447 | schedule = kmp_sch_dynamic_chunked; | |||||||
448 | } else { | |||||||
449 | // when remaining iters become less than parm2 - switch to dynamic | |||||||
450 | pr->u.p.parm2 = guided_int_param * nproc * (chunk + 1); | |||||||
451 | *(double *)&pr->u.p.parm3 = | |||||||
452 | guided_flt_param / nproc; // may occupy parm3 and parm4 | |||||||
453 | } | |||||||
454 | } else { | |||||||
455 | KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d falling-through to "if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d falling-through to " "kmp_sch_static_greedy\n", gtid); } | |||||||
456 | "kmp_sch_static_greedy\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d falling-through to " "kmp_sch_static_greedy\n", gtid); } | |||||||
457 | gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d falling-through to " "kmp_sch_static_greedy\n", gtid); }; | |||||||
458 | schedule = kmp_sch_static_greedy; | |||||||
459 | /* team->t.t_nproc == 1: fall-through to kmp_sch_static_greedy */ | |||||||
460 | KD_TRACE(if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_greedy case\n" , gtid); } | |||||||
461 | 100,if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_greedy case\n" , gtid); } | |||||||
462 | ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_greedy case\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_greedy case\n" , gtid); } | |||||||
463 | gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_greedy case\n" , gtid); }; | |||||||
464 | pr->u.p.parm1 = tc; | |||||||
465 | } // if | |||||||
466 | } // case | |||||||
467 | break; | |||||||
468 | case kmp_sch_guided_analytical_chunked: { | |||||||
469 | KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d "if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d " "kmp_sch_guided_analytical_chunked case\n", gtid); } | |||||||
470 | "kmp_sch_guided_analytical_chunked case\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d " "kmp_sch_guided_analytical_chunked case\n", gtid); } | |||||||
471 | gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d " "kmp_sch_guided_analytical_chunked case\n", gtid); }; | |||||||
472 | ||||||||
473 | if (nproc > 1) { | |||||||
474 | if ((2L * chunk + 1) * nproc >= tc) { | |||||||
475 | /* chunk size too large, switch to dynamic */ | |||||||
476 | schedule = kmp_sch_dynamic_chunked; | |||||||
477 | } else { | |||||||
478 | /* commonly used term: (2 nproc - 1)/(2 nproc) */ | |||||||
479 | DBL x; | |||||||
480 | ||||||||
481 | #if KMP_OS_WINDOWS0 && KMP_ARCH_X860 | |||||||
482 | /* Linux* OS already has 64-bit computation by default for long double, | |||||||
483 | and on Windows* OS on Intel(R) 64, /Qlong_double doesn't work. On | |||||||
484 | Windows* OS on IA-32 architecture, we need to set precision to 64-bit | |||||||
485 | instead of the default 53-bit. Even though long double doesn't work | |||||||
486 | on Windows* OS on Intel(R) 64, the resulting lack of precision is not | |||||||
487 | expected to impact the correctness of the algorithm, but this has not | |||||||
488 | been mathematically proven. */ | |||||||
489 | // save original FPCW and set precision to 64-bit, as | |||||||
490 | // Windows* OS on IA-32 architecture defaults to 53-bit | |||||||
491 | unsigned int oldFpcw = _control87(0, 0); | |||||||
492 | _control87(_PC_64, _MCW_PC); // 0,0x30000 | |||||||
493 | #endif | |||||||
494 | /* value used for comparison in solver for cross-over point */ | |||||||
495 | long double target = ((long double)chunk * 2 + 1) * nproc / tc; | |||||||
496 | ||||||||
497 | /* crossover point--chunk indexes equal to or greater than | |||||||
498 | this point switch to dynamic-style scheduling */ | |||||||
499 | UT cross; | |||||||
500 | ||||||||
501 | /* commonly used term: (2 nproc - 1)/(2 nproc) */ | |||||||
502 | x = (long double)1.0 - (long double)0.5 / nproc; | |||||||
503 | ||||||||
504 | #ifdef KMP_DEBUG1 | |||||||
505 | { // test natural alignment | |||||||
506 | struct _test_a { | |||||||
507 | char a; | |||||||
508 | union { | |||||||
509 | char b; | |||||||
510 | DBL d; | |||||||
511 | }; | |||||||
512 | } t; | |||||||
513 | ptrdiff_t natural_alignment = | |||||||
514 | (ptrdiff_t)&t.b - (ptrdiff_t)&t - (ptrdiff_t)1; | |||||||
515 | //__kmp_warn( " %llx %llx %lld", (long long)&t.d, (long long)&t, (long | |||||||
516 | // long)natural_alignment ); | |||||||
517 | KMP_DEBUG_ASSERT(if (!((((ptrdiff_t)&pr->u.p.parm3) & (natural_alignment )) == 0)) { __kmp_debug_assert("(((ptrdiff_t)&pr->u.p.parm3) & (natural_alignment)) == 0" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 518); } | |||||||
518 | (((ptrdiff_t)&pr->u.p.parm3) & (natural_alignment)) == 0)if (!((((ptrdiff_t)&pr->u.p.parm3) & (natural_alignment )) == 0)) { __kmp_debug_assert("(((ptrdiff_t)&pr->u.p.parm3) & (natural_alignment)) == 0" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 518); }; | |||||||
519 | } | |||||||
520 | #endif // KMP_DEBUG | |||||||
521 | ||||||||
522 | /* save the term in thread private dispatch structure */ | |||||||
523 | *(DBL *)&pr->u.p.parm3 = x; | |||||||
524 | ||||||||
525 | /* solve for the crossover point to the nearest integer i for which C_i | |||||||
526 | <= chunk */ | |||||||
527 | { | |||||||
528 | UT left, right, mid; | |||||||
529 | long double p; | |||||||
530 | ||||||||
531 | /* estimate initial upper and lower bound */ | |||||||
532 | ||||||||
533 | /* doesn't matter what value right is as long as it is positive, but | |||||||
534 | it affects performance of the solver */ | |||||||
535 | right = 229; | |||||||
536 | p = __kmp_pow<UT>(x, right); | |||||||
537 | if (p > target) { | |||||||
538 | do { | |||||||
539 | p *= p; | |||||||
540 | right <<= 1; | |||||||
541 | } while (p > target && right < (1 << 27)); | |||||||
542 | /* lower bound is previous (failed) estimate of upper bound */ | |||||||
543 | left = right >> 1; | |||||||
544 | } else { | |||||||
545 | left = 0; | |||||||
546 | } | |||||||
547 | ||||||||
548 | /* bisection root-finding method */ | |||||||
549 | while (left + 1 < right) { | |||||||
550 | mid = (left + right) / 2; | |||||||
551 | if (__kmp_pow<UT>(x, mid) > target) { | |||||||
552 | left = mid; | |||||||
553 | } else { | |||||||
554 | right = mid; | |||||||
555 | } | |||||||
556 | } // while | |||||||
557 | cross = right; | |||||||
558 | } | |||||||
559 | /* assert sanity of computed crossover point */ | |||||||
560 | KMP_ASSERT(cross && __kmp_pow<UT>(x, cross - 1) > target &&if (!(cross && __kmp_pow<UT>(x, cross - 1) > target && __kmp_pow<UT>(x, cross) <= target )) { __kmp_debug_assert("cross && __kmp_pow<UT>(x, cross - 1) > target && __kmp_pow<UT>(x, cross) <= target" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 561); } | |||||||
561 | __kmp_pow<UT>(x, cross) <= target)if (!(cross && __kmp_pow<UT>(x, cross - 1) > target && __kmp_pow<UT>(x, cross) <= target )) { __kmp_debug_assert("cross && __kmp_pow<UT>(x, cross - 1) > target && __kmp_pow<UT>(x, cross) <= target" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 561); }; | |||||||
562 | ||||||||
563 | /* save the crossover point in thread private dispatch structure */ | |||||||
564 | pr->u.p.parm2 = cross; | |||||||
565 | ||||||||
566 | // C75803 | |||||||
567 | #if ((KMP_OS_LINUX1 || KMP_OS_WINDOWS0) && KMP_ARCH_X860) && (!defined(KMP_I8)) | |||||||
568 | #define GUIDED_ANALYTICAL_WORKAROUND(x) (*(DBL *)&pr->u.p.parm3) | |||||||
569 | #else | |||||||
570 | #define GUIDED_ANALYTICAL_WORKAROUND(x) (x) | |||||||
571 | #endif | |||||||
572 | /* dynamic-style scheduling offset */ | |||||||
573 | pr->u.p.count = tc - __kmp_dispatch_guided_remaining( | |||||||
574 | tc, GUIDED_ANALYTICAL_WORKAROUND(x), cross) - | |||||||
575 | cross * chunk; | |||||||
576 | #if KMP_OS_WINDOWS0 && KMP_ARCH_X860 | |||||||
577 | // restore FPCW | |||||||
578 | _control87(oldFpcw, _MCW_PC); | |||||||
579 | #endif | |||||||
580 | } // if | |||||||
581 | } else { | |||||||
582 | KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d falling-through to "if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d falling-through to " "kmp_sch_static_greedy\n", gtid); } | |||||||
583 | "kmp_sch_static_greedy\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d falling-through to " "kmp_sch_static_greedy\n", gtid); } | |||||||
584 | gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d falling-through to " "kmp_sch_static_greedy\n", gtid); }; | |||||||
585 | schedule = kmp_sch_static_greedy; | |||||||
586 | /* team->t.t_nproc == 1: fall-through to kmp_sch_static_greedy */ | |||||||
587 | pr->u.p.parm1 = tc; | |||||||
588 | } // if | |||||||
589 | } // case | |||||||
590 | break; | |||||||
591 | case kmp_sch_static_greedy: | |||||||
592 | KD_TRACE(if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_greedy case\n" , gtid); } | |||||||
593 | 100,if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_greedy case\n" , gtid); } | |||||||
594 | ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_greedy case\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_greedy case\n" , gtid); } | |||||||
595 | gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_greedy case\n" , gtid); }; | |||||||
596 | pr->u.p.parm1 = (nproc > 1) ? (tc + nproc - 1) / nproc : tc; | |||||||
597 | break; | |||||||
598 | case kmp_sch_static_chunked: | |||||||
599 | case kmp_sch_dynamic_chunked: | |||||||
600 | if (pr->u.p.parm1 <= 0) { | |||||||
601 | pr->u.p.parm1 = KMP_DEFAULT_CHUNK1; | |||||||
602 | } | |||||||
603 | KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d "if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d " "kmp_sch_static_chunked/kmp_sch_dynamic_chunked cases\n", gtid ); } | |||||||
604 | "kmp_sch_static_chunked/kmp_sch_dynamic_chunked cases\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d " "kmp_sch_static_chunked/kmp_sch_dynamic_chunked cases\n", gtid ); } | |||||||
605 | gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d " "kmp_sch_static_chunked/kmp_sch_dynamic_chunked cases\n", gtid ); }; | |||||||
606 | break; | |||||||
607 | case kmp_sch_trapezoidal: { | |||||||
608 | /* TSS: trapezoid self-scheduling, minimum chunk_size = parm1 */ | |||||||
609 | ||||||||
610 | T parm1, parm2, parm3, parm4; | |||||||
611 | KD_TRACE(100,if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_trapezoidal case\n" , gtid); } | |||||||
612 | ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_trapezoidal case\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_trapezoidal case\n" , gtid); } | |||||||
613 | gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_trapezoidal case\n" , gtid); }; | |||||||
614 | ||||||||
615 | parm1 = chunk; | |||||||
616 | ||||||||
617 | /* F : size of the first cycle */ | |||||||
618 | parm2 = (tc / (2 * nproc)); | |||||||
619 | ||||||||
620 | if (parm2 < 1) { | |||||||
621 | parm2 = 1; | |||||||
622 | } | |||||||
623 | ||||||||
624 | /* L : size of the last cycle. Make sure the last cycle is not larger | |||||||
625 | than the first cycle. */ | |||||||
626 | if (parm1 < 1) { | |||||||
627 | parm1 = 1; | |||||||
628 | } else if (parm1 > parm2) { | |||||||
629 | parm1 = parm2; | |||||||
630 | } | |||||||
631 | ||||||||
632 | /* N : number of cycles */ | |||||||
633 | parm3 = (parm2 + parm1); | |||||||
634 | parm3 = (2 * tc + parm3 - 1) / parm3; | |||||||
635 | ||||||||
636 | if (parm3 < 2) { | |||||||
637 | parm3 = 2; | |||||||
638 | } | |||||||
639 | ||||||||
640 | /* sigma : decreasing incr of the trapezoid */ | |||||||
641 | parm4 = (parm3 - 1); | |||||||
642 | parm4 = (parm2 - parm1) / parm4; | |||||||
643 | ||||||||
644 | // pointless check, because parm4 >= 0 always | |||||||
645 | // if ( parm4 < 0 ) { | |||||||
646 | // parm4 = 0; | |||||||
647 | //} | |||||||
648 | ||||||||
649 | pr->u.p.parm1 = parm1; | |||||||
650 | pr->u.p.parm2 = parm2; | |||||||
651 | pr->u.p.parm3 = parm3; | |||||||
652 | pr->u.p.parm4 = parm4; | |||||||
653 | } // case | |||||||
654 | break; | |||||||
655 | ||||||||
656 | default: { | |||||||
657 | __kmp_fatal(KMP_MSG(UnknownSchedTypeDetected)__kmp_msg_format(kmp_i18n_msg_UnknownSchedTypeDetected), // Primary message | |||||||
658 | KMP_HNT(GetNewerLibrary)__kmp_msg_format(kmp_i18n_hnt_GetNewerLibrary), // Hint | |||||||
659 | __kmp_msg_null // Variadic argument list terminator | |||||||
660 | ); | |||||||
661 | } break; | |||||||
662 | } // switch | |||||||
663 | pr->schedule = schedule; | |||||||
664 | } | |||||||
665 | ||||||||
666 | #if KMP_USE_HIER_SCHED0 | |||||||
667 | template <typename T> | |||||||
668 | inline void __kmp_dispatch_init_hier_runtime(ident_t *loc, T lb, T ub, | |||||||
669 | typename traits_t<T>::signed_t st); | |||||||
670 | template <> | |||||||
671 | inline void | |||||||
672 | __kmp_dispatch_init_hier_runtime<kmp_int32>(ident_t *loc, kmp_int32 lb, | |||||||
673 | kmp_int32 ub, kmp_int32 st) { | |||||||
674 | __kmp_dispatch_init_hierarchy<kmp_int32>( | |||||||
675 | loc, __kmp_hier_scheds.size, __kmp_hier_scheds.layers, | |||||||
676 | __kmp_hier_scheds.scheds, __kmp_hier_scheds.small_chunks, lb, ub, st); | |||||||
677 | } | |||||||
678 | template <> | |||||||
679 | inline void | |||||||
680 | __kmp_dispatch_init_hier_runtime<kmp_uint32>(ident_t *loc, kmp_uint32 lb, | |||||||
681 | kmp_uint32 ub, kmp_int32 st) { | |||||||
682 | __kmp_dispatch_init_hierarchy<kmp_uint32>( | |||||||
683 | loc, __kmp_hier_scheds.size, __kmp_hier_scheds.layers, | |||||||
684 | __kmp_hier_scheds.scheds, __kmp_hier_scheds.small_chunks, lb, ub, st); | |||||||
685 | } | |||||||
686 | template <> | |||||||
687 | inline void | |||||||
688 | __kmp_dispatch_init_hier_runtime<kmp_int64>(ident_t *loc, kmp_int64 lb, | |||||||
689 | kmp_int64 ub, kmp_int64 st) { | |||||||
690 | __kmp_dispatch_init_hierarchy<kmp_int64>( | |||||||
691 | loc, __kmp_hier_scheds.size, __kmp_hier_scheds.layers, | |||||||
692 | __kmp_hier_scheds.scheds, __kmp_hier_scheds.large_chunks, lb, ub, st); | |||||||
693 | } | |||||||
694 | template <> | |||||||
695 | inline void | |||||||
696 | __kmp_dispatch_init_hier_runtime<kmp_uint64>(ident_t *loc, kmp_uint64 lb, | |||||||
697 | kmp_uint64 ub, kmp_int64 st) { | |||||||
698 | __kmp_dispatch_init_hierarchy<kmp_uint64>( | |||||||
699 | loc, __kmp_hier_scheds.size, __kmp_hier_scheds.layers, | |||||||
700 | __kmp_hier_scheds.scheds, __kmp_hier_scheds.large_chunks, lb, ub, st); | |||||||
701 | } | |||||||
702 | ||||||||
703 | // free all the hierarchy scheduling memory associated with the team | |||||||
704 | void __kmp_dispatch_free_hierarchies(kmp_team_t *team) { | |||||||
705 | int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2; | |||||||
706 | for (int i = 0; i < num_disp_buff; ++i) { | |||||||
707 | // type does not matter here so use kmp_int32 | |||||||
708 | auto sh = | |||||||
709 | reinterpret_cast<dispatch_shared_info_template<kmp_int32> volatile *>( | |||||||
710 | &team->t.t_disp_buffer[i]); | |||||||
711 | if (sh->hier) { | |||||||
712 | sh->hier->deallocate(); | |||||||
713 | __kmp_free(sh->hier)___kmp_free((sh->hier), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 713); | |||||||
714 | } | |||||||
715 | } | |||||||
716 | } | |||||||
717 | #endif | |||||||
718 | ||||||||
719 | // UT - unsigned flavor of T, ST - signed flavor of T, | |||||||
720 | // DBL - double if sizeof(T)==4, or long double if sizeof(T)==8 | |||||||
721 | template <typename T> | |||||||
722 | static void | |||||||
723 | __kmp_dispatch_init(ident_t *loc, int gtid, enum sched_type schedule, T lb, | |||||||
724 | T ub, typename traits_t<T>::signed_t st, | |||||||
725 | typename traits_t<T>::signed_t chunk, int push_ws) { | |||||||
726 | typedef typename traits_t<T>::unsigned_t UT; | |||||||
727 | ||||||||
728 | int active; | |||||||
729 | kmp_info_t *th; | |||||||
730 | kmp_team_t *team; | |||||||
731 | kmp_uint32 my_buffer_index; | |||||||
732 | dispatch_private_info_template<T> *pr; | |||||||
733 | dispatch_shared_info_template<T> volatile *sh; | |||||||
734 | ||||||||
735 | KMP_BUILD_ASSERT(sizeof(dispatch_private_info_template<T>) ==static_assert(sizeof(dispatch_private_info_template<T>) == sizeof(dispatch_private_info), "Build condition error") | |||||||
736 | sizeof(dispatch_private_info))static_assert(sizeof(dispatch_private_info_template<T>) == sizeof(dispatch_private_info), "Build condition error"); | |||||||
737 | KMP_BUILD_ASSERT(sizeof(dispatch_shared_info_template<UT>) ==static_assert(sizeof(dispatch_shared_info_template<UT>) == sizeof(dispatch_shared_info), "Build condition error") | |||||||
738 | sizeof(dispatch_shared_info))static_assert(sizeof(dispatch_shared_info_template<UT>) == sizeof(dispatch_shared_info), "Build condition error"); | |||||||
739 | ||||||||
740 | if (!TCR_4(__kmp_init_parallel)(__kmp_init_parallel)) | |||||||
741 | __kmp_parallel_initialize(); | |||||||
742 | ||||||||
743 | #if INCLUDE_SSC_MARKS(1 && 1) | |||||||
744 | SSC_MARK_DISPATCH_INIT()__asm__ __volatile__("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 " ::"i"(0xd696) : "%ebx"); | |||||||
745 | #endif | |||||||
746 | #ifdef KMP_DEBUG1 | |||||||
747 | typedef typename traits_t<T>::signed_t ST; | |||||||
748 | { | |||||||
749 | char *buff; | |||||||
750 | // create format specifiers before the debug output | |||||||
751 | buff = __kmp_str_format("__kmp_dispatch_init: T#%%d called: schedule:%%d " | |||||||
752 | "chunk:%%%s lb:%%%s ub:%%%s st:%%%s\n", | |||||||
753 | traits_t<ST>::spec, traits_t<T>::spec, | |||||||
754 | traits_t<T>::spec, traits_t<ST>::spec); | |||||||
755 | KD_TRACE(10, (buff, gtid, schedule, chunk, lb, ub, st))if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, schedule , chunk, lb, ub, st); }; | |||||||
756 | __kmp_str_free(&buff); | |||||||
757 | } | |||||||
758 | #endif | |||||||
759 | /* setup data */ | |||||||
760 | th = __kmp_threads[gtid]; | |||||||
761 | team = th->th.th_team; | |||||||
762 | active = !team->t.t_serialized; | |||||||
763 | th->th.th_ident = loc; | |||||||
764 | ||||||||
765 | // Any half-decent optimizer will remove this test when the blocks are empty | |||||||
766 | // since the macros expand to nothing | |||||||
767 | // when statistics are disabled. | |||||||
768 | if (schedule == __kmp_static) { | |||||||
769 | KMP_COUNT_BLOCK(OMP_LOOP_STATIC)((void)0); | |||||||
770 | } else { | |||||||
771 | KMP_COUNT_BLOCK(OMP_LOOP_DYNAMIC)((void)0); | |||||||
772 | } | |||||||
773 | ||||||||
774 | #if KMP_USE_HIER_SCHED0 | |||||||
775 | // Initialize the scheduling hierarchy if requested in OMP_SCHEDULE envirable | |||||||
776 | // Hierarchical scheduling does not work with ordered, so if ordered is | |||||||
777 | // detected, then revert back to threaded scheduling. | |||||||
778 | bool ordered; | |||||||
779 | enum sched_type my_sched = schedule; | |||||||
780 | my_buffer_index = th->th.th_dispatch->th_disp_index; | |||||||
781 | pr = reinterpret_cast<dispatch_private_info_template<T> *>( | |||||||
782 | &th->th.th_dispatch | |||||||
783 | ->th_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]); | |||||||
784 | my_sched = SCHEDULE_WITHOUT_MODIFIERS(my_sched)(enum sched_type)( (my_sched) & ~(kmp_sch_modifier_nonmonotonic | kmp_sch_modifier_monotonic)); | |||||||
785 | if ((my_sched >= kmp_nm_lower) && (my_sched < kmp_nm_upper)) | |||||||
786 | my_sched = | |||||||
787 | (enum sched_type)(((int)my_sched) - (kmp_nm_lower - kmp_sch_lower)); | |||||||
788 | ordered = (kmp_ord_lower & my_sched); | |||||||
789 | if (pr->flags.use_hier) { | |||||||
790 | if (ordered) { | |||||||
791 | KD_TRACE(100, ("__kmp_dispatch_init: T#%d ordered loop detected. "if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init: T#%d ordered loop detected. " "Disabling hierarchical scheduling.\n", gtid); } | |||||||
792 | "Disabling hierarchical scheduling.\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init: T#%d ordered loop detected. " "Disabling hierarchical scheduling.\n", gtid); } | |||||||
793 | gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init: T#%d ordered loop detected. " "Disabling hierarchical scheduling.\n", gtid); }; | |||||||
794 | pr->flags.use_hier = FALSE0; | |||||||
795 | } | |||||||
796 | } | |||||||
797 | if (schedule == kmp_sch_runtime && __kmp_hier_scheds.size > 0) { | |||||||
798 | // Don't use hierarchical for ordered parallel loops and don't | |||||||
799 | // use the runtime hierarchy if one was specified in the program | |||||||
800 | if (!ordered && !pr->flags.use_hier) | |||||||
801 | __kmp_dispatch_init_hier_runtime<T>(loc, lb, ub, st); | |||||||
802 | } | |||||||
803 | #endif // KMP_USE_HIER_SCHED | |||||||
804 | ||||||||
805 | #if USE_ITT_BUILD1 | |||||||
806 | kmp_uint64 cur_chunk = chunk; | |||||||
807 | int itt_need_metadata_reporting = __itt_metadata_add_ptr__kmp_itt_metadata_add_ptr__3_0 && | |||||||
808 | __kmp_forkjoin_frames_mode == 3 && | |||||||
809 | KMP_MASTER_GTID(gtid)(__kmp_tid_from_gtid((gtid)) == 0) && | |||||||
810 | #if OMP_40_ENABLED(50 >= 40) | |||||||
811 | th->th.th_teams_microtask == NULL__null && | |||||||
812 | #endif | |||||||
813 | team->t.t_active_level == 1; | |||||||
814 | #endif | |||||||
815 | if (!active) { | |||||||
816 | pr = reinterpret_cast<dispatch_private_info_template<T> *>( | |||||||
817 | th->th.th_dispatch->th_disp_buffer); /* top of the stack */ | |||||||
818 | } else { | |||||||
819 | KMP_DEBUG_ASSERT(th->th.th_dispatch ==if (!(th->th.th_dispatch == &th->th.th_team->t.t_dispatch [th->th.th_info.ds.ds_tid])) { __kmp_debug_assert("th->th.th_dispatch == &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 820); } | |||||||
820 | &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid])if (!(th->th.th_dispatch == &th->th.th_team->t.t_dispatch [th->th.th_info.ds.ds_tid])) { __kmp_debug_assert("th->th.th_dispatch == &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 820); }; | |||||||
821 | ||||||||
822 | my_buffer_index = th->th.th_dispatch->th_disp_index++; | |||||||
823 | ||||||||
824 | /* What happens when number of threads changes, need to resize buffer? */ | |||||||
825 | pr = reinterpret_cast<dispatch_private_info_template<T> *>( | |||||||
826 | &th->th.th_dispatch | |||||||
827 | ->th_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]); | |||||||
828 | sh = reinterpret_cast<dispatch_shared_info_template<T> volatile *>( | |||||||
829 | &team->t.t_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]); | |||||||
830 | KD_TRACE(10, ("__kmp_dispatch_init: T#%d my_buffer_index:%d\n", gtid,if (kmp_d_debug >= 10) { __kmp_debug_printf ("__kmp_dispatch_init: T#%d my_buffer_index:%d\n" , gtid, my_buffer_index); } | |||||||
831 | my_buffer_index))if (kmp_d_debug >= 10) { __kmp_debug_printf ("__kmp_dispatch_init: T#%d my_buffer_index:%d\n" , gtid, my_buffer_index); }; | |||||||
832 | } | |||||||
833 | ||||||||
834 | __kmp_dispatch_init_algorithm(loc, gtid, pr, schedule, lb, ub, st, | |||||||
835 | #if USE_ITT_BUILD1 | |||||||
836 | &cur_chunk, | |||||||
837 | #endif | |||||||
838 | chunk, (T)th->th.th_team_nproc, | |||||||
839 | (T)th->th.th_info.ds.ds_tid); | |||||||
840 | if (active) { | |||||||
841 | if (pr->flags.ordered == 0) { | |||||||
842 | th->th.th_dispatch->th_deo_fcn = __kmp_dispatch_deo_error; | |||||||
843 | th->th.th_dispatch->th_dxo_fcn = __kmp_dispatch_dxo_error; | |||||||
844 | } else { | |||||||
845 | th->th.th_dispatch->th_deo_fcn = __kmp_dispatch_deo<UT>; | |||||||
846 | th->th.th_dispatch->th_dxo_fcn = __kmp_dispatch_dxo<UT>; | |||||||
847 | } | |||||||
848 | } | |||||||
849 | ||||||||
850 | if (active) { | |||||||
851 | /* The name of this buffer should be my_buffer_index when it's free to use | |||||||
852 | * it */ | |||||||
853 | ||||||||
854 | KD_TRACE(100, ("__kmp_dispatch_init: T#%d before wait: my_buffer_index:%d "if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init: T#%d before wait: my_buffer_index:%d " "sh->buffer_index:%d\n", gtid, my_buffer_index, sh->buffer_index ); } | |||||||
855 | "sh->buffer_index:%d\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init: T#%d before wait: my_buffer_index:%d " "sh->buffer_index:%d\n", gtid, my_buffer_index, sh->buffer_index ); } | |||||||
856 | gtid, my_buffer_index, sh->buffer_index))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init: T#%d before wait: my_buffer_index:%d " "sh->buffer_index:%d\n", gtid, my_buffer_index, sh->buffer_index ); }; | |||||||
857 | __kmp_wait_yield<kmp_uint32>(&sh->buffer_index, my_buffer_index, | |||||||
858 | __kmp_eq<kmp_uint32> USE_ITT_BUILD_ARG(NULL), __null); | |||||||
859 | // Note: KMP_WAIT_YIELD() cannot be used there: buffer index and | |||||||
860 | // my_buffer_index are *always* 32-bit integers. | |||||||
861 | KMP_MB(); /* is this necessary? */ | |||||||
862 | KD_TRACE(100, ("__kmp_dispatch_init: T#%d after wait: my_buffer_index:%d "if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init: T#%d after wait: my_buffer_index:%d " "sh->buffer_index:%d\n", gtid, my_buffer_index, sh->buffer_index ); } | |||||||
863 | "sh->buffer_index:%d\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init: T#%d after wait: my_buffer_index:%d " "sh->buffer_index:%d\n", gtid, my_buffer_index, sh->buffer_index ); } | |||||||
864 | gtid, my_buffer_index, sh->buffer_index))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_init: T#%d after wait: my_buffer_index:%d " "sh->buffer_index:%d\n", gtid, my_buffer_index, sh->buffer_index ); }; | |||||||
865 | ||||||||
866 | th->th.th_dispatch->th_dispatch_pr_current = (dispatch_private_info_t *)pr; | |||||||
867 | th->th.th_dispatch->th_dispatch_sh_current = | |||||||
868 | CCAST(dispatch_shared_info_t *, (volatile dispatch_shared_info_t *)sh)const_cast<dispatch_shared_info_t *>((volatile dispatch_shared_info_t *)sh); | |||||||
869 | #if USE_ITT_BUILD1 | |||||||
870 | if (pr->flags.ordered) { | |||||||
871 | __kmp_itt_ordered_init(gtid); | |||||||
872 | } | |||||||
873 | // Report loop metadata | |||||||
874 | if (itt_need_metadata_reporting) { | |||||||
875 | // Only report metadata by master of active team at level 1 | |||||||
876 | kmp_uint64 schedtype = 0; | |||||||
877 | switch (schedule) { | |||||||
878 | case kmp_sch_static_chunked: | |||||||
879 | case kmp_sch_static_balanced: // Chunk is calculated in the switch above | |||||||
880 | break; | |||||||
881 | case kmp_sch_static_greedy: | |||||||
882 | cur_chunk = pr->u.p.parm1; | |||||||
883 | break; | |||||||
884 | case kmp_sch_dynamic_chunked: | |||||||
885 | schedtype = 1; | |||||||
886 | break; | |||||||
887 | case kmp_sch_guided_iterative_chunked: | |||||||
888 | case kmp_sch_guided_analytical_chunked: | |||||||
889 | #if OMP_45_ENABLED(50 >= 45) | |||||||
890 | case kmp_sch_guided_simd: | |||||||
891 | #endif | |||||||
892 | schedtype = 2; | |||||||
893 | break; | |||||||
894 | default: | |||||||
895 | // Should we put this case under "static"? | |||||||
896 | // case kmp_sch_static_steal: | |||||||
897 | schedtype = 3; | |||||||
898 | break; | |||||||
899 | } | |||||||
900 | __kmp_itt_metadata_loop(loc, schedtype, pr->u.p.tc, cur_chunk); | |||||||
901 | } | |||||||
902 | #if KMP_USE_HIER_SCHED0 | |||||||
903 | if (pr->flags.use_hier) { | |||||||
904 | pr->u.p.count = 0; | |||||||
905 | pr->u.p.ub = pr->u.p.lb = pr->u.p.st = pr->u.p.tc = 0; | |||||||
906 | } | |||||||
907 | #endif // KMP_USER_HIER_SCHED | |||||||
908 | #endif /* USE_ITT_BUILD */ | |||||||
909 | } | |||||||
910 | ||||||||
911 | #ifdef KMP_DEBUG1 | |||||||
912 | { | |||||||
913 | char *buff; | |||||||
914 | // create format specifiers before the debug output | |||||||
915 | buff = __kmp_str_format( | |||||||
916 | "__kmp_dispatch_init: T#%%d returning: schedule:%%d ordered:%%%s " | |||||||
917 | "lb:%%%s ub:%%%s" | |||||||
918 | " st:%%%s tc:%%%s count:%%%s\n\tordered_lower:%%%s ordered_upper:%%%s" | |||||||
919 | " parm1:%%%s parm2:%%%s parm3:%%%s parm4:%%%s\n", | |||||||
920 | traits_t<UT>::spec, traits_t<T>::spec, traits_t<T>::spec, | |||||||
921 | traits_t<ST>::spec, traits_t<UT>::spec, traits_t<UT>::spec, | |||||||
922 | traits_t<UT>::spec, traits_t<UT>::spec, traits_t<T>::spec, | |||||||
923 | traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec); | |||||||
924 | KD_TRACE(10, (buff, gtid, pr->schedule, pr->flags.ordered, pr->u.p.lb,if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, pr ->schedule, pr->flags.ordered, pr->u.p.lb, pr->u. p.ub, pr->u.p.st, pr->u.p.tc, pr->u.p.count, pr-> u.p.ordered_lower, pr->u.p.ordered_upper, pr->u.p.parm1 , pr->u.p.parm2, pr->u.p.parm3, pr->u.p.parm4); } | |||||||
925 | pr->u.p.ub, pr->u.p.st, pr->u.p.tc, pr->u.p.count,if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, pr ->schedule, pr->flags.ordered, pr->u.p.lb, pr->u. p.ub, pr->u.p.st, pr->u.p.tc, pr->u.p.count, pr-> u.p.ordered_lower, pr->u.p.ordered_upper, pr->u.p.parm1 , pr->u.p.parm2, pr->u.p.parm3, pr->u.p.parm4); } | |||||||
926 | pr->u.p.ordered_lower, pr->u.p.ordered_upper, pr->u.p.parm1,if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, pr ->schedule, pr->flags.ordered, pr->u.p.lb, pr->u. p.ub, pr->u.p.st, pr->u.p.tc, pr->u.p.count, pr-> u.p.ordered_lower, pr->u.p.ordered_upper, pr->u.p.parm1 , pr->u.p.parm2, pr->u.p.parm3, pr->u.p.parm4); } | |||||||
927 | pr->u.p.parm2, pr->u.p.parm3, pr->u.p.parm4))if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, pr ->schedule, pr->flags.ordered, pr->u.p.lb, pr->u. p.ub, pr->u.p.st, pr->u.p.tc, pr->u.p.count, pr-> u.p.ordered_lower, pr->u.p.ordered_upper, pr->u.p.parm1 , pr->u.p.parm2, pr->u.p.parm3, pr->u.p.parm4); }; | |||||||
928 | __kmp_str_free(&buff); | |||||||
929 | } | |||||||
930 | #endif | |||||||
931 | #if (KMP_STATIC_STEAL_ENABLED1) | |||||||
932 | // It cannot be guaranteed that after execution of a loop with some other | |||||||
933 | // schedule kind all the parm3 variables will contain the same value. Even if | |||||||
934 | // all parm3 will be the same, it still exists a bad case like using 0 and 1 | |||||||
935 | // rather than program life-time increment. So the dedicated variable is | |||||||
936 | // required. The 'static_steal_counter' is used. | |||||||
937 | if (schedule == kmp_sch_static_steal) { | |||||||
938 | // Other threads will inspect this variable when searching for a victim. | |||||||
939 | // This is a flag showing that other threads may steal from this thread | |||||||
940 | // since then. | |||||||
941 | volatile T *p = &pr->u.p.static_steal_counter; | |||||||
942 | *p = *p + 1; | |||||||
943 | } | |||||||
944 | #endif // ( KMP_STATIC_STEAL_ENABLED ) | |||||||
945 | ||||||||
946 | #if OMPT_SUPPORT1 && OMPT_OPTIONAL1 | |||||||
947 | if (ompt_enabled.ompt_callback_work) { | |||||||
948 | ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL__null); | |||||||
949 | ompt_task_info_t *task_info = __ompt_get_task_info_object(0); | |||||||
950 | ompt_callbacks.ompt_callback(ompt_callback_work)ompt_callback_work_callback( | |||||||
951 | ompt_work_loop, ompt_scope_begin, &(team_info->parallel_data), | |||||||
952 | &(task_info->task_data), pr->u.p.tc, OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid)); | |||||||
953 | } | |||||||
954 | #endif | |||||||
955 | KMP_PUSH_PARTITIONED_TIMER(OMP_loop_dynamic)((void)0); | |||||||
956 | } | |||||||
957 | ||||||||
958 | /* For ordered loops, either __kmp_dispatch_finish() should be called after | |||||||
959 | * every iteration, or __kmp_dispatch_finish_chunk() should be called after | |||||||
960 | * every chunk of iterations. If the ordered section(s) were not executed | |||||||
961 | * for this iteration (or every iteration in this chunk), we need to set the | |||||||
962 | * ordered iteration counters so that the next thread can proceed. */ | |||||||
963 | template <typename UT> | |||||||
964 | static void __kmp_dispatch_finish(int gtid, ident_t *loc) { | |||||||
965 | typedef typename traits_t<UT>::signed_t ST; | |||||||
966 | kmp_info_t *th = __kmp_threads[gtid]; | |||||||
967 | ||||||||
968 | KD_TRACE(100, ("__kmp_dispatch_finish: T#%d called\n", gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_finish: T#%d called\n" , gtid); }; | |||||||
969 | if (!th->th.th_team->t.t_serialized) { | |||||||
970 | ||||||||
971 | dispatch_private_info_template<UT> *pr = | |||||||
972 | reinterpret_cast<dispatch_private_info_template<UT> *>( | |||||||
973 | th->th.th_dispatch->th_dispatch_pr_current); | |||||||
974 | dispatch_shared_info_template<UT> volatile *sh = | |||||||
975 | reinterpret_cast<dispatch_shared_info_template<UT> volatile *>( | |||||||
976 | th->th.th_dispatch->th_dispatch_sh_current); | |||||||
977 | KMP_DEBUG_ASSERT(pr)if (!(pr)) { __kmp_debug_assert("pr", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 977); }; | |||||||
978 | KMP_DEBUG_ASSERT(sh)if (!(sh)) { __kmp_debug_assert("sh", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 978); }; | |||||||
979 | KMP_DEBUG_ASSERT(th->th.th_dispatch ==if (!(th->th.th_dispatch == &th->th.th_team->t.t_dispatch [th->th.th_info.ds.ds_tid])) { __kmp_debug_assert("th->th.th_dispatch == &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 980); } | |||||||
980 | &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid])if (!(th->th.th_dispatch == &th->th.th_team->t.t_dispatch [th->th.th_info.ds.ds_tid])) { __kmp_debug_assert("th->th.th_dispatch == &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 980); }; | |||||||
981 | ||||||||
982 | if (pr->ordered_bumped) { | |||||||
983 | KD_TRACE(if (kmp_d_debug >= 1000) { __kmp_debug_printf ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n" , gtid); } | |||||||
984 | 1000,if (kmp_d_debug >= 1000) { __kmp_debug_printf ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n" , gtid); } | |||||||
985 | ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n",if (kmp_d_debug >= 1000) { __kmp_debug_printf ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n" , gtid); } | |||||||
986 | gtid))if (kmp_d_debug >= 1000) { __kmp_debug_printf ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n" , gtid); }; | |||||||
987 | pr->ordered_bumped = 0; | |||||||
988 | } else { | |||||||
989 | UT lower = pr->u.p.ordered_lower; | |||||||
990 | ||||||||
991 | #ifdef KMP_DEBUG1 | |||||||
992 | { | |||||||
993 | char *buff; | |||||||
994 | // create format specifiers before the debug output | |||||||
995 | buff = __kmp_str_format("__kmp_dispatch_finish: T#%%d before wait: " | |||||||
996 | "ordered_iteration:%%%s lower:%%%s\n", | |||||||
997 | traits_t<UT>::spec, traits_t<UT>::spec); | |||||||
998 | KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower))if (kmp_d_debug >= 1000) { __kmp_debug_printf (buff, gtid, sh->u.s.ordered_iteration, lower); }; | |||||||
999 | __kmp_str_free(&buff); | |||||||
1000 | } | |||||||
1001 | #endif | |||||||
1002 | ||||||||
1003 | __kmp_wait_yield<UT>(&sh->u.s.ordered_iteration, lower, | |||||||
1004 | __kmp_ge<UT> USE_ITT_BUILD_ARG(NULL), __null); | |||||||
1005 | KMP_MB(); /* is this necessary? */ | |||||||
1006 | #ifdef KMP_DEBUG1 | |||||||
1007 | { | |||||||
1008 | char *buff; | |||||||
1009 | // create format specifiers before the debug output | |||||||
1010 | buff = __kmp_str_format("__kmp_dispatch_finish: T#%%d after wait: " | |||||||
1011 | "ordered_iteration:%%%s lower:%%%s\n", | |||||||
1012 | traits_t<UT>::spec, traits_t<UT>::spec); | |||||||
1013 | KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower))if (kmp_d_debug >= 1000) { __kmp_debug_printf (buff, gtid, sh->u.s.ordered_iteration, lower); }; | |||||||
1014 | __kmp_str_free(&buff); | |||||||
1015 | } | |||||||
1016 | #endif | |||||||
1017 | ||||||||
1018 | test_then_inc<ST>((volatile ST *)&sh->u.s.ordered_iteration); | |||||||
1019 | } // if | |||||||
1020 | } // if | |||||||
1021 | KD_TRACE(100, ("__kmp_dispatch_finish: T#%d returned\n", gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_finish: T#%d returned\n" , gtid); }; | |||||||
1022 | } | |||||||
1023 | ||||||||
1024 | #ifdef KMP_GOMP_COMPAT | |||||||
1025 | ||||||||
1026 | template <typename UT> | |||||||
1027 | static void __kmp_dispatch_finish_chunk(int gtid, ident_t *loc) { | |||||||
1028 | typedef typename traits_t<UT>::signed_t ST; | |||||||
1029 | kmp_info_t *th = __kmp_threads[gtid]; | |||||||
1030 | ||||||||
1031 | KD_TRACE(100, ("__kmp_dispatch_finish_chunk: T#%d called\n", gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_finish_chunk: T#%d called\n" , gtid); }; | |||||||
1032 | if (!th->th.th_team->t.t_serialized) { | |||||||
1033 | // int cid; | |||||||
1034 | dispatch_private_info_template<UT> *pr = | |||||||
1035 | reinterpret_cast<dispatch_private_info_template<UT> *>( | |||||||
1036 | th->th.th_dispatch->th_dispatch_pr_current); | |||||||
1037 | dispatch_shared_info_template<UT> volatile *sh = | |||||||
1038 | reinterpret_cast<dispatch_shared_info_template<UT> volatile *>( | |||||||
1039 | th->th.th_dispatch->th_dispatch_sh_current); | |||||||
1040 | KMP_DEBUG_ASSERT(pr)if (!(pr)) { __kmp_debug_assert("pr", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 1040); }; | |||||||
1041 | KMP_DEBUG_ASSERT(sh)if (!(sh)) { __kmp_debug_assert("sh", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 1041); }; | |||||||
1042 | KMP_DEBUG_ASSERT(th->th.th_dispatch ==if (!(th->th.th_dispatch == &th->th.th_team->t.t_dispatch [th->th.th_info.ds.ds_tid])) { __kmp_debug_assert("th->th.th_dispatch == &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 1043); } | |||||||
1043 | &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid])if (!(th->th.th_dispatch == &th->th.th_team->t.t_dispatch [th->th.th_info.ds.ds_tid])) { __kmp_debug_assert("th->th.th_dispatch == &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 1043); }; | |||||||
1044 | ||||||||
1045 | // for (cid = 0; cid < KMP_MAX_ORDERED; ++cid) { | |||||||
1046 | UT lower = pr->u.p.ordered_lower; | |||||||
1047 | UT upper = pr->u.p.ordered_upper; | |||||||
1048 | UT inc = upper - lower + 1; | |||||||
1049 | ||||||||
1050 | if (pr->ordered_bumped == inc) { | |||||||
1051 | KD_TRACE(if (kmp_d_debug >= 1000) { __kmp_debug_printf ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n" , gtid); } | |||||||
1052 | 1000,if (kmp_d_debug >= 1000) { __kmp_debug_printf ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n" , gtid); } | |||||||
1053 | ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n",if (kmp_d_debug >= 1000) { __kmp_debug_printf ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n" , gtid); } | |||||||
1054 | gtid))if (kmp_d_debug >= 1000) { __kmp_debug_printf ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n" , gtid); }; | |||||||
1055 | pr->ordered_bumped = 0; | |||||||
1056 | } else { | |||||||
1057 | inc -= pr->ordered_bumped; | |||||||
1058 | ||||||||
1059 | #ifdef KMP_DEBUG1 | |||||||
1060 | { | |||||||
1061 | char *buff; | |||||||
1062 | // create format specifiers before the debug output | |||||||
1063 | buff = __kmp_str_format( | |||||||
1064 | "__kmp_dispatch_finish_chunk: T#%%d before wait: " | |||||||
1065 | "ordered_iteration:%%%s lower:%%%s upper:%%%s\n", | |||||||
1066 | traits_t<UT>::spec, traits_t<UT>::spec, traits_t<UT>::spec); | |||||||
1067 | KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower, upper))if (kmp_d_debug >= 1000) { __kmp_debug_printf (buff, gtid, sh->u.s.ordered_iteration, lower, upper); }; | |||||||
1068 | __kmp_str_free(&buff); | |||||||
1069 | } | |||||||
1070 | #endif | |||||||
1071 | ||||||||
1072 | __kmp_wait_yield<UT>(&sh->u.s.ordered_iteration, lower, | |||||||
1073 | __kmp_ge<UT> USE_ITT_BUILD_ARG(NULL), __null); | |||||||
1074 | ||||||||
1075 | KMP_MB(); /* is this necessary? */ | |||||||
1076 | KD_TRACE(1000, ("__kmp_dispatch_finish_chunk: T#%d resetting "if (kmp_d_debug >= 1000) { __kmp_debug_printf ("__kmp_dispatch_finish_chunk: T#%d resetting " "ordered_bumped to zero\n", gtid); } | |||||||
1077 | "ordered_bumped to zero\n",if (kmp_d_debug >= 1000) { __kmp_debug_printf ("__kmp_dispatch_finish_chunk: T#%d resetting " "ordered_bumped to zero\n", gtid); } | |||||||
1078 | gtid))if (kmp_d_debug >= 1000) { __kmp_debug_printf ("__kmp_dispatch_finish_chunk: T#%d resetting " "ordered_bumped to zero\n", gtid); }; | |||||||
1079 | pr->ordered_bumped = 0; | |||||||
1080 | //!!!!! TODO check if the inc should be unsigned, or signed??? | |||||||
1081 | #ifdef KMP_DEBUG1 | |||||||
1082 | { | |||||||
1083 | char *buff; | |||||||
1084 | // create format specifiers before the debug output | |||||||
1085 | buff = __kmp_str_format( | |||||||
1086 | "__kmp_dispatch_finish_chunk: T#%%d after wait: " | |||||||
1087 | "ordered_iteration:%%%s inc:%%%s lower:%%%s upper:%%%s\n", | |||||||
1088 | traits_t<UT>::spec, traits_t<UT>::spec, traits_t<UT>::spec, | |||||||
1089 | traits_t<UT>::spec); | |||||||
1090 | KD_TRACE(1000,if (kmp_d_debug >= 1000) { __kmp_debug_printf (buff, gtid, sh->u.s.ordered_iteration, inc, lower, upper); } | |||||||
1091 | (buff, gtid, sh->u.s.ordered_iteration, inc, lower, upper))if (kmp_d_debug >= 1000) { __kmp_debug_printf (buff, gtid, sh->u.s.ordered_iteration, inc, lower, upper); }; | |||||||
1092 | __kmp_str_free(&buff); | |||||||
1093 | } | |||||||
1094 | #endif | |||||||
1095 | ||||||||
1096 | test_then_add<ST>((volatile ST *)&sh->u.s.ordered_iteration, inc); | |||||||
1097 | } | |||||||
1098 | // } | |||||||
1099 | } | |||||||
1100 | KD_TRACE(100, ("__kmp_dispatch_finish_chunk: T#%d returned\n", gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_finish_chunk: T#%d returned\n" , gtid); }; | |||||||
1101 | } | |||||||
1102 | ||||||||
1103 | #endif /* KMP_GOMP_COMPAT */ | |||||||
1104 | ||||||||
1105 | template <typename T> | |||||||
1106 | int __kmp_dispatch_next_algorithm(int gtid, | |||||||
1107 | dispatch_private_info_template<T> *pr, | |||||||
1108 | dispatch_shared_info_template<T> volatile *sh, | |||||||
1109 | kmp_int32 *p_last, T *p_lb, T *p_ub, | |||||||
1110 | typename traits_t<T>::signed_t *p_st, T nproc, | |||||||
1111 | T tid) { | |||||||
1112 | typedef typename traits_t<T>::unsigned_t UT; | |||||||
1113 | typedef typename traits_t<T>::signed_t ST; | |||||||
1114 | typedef typename traits_t<T>::floating_t DBL; | |||||||
1115 | int status = 0; | |||||||
1116 | kmp_int32 last = 0; | |||||||
1117 | T start; | |||||||
1118 | ST incr; | |||||||
1119 | UT limit, trip, init; | |||||||
1120 | kmp_info_t *th = __kmp_threads[gtid]; | |||||||
1121 | kmp_team_t *team = th->th.th_team; | |||||||
1122 | ||||||||
1123 | KMP_DEBUG_ASSERT(th->th.th_dispatch ==if (!(th->th.th_dispatch == &th->th.th_team->t.t_dispatch [th->th.th_info.ds.ds_tid])) { __kmp_debug_assert("th->th.th_dispatch == &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 1124); } | |||||||
1124 | &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid])if (!(th->th.th_dispatch == &th->th.th_team->t.t_dispatch [th->th.th_info.ds.ds_tid])) { __kmp_debug_assert("th->th.th_dispatch == &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 1124); }; | |||||||
1125 | KMP_DEBUG_ASSERT(pr)if (!(pr)) { __kmp_debug_assert("pr", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 1125); }; | |||||||
1126 | KMP_DEBUG_ASSERT(sh)if (!(sh)) { __kmp_debug_assert("sh", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 1126); }; | |||||||
1127 | KMP_DEBUG_ASSERT(tid >= 0 && tid < nproc)if (!(tid >= 0 && tid < nproc)) { __kmp_debug_assert ("tid >= 0 && tid < nproc", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 1127); }; | |||||||
1128 | #ifdef KMP_DEBUG1 | |||||||
1129 | { | |||||||
1130 | char *buff; | |||||||
1131 | // create format specifiers before the debug output | |||||||
1132 | buff = | |||||||
1133 | __kmp_str_format("__kmp_dispatch_next_algorithm: T#%%d called pr:%%p " | |||||||
1134 | "sh:%%p nproc:%%%s tid:%%%s\n", | |||||||
1135 | traits_t<T>::spec, traits_t<T>::spec); | |||||||
1136 | KD_TRACE(10, (buff, gtid, pr, sh, nproc, tid))if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, pr , sh, nproc, tid); }; | |||||||
1137 | __kmp_str_free(&buff); | |||||||
1138 | } | |||||||
1139 | #endif | |||||||
1140 | ||||||||
1141 | // zero trip count | |||||||
1142 | if (pr->u.p.tc == 0) { | |||||||
1143 | KD_TRACE(10,if (kmp_d_debug >= 10) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d early exit trip count is " "zero status:%d\n", gtid, status); } | |||||||
1144 | ("__kmp_dispatch_next_algorithm: T#%d early exit trip count is "if (kmp_d_debug >= 10) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d early exit trip count is " "zero status:%d\n", gtid, status); } | |||||||
1145 | "zero status:%d\n",if (kmp_d_debug >= 10) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d early exit trip count is " "zero status:%d\n", gtid, status); } | |||||||
1146 | gtid, status))if (kmp_d_debug >= 10) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d early exit trip count is " "zero status:%d\n", gtid, status); }; | |||||||
1147 | return 0; | |||||||
1148 | } | |||||||
1149 | ||||||||
1150 | switch (pr->schedule) { | |||||||
1151 | #if (KMP_STATIC_STEAL_ENABLED1) | |||||||
1152 | case kmp_sch_static_steal: { | |||||||
1153 | T chunk = pr->u.p.parm1; | |||||||
1154 | ||||||||
1155 | KD_TRACE(100,if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_static_steal case\n" , gtid); } | |||||||
1156 | ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_static_steal case\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_static_steal case\n" , gtid); } | |||||||
1157 | gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_static_steal case\n" , gtid); }; | |||||||
1158 | ||||||||
1159 | trip = pr->u.p.tc - 1; | |||||||
1160 | ||||||||
1161 | if (traits_t<T>::type_size > 4) { | |||||||
1162 | // use lock for 8-byte and CAS for 4-byte induction | |||||||
1163 | // variable. TODO (optional): check and use 16-byte CAS | |||||||
1164 | kmp_lock_t *lck = th->th.th_dispatch->th_steal_lock; | |||||||
1165 | KMP_DEBUG_ASSERT(lck != NULL)if (!(lck != __null)) { __kmp_debug_assert("lck != __null", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 1165); }; | |||||||
1166 | if (pr->u.p.count < (UT)pr->u.p.ub) { | |||||||
1167 | __kmp_acquire_lock(lck, gtid); | |||||||
1168 | // try to get own chunk of iterations | |||||||
1169 | init = (pr->u.p.count)++; | |||||||
1170 | status = (init < (UT)pr->u.p.ub); | |||||||
1171 | __kmp_release_lock(lck, gtid); | |||||||
1172 | } else { | |||||||
1173 | status = 0; // no own chunks | |||||||
1174 | } | |||||||
1175 | if (!status) { // try to steal | |||||||
1176 | kmp_info_t **other_threads = team->t.t_threads; | |||||||
1177 | int while_limit = nproc; // nproc attempts to find a victim | |||||||
1178 | int while_index = 0; | |||||||
1179 | // TODO: algorithm of searching for a victim | |||||||
1180 | // should be cleaned up and measured | |||||||
1181 | while ((!status) && (while_limit != ++while_index)) { | |||||||
1182 | T remaining; | |||||||
1183 | T victimIdx = pr->u.p.parm4; | |||||||
1184 | T oldVictimIdx = victimIdx ? victimIdx - 1 : nproc - 1; | |||||||
1185 | dispatch_private_info_template<T> *victim = | |||||||
1186 | reinterpret_cast<dispatch_private_info_template<T> *>( | |||||||
1187 | other_threads[victimIdx] | |||||||
1188 | ->th.th_dispatch->th_dispatch_pr_current); | |||||||
1189 | while ((victim == NULL__null || victim == pr || | |||||||
1190 | (*(volatile T *)&victim->u.p.static_steal_counter != | |||||||
1191 | *(volatile T *)&pr->u.p.static_steal_counter)) && | |||||||
1192 | oldVictimIdx != victimIdx) { | |||||||
1193 | victimIdx = (victimIdx + 1) % nproc; | |||||||
1194 | victim = reinterpret_cast<dispatch_private_info_template<T> *>( | |||||||
1195 | other_threads[victimIdx] | |||||||
1196 | ->th.th_dispatch->th_dispatch_pr_current); | |||||||
1197 | } | |||||||
1198 | if (!victim || (*(volatile T *)&victim->u.p.static_steal_counter != | |||||||
1199 | *(volatile T *)&pr->u.p.static_steal_counter)) { | |||||||
1200 | continue; // try once more (nproc attempts in total) | |||||||
1201 | // no victim is ready yet to participate in stealing | |||||||
1202 | // because all victims are still in kmp_init_dispatch | |||||||
1203 | } | |||||||
1204 | if (victim->u.p.count + 2 > (UT)victim->u.p.ub) { | |||||||
1205 | pr->u.p.parm4 = (victimIdx + 1) % nproc; // shift start tid | |||||||
1206 | continue; // not enough chunks to steal, goto next victim | |||||||
1207 | } | |||||||
1208 | ||||||||
1209 | lck = other_threads[victimIdx]->th.th_dispatch->th_steal_lock; | |||||||
1210 | KMP_ASSERT(lck != NULL)if (!(lck != __null)) { __kmp_debug_assert("lck != NULL", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 1210); }; | |||||||
1211 | __kmp_acquire_lock(lck, gtid); | |||||||
1212 | limit = victim->u.p.ub; // keep initial ub | |||||||
1213 | if (victim->u.p.count >= limit || | |||||||
1214 | (remaining = limit - victim->u.p.count) < 2) { | |||||||
1215 | __kmp_release_lock(lck, gtid); | |||||||
1216 | pr->u.p.parm4 = (victimIdx + 1) % nproc; // next victim | |||||||
1217 | continue; // not enough chunks to steal | |||||||
1218 | } | |||||||
1219 | // stealing succeeded, reduce victim's ub by 1/4 of undone chunks or | |||||||
1220 | // by 1 | |||||||
1221 | if (remaining > 3) { | |||||||
1222 | // steal 1/4 of remaining | |||||||
1223 | KMP_COUNT_DEVELOPER_VALUE(FOR_static_steal_stolen, remaining >> 2)((void)0); | |||||||
1224 | init = (victim->u.p.ub -= (remaining >> 2)); | |||||||
1225 | } else { | |||||||
1226 | // steal 1 chunk of 2 or 3 remaining | |||||||
1227 | KMP_COUNT_DEVELOPER_VALUE(FOR_static_steal_stolen, 1)((void)0); | |||||||
1228 | init = (victim->u.p.ub -= 1); | |||||||
1229 | } | |||||||
1230 | __kmp_release_lock(lck, gtid); | |||||||
1231 | ||||||||
1232 | KMP_DEBUG_ASSERT(init + 1 <= limit)if (!(init + 1 <= limit)) { __kmp_debug_assert("init + 1 <= limit" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 1232); }; | |||||||
1233 | pr->u.p.parm4 = victimIdx; // remember victim to steal from | |||||||
1234 | status = 1; | |||||||
1235 | while_index = 0; | |||||||
1236 | // now update own count and ub with stolen range but init chunk | |||||||
1237 | __kmp_acquire_lock(th->th.th_dispatch->th_steal_lock, gtid); | |||||||
1238 | pr->u.p.count = init + 1; | |||||||
1239 | pr->u.p.ub = limit; | |||||||
1240 | __kmp_release_lock(th->th.th_dispatch->th_steal_lock, gtid); | |||||||
1241 | } // while (search for victim) | |||||||
1242 | } // if (try to find victim and steal) | |||||||
1243 | } else { | |||||||
1244 | // 4-byte induction variable, use 8-byte CAS for pair (count, ub) | |||||||
1245 | typedef union { | |||||||
1246 | struct { | |||||||
1247 | UT count; | |||||||
1248 | T ub; | |||||||
1249 | } p; | |||||||
1250 | kmp_int64 b; | |||||||
1251 | } union_i4; | |||||||
1252 | // All operations on 'count' or 'ub' must be combined atomically | |||||||
1253 | // together. | |||||||
1254 | { | |||||||
1255 | union_i4 vold, vnew; | |||||||
1256 | vold.b = *(volatile kmp_int64 *)(&pr->u.p.count); | |||||||
1257 | vnew = vold; | |||||||
1258 | vnew.p.count++; | |||||||
1259 | while (!KMP_COMPARE_AND_STORE_ACQ64(__sync_bool_compare_and_swap((volatile kmp_uint64 *)((volatile kmp_int64 *)&pr->u.p.count), (kmp_uint64)(*(kmp_int64 *) & vold.b), (kmp_uint64)(*(kmp_int64 *) & vnew.b)) | |||||||
1260 | (volatile kmp_int64 *)&pr->u.p.count,__sync_bool_compare_and_swap((volatile kmp_uint64 *)((volatile kmp_int64 *)&pr->u.p.count), (kmp_uint64)(*(kmp_int64 *) & vold.b), (kmp_uint64)(*(kmp_int64 *) & vnew.b)) | |||||||
1261 | *VOLATILE_CAST(kmp_int64 *) & vold.b,__sync_bool_compare_and_swap((volatile kmp_uint64 *)((volatile kmp_int64 *)&pr->u.p.count), (kmp_uint64)(*(kmp_int64 *) & vold.b), (kmp_uint64)(*(kmp_int64 *) & vnew.b)) | |||||||
1262 | *VOLATILE_CAST(kmp_int64 *) & vnew.b)__sync_bool_compare_and_swap((volatile kmp_uint64 *)((volatile kmp_int64 *)&pr->u.p.count), (kmp_uint64)(*(kmp_int64 *) & vold.b), (kmp_uint64)(*(kmp_int64 *) & vnew.b))) { | |||||||
1263 | KMP_CPU_PAUSE()__kmp_x86_pause(); | |||||||
1264 | vold.b = *(volatile kmp_int64 *)(&pr->u.p.count); | |||||||
1265 | vnew = vold; | |||||||
1266 | vnew.p.count++; | |||||||
1267 | } | |||||||
1268 | vnew = vold; | |||||||
1269 | init = vnew.p.count; | |||||||
1270 | status = (init < (UT)vnew.p.ub); | |||||||
1271 | } | |||||||
1272 | ||||||||
1273 | if (!status) { | |||||||
1274 | kmp_info_t **other_threads = team->t.t_threads; | |||||||
1275 | int while_limit = nproc; // nproc attempts to find a victim | |||||||
1276 | int while_index = 0; | |||||||
1277 | ||||||||
1278 | // TODO: algorithm of searching for a victim | |||||||
1279 | // should be cleaned up and measured | |||||||
1280 | while ((!status) && (while_limit != ++while_index)) { | |||||||
1281 | union_i4 vold, vnew; | |||||||
1282 | kmp_int32 remaining; | |||||||
1283 | T victimIdx = pr->u.p.parm4; | |||||||
1284 | T oldVictimIdx = victimIdx ? victimIdx - 1 : nproc - 1; | |||||||
1285 | dispatch_private_info_template<T> *victim = | |||||||
1286 | reinterpret_cast<dispatch_private_info_template<T> *>( | |||||||
1287 | other_threads[victimIdx] | |||||||
1288 | ->th.th_dispatch->th_dispatch_pr_current); | |||||||
1289 | while ((victim == NULL__null || victim == pr || | |||||||
1290 | (*(volatile T *)&victim->u.p.static_steal_counter != | |||||||
1291 | *(volatile T *)&pr->u.p.static_steal_counter)) && | |||||||
1292 | oldVictimIdx != victimIdx) { | |||||||
1293 | victimIdx = (victimIdx + 1) % nproc; | |||||||
1294 | victim = reinterpret_cast<dispatch_private_info_template<T> *>( | |||||||
1295 | other_threads[victimIdx] | |||||||
1296 | ->th.th_dispatch->th_dispatch_pr_current); | |||||||
1297 | } | |||||||
1298 | if (!victim || (*(volatile T *)&victim->u.p.static_steal_counter != | |||||||
1299 | *(volatile T *)&pr->u.p.static_steal_counter)) { | |||||||
1300 | continue; // try once more (nproc attempts in total) | |||||||
1301 | // no victim is ready yet to participate in stealing | |||||||
1302 | // because all victims are still in kmp_init_dispatch | |||||||
1303 | } | |||||||
1304 | pr->u.p.parm4 = victimIdx; // new victim found | |||||||
1305 | while (1) { // CAS loop if victim has enough chunks to steal | |||||||
1306 | vold.b = *(volatile kmp_int64 *)(&victim->u.p.count); | |||||||
1307 | vnew = vold; | |||||||
1308 | ||||||||
1309 | KMP_DEBUG_ASSERT((vnew.p.ub - 1) * (UT)chunk <= trip)if (!((vnew.p.ub - 1) * (UT)chunk <= trip)) { __kmp_debug_assert ("(vnew.p.ub - 1) * (UT)chunk <= trip", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 1309); }; | |||||||
1310 | if (vnew.p.count >= (UT)vnew.p.ub || | |||||||
1311 | (remaining = vnew.p.ub - vnew.p.count) < 2) { | |||||||
1312 | pr->u.p.parm4 = (victimIdx + 1) % nproc; // shift start victim id | |||||||
1313 | break; // not enough chunks to steal, goto next victim | |||||||
1314 | } | |||||||
1315 | if (remaining > 3) { | |||||||
1316 | vnew.p.ub -= (remaining >> 2); // try to steal 1/4 of remaining | |||||||
1317 | } else { | |||||||
1318 | vnew.p.ub -= 1; // steal 1 chunk of 2 or 3 remaining | |||||||
1319 | } | |||||||
1320 | KMP_DEBUG_ASSERT((vnew.p.ub - 1) * (UT)chunk <= trip)if (!((vnew.p.ub - 1) * (UT)chunk <= trip)) { __kmp_debug_assert ("(vnew.p.ub - 1) * (UT)chunk <= trip", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 1320); }; | |||||||
1321 | // TODO: Should this be acquire or release? | |||||||
1322 | if (KMP_COMPARE_AND_STORE_ACQ64(__sync_bool_compare_and_swap((volatile kmp_uint64 *)((volatile kmp_int64 *)&victim->u.p.count), (kmp_uint64)(*(kmp_int64 *) & vold.b), (kmp_uint64)(*(kmp_int64 *) & vnew.b)) | |||||||
1323 | (volatile kmp_int64 *)&victim->u.p.count,__sync_bool_compare_and_swap((volatile kmp_uint64 *)((volatile kmp_int64 *)&victim->u.p.count), (kmp_uint64)(*(kmp_int64 *) & vold.b), (kmp_uint64)(*(kmp_int64 *) & vnew.b)) | |||||||
1324 | *VOLATILE_CAST(kmp_int64 *) & vold.b,__sync_bool_compare_and_swap((volatile kmp_uint64 *)((volatile kmp_int64 *)&victim->u.p.count), (kmp_uint64)(*(kmp_int64 *) & vold.b), (kmp_uint64)(*(kmp_int64 *) & vnew.b)) | |||||||
1325 | *VOLATILE_CAST(kmp_int64 *) & vnew.b)__sync_bool_compare_and_swap((volatile kmp_uint64 *)((volatile kmp_int64 *)&victim->u.p.count), (kmp_uint64)(*(kmp_int64 *) & vold.b), (kmp_uint64)(*(kmp_int64 *) & vnew.b))) { | |||||||
1326 | // stealing succeeded | |||||||
1327 | KMP_COUNT_DEVELOPER_VALUE(FOR_static_steal_stolen,((void)0) | |||||||
1328 | vold.p.ub - vnew.p.ub)((void)0); | |||||||
1329 | status = 1; | |||||||
1330 | while_index = 0; | |||||||
1331 | // now update own count and ub | |||||||
1332 | init = vnew.p.ub; | |||||||
1333 | vold.p.count = init + 1; | |||||||
1334 | #if KMP_ARCH_X860 | |||||||
1335 | KMP_XCHG_FIXED64((volatile kmp_int64 *)(&pr->u.p.count), vold.b)__sync_lock_test_and_set((volatile kmp_uint64 *)((volatile kmp_int64 *)(&pr->u.p.count)), (kmp_uint64)(vold.b)); | |||||||
1336 | #else | |||||||
1337 | *(volatile kmp_int64 *)(&pr->u.p.count) = vold.b; | |||||||
1338 | #endif | |||||||
1339 | break; | |||||||
1340 | } // if (check CAS result) | |||||||
1341 | KMP_CPU_PAUSE()__kmp_x86_pause(); // CAS failed, repeat attempt | |||||||
1342 | } // while (try to steal from particular victim) | |||||||
1343 | } // while (search for victim) | |||||||
1344 | } // if (try to find victim and steal) | |||||||
1345 | } // if (4-byte induction variable) | |||||||
1346 | if (!status) { | |||||||
1347 | *p_lb = 0; | |||||||
1348 | *p_ub = 0; | |||||||
1349 | if (p_st != NULL__null) | |||||||
1350 | *p_st = 0; | |||||||
1351 | } else { | |||||||
1352 | start = pr->u.p.parm2; | |||||||
1353 | init *= chunk; | |||||||
1354 | limit = chunk + init - 1; | |||||||
1355 | incr = pr->u.p.st; | |||||||
1356 | KMP_COUNT_DEVELOPER_VALUE(FOR_static_steal_chunks, 1)((void)0); | |||||||
1357 | ||||||||
1358 | KMP_DEBUG_ASSERT(init <= trip)if (!(init <= trip)) { __kmp_debug_assert("init <= trip" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 1358); }; | |||||||
1359 | if ((last = (limit >= trip)) != 0) | |||||||
1360 | limit = trip; | |||||||
1361 | if (p_st != NULL__null) | |||||||
1362 | *p_st = incr; | |||||||
1363 | ||||||||
1364 | if (incr == 1) { | |||||||
1365 | *p_lb = start + init; | |||||||
1366 | *p_ub = start + limit; | |||||||
1367 | } else { | |||||||
1368 | *p_lb = start + init * incr; | |||||||
1369 | *p_ub = start + limit * incr; | |||||||
1370 | } | |||||||
1371 | ||||||||
1372 | if (pr->flags.ordered) { | |||||||
1373 | pr->u.p.ordered_lower = init; | |||||||
1374 | pr->u.p.ordered_upper = limit; | |||||||
1375 | } // if | |||||||
1376 | } // if | |||||||
1377 | break; | |||||||
1378 | } // case | |||||||
1379 | #endif // ( KMP_STATIC_STEAL_ENABLED ) | |||||||
1380 | case kmp_sch_static_balanced: { | |||||||
1381 | KD_TRACE(if (kmp_d_debug >= 10) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_static_balanced case\n" , gtid); } | |||||||
1382 | 10,if (kmp_d_debug >= 10) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_static_balanced case\n" , gtid); } | |||||||
1383 | ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_static_balanced case\n",if (kmp_d_debug >= 10) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_static_balanced case\n" , gtid); } | |||||||
1384 | gtid))if (kmp_d_debug >= 10) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_static_balanced case\n" , gtid); }; | |||||||
1385 | /* check if thread has any iteration to do */ | |||||||
1386 | if ((status = !pr->u.p.count) != 0) { | |||||||
1387 | pr->u.p.count = 1; | |||||||
1388 | *p_lb = pr->u.p.lb; | |||||||
1389 | *p_ub = pr->u.p.ub; | |||||||
1390 | last = pr->u.p.parm1; | |||||||
1391 | if (p_st != NULL__null) | |||||||
1392 | *p_st = pr->u.p.st; | |||||||
1393 | } else { /* no iterations to do */ | |||||||
1394 | pr->u.p.lb = pr->u.p.ub + pr->u.p.st; | |||||||
1395 | } | |||||||
1396 | } // case | |||||||
1397 | break; | |||||||
1398 | case kmp_sch_static_greedy: /* original code for kmp_sch_static_greedy was | |||||||
1399 | merged here */ | |||||||
1400 | case kmp_sch_static_chunked: { | |||||||
1401 | T parm1; | |||||||
1402 | ||||||||
1403 | KD_TRACE(100, ("__kmp_dispatch_next_algorithm: T#%d "if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d " "kmp_sch_static_[affinity|chunked] case\n", gtid); } | |||||||
1404 | "kmp_sch_static_[affinity|chunked] case\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d " "kmp_sch_static_[affinity|chunked] case\n", gtid); } | |||||||
1405 | gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d " "kmp_sch_static_[affinity|chunked] case\n", gtid); }; | |||||||
1406 | parm1 = pr->u.p.parm1; | |||||||
1407 | ||||||||
1408 | trip = pr->u.p.tc - 1; | |||||||
1409 | init = parm1 * (pr->u.p.count + tid); | |||||||
1410 | ||||||||
1411 | if ((status = (init <= trip)) != 0) { | |||||||
1412 | start = pr->u.p.lb; | |||||||
1413 | incr = pr->u.p.st; | |||||||
1414 | limit = parm1 + init - 1; | |||||||
1415 | ||||||||
1416 | if ((last = (limit >= trip)) != 0) | |||||||
1417 | limit = trip; | |||||||
1418 | ||||||||
1419 | if (p_st != NULL__null) | |||||||
1420 | *p_st = incr; | |||||||
1421 | ||||||||
1422 | pr->u.p.count += nproc; | |||||||
1423 | ||||||||
1424 | if (incr == 1) { | |||||||
1425 | *p_lb = start + init; | |||||||
1426 | *p_ub = start + limit; | |||||||
1427 | } else { | |||||||
1428 | *p_lb = start + init * incr; | |||||||
1429 | *p_ub = start + limit * incr; | |||||||
1430 | } | |||||||
1431 | ||||||||
1432 | if (pr->flags.ordered) { | |||||||
1433 | pr->u.p.ordered_lower = init; | |||||||
1434 | pr->u.p.ordered_upper = limit; | |||||||
1435 | } // if | |||||||
1436 | } // if | |||||||
1437 | } // case | |||||||
1438 | break; | |||||||
1439 | ||||||||
1440 | case kmp_sch_dynamic_chunked: { | |||||||
1441 | T chunk = pr->u.p.parm1; | |||||||
1442 | ||||||||
1443 | KD_TRACE(if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_dynamic_chunked case\n" , gtid); } | |||||||
1444 | 100,if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_dynamic_chunked case\n" , gtid); } | |||||||
1445 | ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_dynamic_chunked case\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_dynamic_chunked case\n" , gtid); } | |||||||
1446 | gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_dynamic_chunked case\n" , gtid); }; | |||||||
1447 | ||||||||
1448 | init = chunk * test_then_inc_acq<ST>((volatile ST *)&sh->u.s.iteration); | |||||||
1449 | trip = pr->u.p.tc - 1; | |||||||
1450 | ||||||||
1451 | if ((status = (init <= trip)) == 0) { | |||||||
1452 | *p_lb = 0; | |||||||
1453 | *p_ub = 0; | |||||||
1454 | if (p_st != NULL__null) | |||||||
1455 | *p_st = 0; | |||||||
1456 | } else { | |||||||
1457 | start = pr->u.p.lb; | |||||||
1458 | limit = chunk + init - 1; | |||||||
1459 | incr = pr->u.p.st; | |||||||
1460 | ||||||||
1461 | if ((last = (limit >= trip)) != 0) | |||||||
1462 | limit = trip; | |||||||
1463 | ||||||||
1464 | if (p_st != NULL__null) | |||||||
1465 | *p_st = incr; | |||||||
1466 | ||||||||
1467 | if (incr == 1) { | |||||||
1468 | *p_lb = start + init; | |||||||
1469 | *p_ub = start + limit; | |||||||
1470 | } else { | |||||||
1471 | *p_lb = start + init * incr; | |||||||
1472 | *p_ub = start + limit * incr; | |||||||
1473 | } | |||||||
1474 | ||||||||
1475 | if (pr->flags.ordered) { | |||||||
1476 | pr->u.p.ordered_lower = init; | |||||||
1477 | pr->u.p.ordered_upper = limit; | |||||||
1478 | } // if | |||||||
1479 | } // if | |||||||
1480 | } // case | |||||||
1481 | break; | |||||||
1482 | ||||||||
1483 | case kmp_sch_guided_iterative_chunked: { | |||||||
1484 | T chunkspec = pr->u.p.parm1; | |||||||
1485 | KD_TRACE(100, ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_guided_chunked "if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_guided_chunked " "iterative case\n", gtid); } | |||||||
1486 | "iterative case\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_guided_chunked " "iterative case\n", gtid); } | |||||||
1487 | gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_guided_chunked " "iterative case\n", gtid); }; | |||||||
1488 | trip = pr->u.p.tc; | |||||||
1489 | // Start atomic part of calculations | |||||||
1490 | while (1) { | |||||||
1491 | ST remaining; // signed, because can be < 0 | |||||||
1492 | init = sh->u.s.iteration; // shared value | |||||||
1493 | remaining = trip - init; | |||||||
1494 | if (remaining <= 0) { // AC: need to compare with 0 first | |||||||
1495 | // nothing to do, don't try atomic op | |||||||
1496 | status = 0; | |||||||
1497 | break; | |||||||
1498 | } | |||||||
1499 | if ((T)remaining < | |||||||
1500 | pr->u.p.parm2) { // compare with K*nproc*(chunk+1), K=2 by default | |||||||
1501 | // use dynamic-style schedule | |||||||
1502 | // atomically increment iterations, get old value | |||||||
1503 | init = test_then_add<ST>(RCAST(volatile ST *, &sh->u.s.iteration)reinterpret_cast<volatile ST *>(&sh->u.s.iteration ), | |||||||
1504 | (ST)chunkspec); | |||||||
1505 | remaining = trip - init; | |||||||
1506 | if (remaining <= 0) { | |||||||
1507 | status = 0; // all iterations got by other threads | |||||||
1508 | } else { | |||||||
1509 | // got some iterations to work on | |||||||
1510 | status = 1; | |||||||
1511 | if ((T)remaining > chunkspec) { | |||||||
1512 | limit = init + chunkspec - 1; | |||||||
1513 | } else { | |||||||
1514 | last = 1; // the last chunk | |||||||
1515 | limit = init + remaining - 1; | |||||||
1516 | } // if | |||||||
1517 | } // if | |||||||
1518 | break; | |||||||
1519 | } // if | |||||||
1520 | limit = init + | |||||||
1521 | (UT)(remaining * *(double *)&pr->u.p.parm3); // divide by K*nproc | |||||||
1522 | if (compare_and_swap<ST>(RCAST(volatile ST *, &sh->u.s.iteration)reinterpret_cast<volatile ST *>(&sh->u.s.iteration ), | |||||||
1523 | (ST)init, (ST)limit)) { | |||||||
1524 | // CAS was successful, chunk obtained | |||||||
1525 | status = 1; | |||||||
1526 | --limit; | |||||||
1527 | break; | |||||||
1528 | } // if | |||||||
1529 | } // while | |||||||
1530 | if (status != 0) { | |||||||
1531 | start = pr->u.p.lb; | |||||||
1532 | incr = pr->u.p.st; | |||||||
1533 | if (p_st != NULL__null) | |||||||
1534 | *p_st = incr; | |||||||
1535 | *p_lb = start + init * incr; | |||||||
1536 | *p_ub = start + limit * incr; | |||||||
1537 | if (pr->flags.ordered) { | |||||||
1538 | pr->u.p.ordered_lower = init; | |||||||
1539 | pr->u.p.ordered_upper = limit; | |||||||
1540 | } // if | |||||||
1541 | } else { | |||||||
1542 | *p_lb = 0; | |||||||
1543 | *p_ub = 0; | |||||||
1544 | if (p_st != NULL__null) | |||||||
1545 | *p_st = 0; | |||||||
1546 | } // if | |||||||
1547 | } // case | |||||||
1548 | break; | |||||||
1549 | ||||||||
1550 | #if OMP_45_ENABLED(50 >= 45) | |||||||
1551 | case kmp_sch_guided_simd: { | |||||||
1552 | // same as iterative but curr-chunk adjusted to be multiple of given | |||||||
1553 | // chunk | |||||||
1554 | T chunk = pr->u.p.parm1; | |||||||
1555 | KD_TRACE(100,if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_guided_simd case\n" , gtid); } | |||||||
1556 | ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_guided_simd case\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_guided_simd case\n" , gtid); } | |||||||
1557 | gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_guided_simd case\n" , gtid); }; | |||||||
1558 | trip = pr->u.p.tc; | |||||||
1559 | // Start atomic part of calculations | |||||||
1560 | while (1) { | |||||||
1561 | ST remaining; // signed, because can be < 0 | |||||||
1562 | init = sh->u.s.iteration; // shared value | |||||||
1563 | remaining = trip - init; | |||||||
1564 | if (remaining <= 0) { // AC: need to compare with 0 first | |||||||
1565 | status = 0; // nothing to do, don't try atomic op | |||||||
1566 | break; | |||||||
1567 | } | |||||||
1568 | KMP_DEBUG_ASSERT(init % chunk == 0)if (!(init % chunk == 0)) { __kmp_debug_assert("init % chunk == 0" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 1568); }; | |||||||
1569 | // compare with K*nproc*(chunk+1), K=2 by default | |||||||
1570 | if ((T)remaining < pr->u.p.parm2) { | |||||||
1571 | // use dynamic-style schedule | |||||||
1572 | // atomically increment iterations, get old value | |||||||
1573 | init = test_then_add<ST>(RCAST(volatile ST *, &sh->u.s.iteration)reinterpret_cast<volatile ST *>(&sh->u.s.iteration ), | |||||||
1574 | (ST)chunk); | |||||||
1575 | remaining = trip - init; | |||||||
1576 | if (remaining <= 0) { | |||||||
1577 | status = 0; // all iterations got by other threads | |||||||
1578 | } else { | |||||||
1579 | // got some iterations to work on | |||||||
1580 | status = 1; | |||||||
1581 | if ((T)remaining > chunk) { | |||||||
1582 | limit = init + chunk - 1; | |||||||
1583 | } else { | |||||||
1584 | last = 1; // the last chunk | |||||||
1585 | limit = init + remaining - 1; | |||||||
1586 | } // if | |||||||
1587 | } // if | |||||||
1588 | break; | |||||||
1589 | } // if | |||||||
1590 | // divide by K*nproc | |||||||
1591 | UT span = remaining * (*(double *)&pr->u.p.parm3); | |||||||
1592 | UT rem = span % chunk; | |||||||
1593 | if (rem) // adjust so that span%chunk == 0 | |||||||
1594 | span += chunk - rem; | |||||||
1595 | limit = init + span; | |||||||
1596 | if (compare_and_swap<ST>(RCAST(volatile ST *, &sh->u.s.iteration)reinterpret_cast<volatile ST *>(&sh->u.s.iteration ), | |||||||
1597 | (ST)init, (ST)limit)) { | |||||||
1598 | // CAS was successful, chunk obtained | |||||||
1599 | status = 1; | |||||||
1600 | --limit; | |||||||
1601 | break; | |||||||
1602 | } // if | |||||||
1603 | } // while | |||||||
1604 | if (status != 0) { | |||||||
1605 | start = pr->u.p.lb; | |||||||
1606 | incr = pr->u.p.st; | |||||||
1607 | if (p_st != NULL__null) | |||||||
1608 | *p_st = incr; | |||||||
1609 | *p_lb = start + init * incr; | |||||||
1610 | *p_ub = start + limit * incr; | |||||||
1611 | if (pr->flags.ordered) { | |||||||
1612 | pr->u.p.ordered_lower = init; | |||||||
1613 | pr->u.p.ordered_upper = limit; | |||||||
1614 | } // if | |||||||
1615 | } else { | |||||||
1616 | *p_lb = 0; | |||||||
1617 | *p_ub = 0; | |||||||
1618 | if (p_st != NULL__null) | |||||||
1619 | *p_st = 0; | |||||||
1620 | } // if | |||||||
1621 | } // case | |||||||
1622 | break; | |||||||
1623 | #endif // OMP_45_ENABLED | |||||||
1624 | ||||||||
1625 | case kmp_sch_guided_analytical_chunked: { | |||||||
1626 | T chunkspec = pr->u.p.parm1; | |||||||
1627 | UT chunkIdx; | |||||||
1628 | #if KMP_OS_WINDOWS0 && KMP_ARCH_X860 | |||||||
1629 | /* for storing original FPCW value for Windows* OS on | |||||||
1630 | IA-32 architecture 8-byte version */ | |||||||
1631 | unsigned int oldFpcw; | |||||||
1632 | unsigned int fpcwSet = 0; | |||||||
1633 | #endif | |||||||
1634 | KD_TRACE(100, ("__kmp_dispatch_next_algorithm: T#%d "if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d " "kmp_sch_guided_analytical_chunked case\n", gtid); } | |||||||
1635 | "kmp_sch_guided_analytical_chunked case\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d " "kmp_sch_guided_analytical_chunked case\n", gtid); } | |||||||
1636 | gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d " "kmp_sch_guided_analytical_chunked case\n", gtid); }; | |||||||
1637 | ||||||||
1638 | trip = pr->u.p.tc; | |||||||
1639 | ||||||||
1640 | KMP_DEBUG_ASSERT(nproc > 1)if (!(nproc > 1)) { __kmp_debug_assert("nproc > 1", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 1640); }; | |||||||
1641 | KMP_DEBUG_ASSERT((2UL * chunkspec + 1) * (UT)nproc < trip)if (!((2UL * chunkspec + 1) * (UT)nproc < trip)) { __kmp_debug_assert ("(2UL * chunkspec + 1) * (UT)nproc < trip", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 1641); }; | |||||||
1642 | ||||||||
1643 | while (1) { /* this while loop is a safeguard against unexpected zero | |||||||
1644 | chunk sizes */ | |||||||
1645 | chunkIdx = test_then_inc_acq<ST>((volatile ST *)&sh->u.s.iteration); | |||||||
1646 | if (chunkIdx >= (UT)pr->u.p.parm2) { | |||||||
1647 | --trip; | |||||||
1648 | /* use dynamic-style scheduling */ | |||||||
1649 | init = chunkIdx * chunkspec + pr->u.p.count; | |||||||
1650 | /* need to verify init > 0 in case of overflow in the above | |||||||
1651 | * calculation */ | |||||||
1652 | if ((status = (init > 0 && init <= trip)) != 0) { | |||||||
1653 | limit = init + chunkspec - 1; | |||||||
1654 | ||||||||
1655 | if ((last = (limit >= trip)) != 0) | |||||||
1656 | limit = trip; | |||||||
1657 | } | |||||||
1658 | break; | |||||||
1659 | } else { | |||||||
1660 | /* use exponential-style scheduling */ | |||||||
1661 | /* The following check is to workaround the lack of long double precision on | |||||||
1662 | Windows* OS. | |||||||
1663 | This check works around the possible effect that init != 0 for chunkIdx == 0. | |||||||
1664 | */ | |||||||
1665 | #if KMP_OS_WINDOWS0 && KMP_ARCH_X860 | |||||||
1666 | /* If we haven't already done so, save original | |||||||
1667 | FPCW and set precision to 64-bit, as Windows* OS | |||||||
1668 | on IA-32 architecture defaults to 53-bit */ | |||||||
1669 | if (!fpcwSet) { | |||||||
1670 | oldFpcw = _control87(0, 0); | |||||||
1671 | _control87(_PC_64, _MCW_PC); | |||||||
1672 | fpcwSet = 0x30000; | |||||||
1673 | } | |||||||
1674 | #endif | |||||||
1675 | if (chunkIdx) { | |||||||
1676 | init = __kmp_dispatch_guided_remaining<T>( | |||||||
1677 | trip, *(DBL *)&pr->u.p.parm3, chunkIdx); | |||||||
1678 | KMP_DEBUG_ASSERT(init)if (!(init)) { __kmp_debug_assert("init", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 1678); }; | |||||||
1679 | init = trip - init; | |||||||
1680 | } else | |||||||
1681 | init = 0; | |||||||
1682 | limit = trip - __kmp_dispatch_guided_remaining<T>( | |||||||
1683 | trip, *(DBL *)&pr->u.p.parm3, chunkIdx + 1); | |||||||
1684 | KMP_ASSERT(init <= limit)if (!(init <= limit)) { __kmp_debug_assert("init <= limit" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 1684); }; | |||||||
1685 | if (init < limit) { | |||||||
1686 | KMP_DEBUG_ASSERT(limit <= trip)if (!(limit <= trip)) { __kmp_debug_assert("limit <= trip" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 1686); }; | |||||||
1687 | --limit; | |||||||
1688 | status = 1; | |||||||
1689 | break; | |||||||
1690 | } // if | |||||||
1691 | } // if | |||||||
1692 | } // while (1) | |||||||
1693 | #if KMP_OS_WINDOWS0 && KMP_ARCH_X860 | |||||||
1694 | /* restore FPCW if necessary | |||||||
1695 | AC: check fpcwSet flag first because oldFpcw can be uninitialized here | |||||||
1696 | */ | |||||||
1697 | if (fpcwSet && (oldFpcw & fpcwSet)) | |||||||
1698 | _control87(oldFpcw, _MCW_PC); | |||||||
1699 | #endif | |||||||
1700 | if (status != 0) { | |||||||
1701 | start = pr->u.p.lb; | |||||||
1702 | incr = pr->u.p.st; | |||||||
1703 | if (p_st != NULL__null) | |||||||
1704 | *p_st = incr; | |||||||
1705 | *p_lb = start + init * incr; | |||||||
1706 | *p_ub = start + limit * incr; | |||||||
1707 | if (pr->flags.ordered) { | |||||||
1708 | pr->u.p.ordered_lower = init; | |||||||
1709 | pr->u.p.ordered_upper = limit; | |||||||
1710 | } | |||||||
1711 | } else { | |||||||
1712 | *p_lb = 0; | |||||||
1713 | *p_ub = 0; | |||||||
1714 | if (p_st != NULL__null) | |||||||
1715 | *p_st = 0; | |||||||
1716 | } | |||||||
1717 | } // case | |||||||
1718 | break; | |||||||
1719 | ||||||||
1720 | case kmp_sch_trapezoidal: { | |||||||
1721 | UT index; | |||||||
1722 | T parm2 = pr->u.p.parm2; | |||||||
1723 | T parm3 = pr->u.p.parm3; | |||||||
1724 | T parm4 = pr->u.p.parm4; | |||||||
1725 | KD_TRACE(100,if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_trapezoidal case\n" , gtid); } | |||||||
1726 | ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_trapezoidal case\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_trapezoidal case\n" , gtid); } | |||||||
1727 | gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_trapezoidal case\n" , gtid); }; | |||||||
1728 | ||||||||
1729 | index = test_then_inc<ST>((volatile ST *)&sh->u.s.iteration); | |||||||
1730 | ||||||||
1731 | init = (index * ((2 * parm2) - (index - 1) * parm4)) / 2; | |||||||
1732 | trip = pr->u.p.tc - 1; | |||||||
1733 | ||||||||
1734 | if ((status = ((T)index < parm3 && init <= trip)) == 0) { | |||||||
1735 | *p_lb = 0; | |||||||
1736 | *p_ub = 0; | |||||||
1737 | if (p_st != NULL__null) | |||||||
1738 | *p_st = 0; | |||||||
1739 | } else { | |||||||
1740 | start = pr->u.p.lb; | |||||||
1741 | limit = ((index + 1) * (2 * parm2 - index * parm4)) / 2 - 1; | |||||||
1742 | incr = pr->u.p.st; | |||||||
1743 | ||||||||
1744 | if ((last = (limit >= trip)) != 0) | |||||||
1745 | limit = trip; | |||||||
1746 | ||||||||
1747 | if (p_st != NULL__null) | |||||||
1748 | *p_st = incr; | |||||||
1749 | ||||||||
1750 | if (incr == 1) { | |||||||
1751 | *p_lb = start + init; | |||||||
1752 | *p_ub = start + limit; | |||||||
1753 | } else { | |||||||
1754 | *p_lb = start + init * incr; | |||||||
1755 | *p_ub = start + limit * incr; | |||||||
1756 | } | |||||||
1757 | ||||||||
1758 | if (pr->flags.ordered) { | |||||||
1759 | pr->u.p.ordered_lower = init; | |||||||
1760 | pr->u.p.ordered_upper = limit; | |||||||
1761 | } // if | |||||||
1762 | } // if | |||||||
1763 | } // case | |||||||
1764 | break; | |||||||
1765 | default: { | |||||||
1766 | status = 0; // to avoid complaints on uninitialized variable use | |||||||
1767 | __kmp_fatal(KMP_MSG(UnknownSchedTypeDetected)__kmp_msg_format(kmp_i18n_msg_UnknownSchedTypeDetected), // Primary message | |||||||
1768 | KMP_HNT(GetNewerLibrary)__kmp_msg_format(kmp_i18n_hnt_GetNewerLibrary), // Hint | |||||||
1769 | __kmp_msg_null // Variadic argument list terminator | |||||||
1770 | ); | |||||||
1771 | } break; | |||||||
1772 | } // switch | |||||||
1773 | if (p_last) | |||||||
1774 | *p_last = last; | |||||||
1775 | #ifdef KMP_DEBUG1 | |||||||
1776 | if (pr->flags.ordered) { | |||||||
1777 | char *buff; | |||||||
1778 | // create format specifiers before the debug output | |||||||
1779 | buff = __kmp_str_format("__kmp_dispatch_next_algorithm: T#%%d " | |||||||
1780 | "ordered_lower:%%%s ordered_upper:%%%s\n", | |||||||
1781 | traits_t<UT>::spec, traits_t<UT>::spec); | |||||||
1782 | KD_TRACE(1000, (buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper))if (kmp_d_debug >= 1000) { __kmp_debug_printf (buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper); }; | |||||||
1783 | __kmp_str_free(&buff); | |||||||
1784 | } | |||||||
1785 | { | |||||||
1786 | char *buff; | |||||||
1787 | // create format specifiers before the debug output | |||||||
1788 | buff = __kmp_str_format( | |||||||
1789 | "__kmp_dispatch_next_algorithm: T#%%d exit status:%%d p_last:%%d " | |||||||
1790 | "p_lb:%%%s p_ub:%%%s p_st:%%%s\n", | |||||||
1791 | traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec); | |||||||
1792 | KD_TRACE(10, (buff, gtid, status, *p_last, *p_lb, *p_ub, *p_st))if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, status , *p_last, *p_lb, *p_ub, *p_st); }; | |||||||
1793 | __kmp_str_free(&buff); | |||||||
1794 | } | |||||||
1795 | #endif | |||||||
1796 | return status; | |||||||
1797 | } | |||||||
1798 | ||||||||
1799 | /* Define a macro for exiting __kmp_dispatch_next(). If status is 0 (no more
1800 |    work), then tell OMPT the loop is over. In some cases kmp_dispatch_fini()
1801 |    is not called. */
// NOTE(review): this macro expands at its use site and reads the locals
// `status` and `codeptr` of the enclosing function; it is only usable inside
// __kmp_dispatch_next(), where both are in scope.
1802 | #if OMPT_SUPPORT1 && OMPT_OPTIONAL1
1803 | #define OMPT_LOOP_ENDif (status == 0) { if (ompt_enabled.ompt_callback_work) { ompt_team_info_t *team_info = __ompt_get_teaminfo(0, __null); ompt_task_info_t *task_info = __ompt_get_task_info_object(0); ompt_callbacks. ompt_callback_work_callback( ompt_work_loop, ompt_scope_end, & (team_info->parallel_data), &(task_info->task_data) , 0, codeptr); } } \
1804 | if (status == 0) { \
1805 | if (ompt_enabled.ompt_callback_work) { \
1806 | ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL__null); \
1807 | ompt_task_info_t *task_info = __ompt_get_task_info_object(0); \
1808 | ompt_callbacks.ompt_callback(ompt_callback_work)ompt_callback_work_callback( \
1809 | ompt_work_loop, ompt_scope_end, &(team_info->parallel_data), \
1810 | &(task_info->task_data), 0, codeptr); \
1811 | } \
1812 | }
1813 | // TODO: implement count
1814 | #else
// When OMPT support is compiled out, the macro expands to nothing.
1815 | #define OMPT_LOOP_ENDif (status == 0) { if (ompt_enabled.ompt_callback_work) { ompt_team_info_t *team_info = __ompt_get_teaminfo(0, __null); ompt_task_info_t *task_info = __ompt_get_task_info_object(0); ompt_callbacks. ompt_callback_work_callback( ompt_work_loop, ompt_scope_end, & (team_info->parallel_data), &(task_info->task_data) , 0, codeptr); } } // no-op
1816 | #endif
1817 | ||||||||
// KMP_STATS_LOOP_END: computes the trip count `t` of the chunk just handed out
// (derived from *p_lb, *p_ub and the stride pr->u.p.st, handling unit,
// negative, and positive strides separately) and records it in the stats
// framework. Expands to nothing when KMP_STATS_ENABLED is 0. Like
// OMPT_LOOP_END, it reads locals (`status`, `p_lb`, `p_ub`, `pr`) at the
// expansion site.
1818 | #if KMP_STATS_ENABLED0
1819 | #define KMP_STATS_LOOP_END \
1820 | { \
1821 | kmp_int64 u, l, t, i; \
1822 | l = (kmp_int64)(*p_lb); \
1823 | u = (kmp_int64)(*p_ub); \
1824 | i = (kmp_int64)(pr->u.p.st); \
1825 | if (status == 0) { \
1826 | t = 0; \
1827 | KMP_POP_PARTITIONED_TIMER()((void)0); \
1828 | } else if (i == 1) { \
1829 | if (u >= l) \
1830 | t = u - l + 1; \
1831 | else \
1832 | t = 0; \
1833 | } else if (i < 0) { \
1834 | if (l >= u) \
1835 | t = (l - u) / (-i) + 1; \
1836 | else \
1837 | t = 0; \
1838 | } else { \
1839 | if (u >= l) \
1840 | t = (u - l) / i + 1; \
1841 | else \
1842 | t = 0; \
1843 | } \
1844 | KMP_COUNT_VALUE(OMP_loop_dynamic_iterations, t)((void)0); \
1845 | }
1846 | #else
1847 | #define KMP_STATS_LOOP_END /* Nothing */
1848 | #endif
1849 | ||||||||
1850 | template <typename T> | |||||||
1851 | static int __kmp_dispatch_next(ident_t *loc, int gtid, kmp_int32 *p_last, | |||||||
1852 | T *p_lb, T *p_ub, | |||||||
1853 | typename traits_t<T>::signed_t *p_st | |||||||
1854 | #if OMPT_SUPPORT1 && OMPT_OPTIONAL1 | |||||||
1855 | , | |||||||
1856 | void *codeptr | |||||||
1857 | #endif | |||||||
1858 | ) { | |||||||
1859 | ||||||||
1860 | typedef typename traits_t<T>::unsigned_t UT; | |||||||
1861 | typedef typename traits_t<T>::signed_t ST; | |||||||
1862 | // This is potentially slightly misleading, schedule(runtime) will appear here | |||||||
1863 | // even if the actual runtme schedule is static. (Which points out a | |||||||
1864 | // disadavantage of schedule(runtime): even when static scheduling is used it | |||||||
1865 | // costs more than a compile time choice to use static scheduling would.) | |||||||
1866 | KMP_TIME_PARTITIONED_BLOCK(OMP_loop_dynamic_scheduling)((void)0); | |||||||
1867 | ||||||||
1868 | int status; | |||||||
1869 | dispatch_private_info_template<T> *pr; | |||||||
1870 | kmp_info_t *th = __kmp_threads[gtid]; | |||||||
1871 | kmp_team_t *team = th->th.th_team; | |||||||
1872 | ||||||||
1873 | KMP_DEBUG_ASSERT(p_lb && p_ub && p_st)if (!(p_lb && p_ub && p_st)) { __kmp_debug_assert ("p_lb && p_ub && p_st", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 1873); }; // AC: these cannot be NULL | |||||||
1874 | KD_TRACE(if (kmp_d_debug >= 1000) { __kmp_debug_printf ("__kmp_dispatch_next: T#%d called p_lb:%p p_ub:%p p_st:%p p_last: %p\n" , gtid, p_lb, p_ub, p_st, p_last); } | |||||||
1875 | 1000,if (kmp_d_debug >= 1000) { __kmp_debug_printf ("__kmp_dispatch_next: T#%d called p_lb:%p p_ub:%p p_st:%p p_last: %p\n" , gtid, p_lb, p_ub, p_st, p_last); } | |||||||
1876 | ("__kmp_dispatch_next: T#%d called p_lb:%p p_ub:%p p_st:%p p_last: %p\n",if (kmp_d_debug >= 1000) { __kmp_debug_printf ("__kmp_dispatch_next: T#%d called p_lb:%p p_ub:%p p_st:%p p_last: %p\n" , gtid, p_lb, p_ub, p_st, p_last); } | |||||||
1877 | gtid, p_lb, p_ub, p_st, p_last))if (kmp_d_debug >= 1000) { __kmp_debug_printf ("__kmp_dispatch_next: T#%d called p_lb:%p p_ub:%p p_st:%p p_last: %p\n" , gtid, p_lb, p_ub, p_st, p_last); }; | |||||||
1878 | ||||||||
1879 | if (team->t.t_serialized) { | |||||||
1880 | /* NOTE: serialize this dispatch becase we are not at the active level */ | |||||||
1881 | pr = reinterpret_cast<dispatch_private_info_template<T> *>( | |||||||
1882 | th->th.th_dispatch->th_disp_buffer); /* top of the stack */ | |||||||
1883 | KMP_DEBUG_ASSERT(pr)if (!(pr)) { __kmp_debug_assert("pr", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 1883); }; | |||||||
1884 | ||||||||
1885 | if ((status = (pr->u.p.tc != 0)) == 0) { | |||||||
1886 | *p_lb = 0; | |||||||
1887 | *p_ub = 0; | |||||||
1888 | // if ( p_last != NULL ) | |||||||
1889 | // *p_last = 0; | |||||||
1890 | if (p_st != NULL__null) | |||||||
1891 | *p_st = 0; | |||||||
1892 | if (__kmp_env_consistency_check) { | |||||||
1893 | if (pr->pushed_ws != ct_none) { | |||||||
1894 | pr->pushed_ws = __kmp_pop_workshare(gtid, pr->pushed_ws, loc); | |||||||
1895 | } | |||||||
1896 | } | |||||||
1897 | } else if (pr->flags.nomerge) { | |||||||
1898 | kmp_int32 last; | |||||||
1899 | T start; | |||||||
1900 | UT limit, trip, init; | |||||||
1901 | ST incr; | |||||||
1902 | T chunk = pr->u.p.parm1; | |||||||
1903 | ||||||||
1904 | KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_dynamic_chunked case\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next: T#%d kmp_sch_dynamic_chunked case\n" , gtid); } | |||||||
1905 | gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next: T#%d kmp_sch_dynamic_chunked case\n" , gtid); }; | |||||||
1906 | ||||||||
1907 | init = chunk * pr->u.p.count++; | |||||||
1908 | trip = pr->u.p.tc - 1; | |||||||
1909 | ||||||||
1910 | if ((status = (init <= trip)) == 0) { | |||||||
1911 | *p_lb = 0; | |||||||
1912 | *p_ub = 0; | |||||||
1913 | // if ( p_last != NULL ) | |||||||
1914 | // *p_last = 0; | |||||||
1915 | if (p_st != NULL__null) | |||||||
1916 | *p_st = 0; | |||||||
1917 | if (__kmp_env_consistency_check) { | |||||||
1918 | if (pr->pushed_ws != ct_none) { | |||||||
1919 | pr->pushed_ws = __kmp_pop_workshare(gtid, pr->pushed_ws, loc); | |||||||
1920 | } | |||||||
1921 | } | |||||||
1922 | } else { | |||||||
1923 | start = pr->u.p.lb; | |||||||
1924 | limit = chunk + init - 1; | |||||||
1925 | incr = pr->u.p.st; | |||||||
1926 | ||||||||
1927 | if ((last = (limit >= trip)) != 0) { | |||||||
1928 | limit = trip; | |||||||
1929 | #if KMP_OS_WINDOWS0 | |||||||
1930 | pr->u.p.last_upper = pr->u.p.ub; | |||||||
1931 | #endif /* KMP_OS_WINDOWS */ | |||||||
1932 | } | |||||||
1933 | if (p_last != NULL__null) | |||||||
1934 | *p_last = last; | |||||||
1935 | if (p_st != NULL__null) | |||||||
1936 | *p_st = incr; | |||||||
1937 | if (incr == 1) { | |||||||
1938 | *p_lb = start + init; | |||||||
1939 | *p_ub = start + limit; | |||||||
1940 | } else { | |||||||
1941 | *p_lb = start + init * incr; | |||||||
1942 | *p_ub = start + limit * incr; | |||||||
1943 | } | |||||||
1944 | ||||||||
1945 | if (pr->flags.ordered) { | |||||||
1946 | pr->u.p.ordered_lower = init; | |||||||
1947 | pr->u.p.ordered_upper = limit; | |||||||
1948 | #ifdef KMP_DEBUG1 | |||||||
1949 | { | |||||||
1950 | char *buff; | |||||||
1951 | // create format specifiers before the debug output | |||||||
1952 | buff = __kmp_str_format("__kmp_dispatch_next: T#%%d " | |||||||
1953 | "ordered_lower:%%%s ordered_upper:%%%s\n", | |||||||
1954 | traits_t<UT>::spec, traits_t<UT>::spec); | |||||||
1955 | KD_TRACE(1000, (buff, gtid, pr->u.p.ordered_lower,if (kmp_d_debug >= 1000) { __kmp_debug_printf (buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper); } | |||||||
1956 | pr->u.p.ordered_upper))if (kmp_d_debug >= 1000) { __kmp_debug_printf (buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper); }; | |||||||
1957 | __kmp_str_free(&buff); | |||||||
1958 | } | |||||||
1959 | #endif | |||||||
1960 | } // if | |||||||
1961 | } // if | |||||||
1962 | } else { | |||||||
1963 | pr->u.p.tc = 0; | |||||||
1964 | *p_lb = pr->u.p.lb; | |||||||
1965 | *p_ub = pr->u.p.ub; | |||||||
1966 | #if KMP_OS_WINDOWS0 | |||||||
1967 | pr->u.p.last_upper = *p_ub; | |||||||
1968 | #endif /* KMP_OS_WINDOWS */ | |||||||
1969 | if (p_last != NULL__null) | |||||||
1970 | *p_last = TRUE(!0); | |||||||
1971 | if (p_st != NULL__null) | |||||||
1972 | *p_st = pr->u.p.st; | |||||||
1973 | } // if | |||||||
1974 | #ifdef KMP_DEBUG1 | |||||||
1975 | { | |||||||
1976 | char *buff; | |||||||
1977 | // create format specifiers before the debug output | |||||||
1978 | buff = __kmp_str_format( | |||||||
1979 | "__kmp_dispatch_next: T#%%d serialized case: p_lb:%%%s " | |||||||
1980 | "p_ub:%%%s p_st:%%%s p_last:%%p %%d returning:%%d\n", | |||||||
1981 | traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec); | |||||||
1982 | KD_TRACE(10, (buff, gtid, *p_lb, *p_ub, *p_st, p_last, *p_last, status))if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, * p_lb, *p_ub, *p_st, p_last, *p_last, status); }; | |||||||
| ||||||||
1983 | __kmp_str_free(&buff); | |||||||
1984 | } | |||||||
1985 | #endif | |||||||
1986 | #if INCLUDE_SSC_MARKS(1 && 1) | |||||||
1987 | SSC_MARK_DISPATCH_NEXT()__asm__ __volatile__("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 " ::"i"(0xd697) : "%ebx"); | |||||||
1988 | #endif | |||||||
1989 | OMPT_LOOP_ENDif (status == 0) { if (ompt_enabled.ompt_callback_work) { ompt_team_info_t *team_info = __ompt_get_teaminfo(0, __null); ompt_task_info_t *task_info = __ompt_get_task_info_object(0); ompt_callbacks. ompt_callback_work_callback( ompt_work_loop, ompt_scope_end, & (team_info->parallel_data), &(task_info->task_data) , 0, codeptr); } }; | |||||||
1990 | KMP_STATS_LOOP_END; | |||||||
1991 | return status; | |||||||
1992 | } else { | |||||||
1993 | kmp_int32 last = 0; | |||||||
1994 | dispatch_shared_info_template<T> volatile *sh; | |||||||
1995 | ||||||||
1996 | KMP_DEBUG_ASSERT(th->th.th_dispatch ==if (!(th->th.th_dispatch == &th->th.th_team->t.t_dispatch [th->th.th_info.ds.ds_tid])) { __kmp_debug_assert("th->th.th_dispatch == &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 1997); } | |||||||
1997 | &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid])if (!(th->th.th_dispatch == &th->th.th_team->t.t_dispatch [th->th.th_info.ds.ds_tid])) { __kmp_debug_assert("th->th.th_dispatch == &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 1997); }; | |||||||
1998 | ||||||||
1999 | pr = reinterpret_cast<dispatch_private_info_template<T> *>( | |||||||
2000 | th->th.th_dispatch->th_dispatch_pr_current); | |||||||
2001 | KMP_DEBUG_ASSERT(pr)if (!(pr)) { __kmp_debug_assert("pr", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 2001); }; | |||||||
2002 | sh = reinterpret_cast<dispatch_shared_info_template<T> volatile *>( | |||||||
2003 | th->th.th_dispatch->th_dispatch_sh_current); | |||||||
2004 | KMP_DEBUG_ASSERT(sh)if (!(sh)) { __kmp_debug_assert("sh", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 2004); }; | |||||||
2005 | ||||||||
2006 | #if KMP_USE_HIER_SCHED0 | |||||||
2007 | if (pr->flags.use_hier) | |||||||
2008 | status = sh->hier->next(loc, gtid, pr, &last, p_lb, p_ub, p_st); | |||||||
2009 | else | |||||||
2010 | #endif // KMP_USE_HIER_SCHED | |||||||
2011 | status = __kmp_dispatch_next_algorithm<T>(gtid, pr, sh, &last, p_lb, p_ub, | |||||||
2012 | p_st, th->th.th_team_nproc, | |||||||
2013 | th->th.th_info.ds.ds_tid); | |||||||
2014 | // status == 0: no more iterations to execute | |||||||
2015 | if (status == 0) { | |||||||
2016 | UT num_done; | |||||||
2017 | ||||||||
2018 | num_done = test_then_inc<ST>((volatile ST *)&sh->u.s.num_done); | |||||||
2019 | #ifdef KMP_DEBUG1 | |||||||
2020 | { | |||||||
2021 | char *buff; | |||||||
2022 | // create format specifiers before the debug output | |||||||
2023 | buff = __kmp_str_format( | |||||||
2024 | "__kmp_dispatch_next: T#%%d increment num_done:%%%s\n", | |||||||
2025 | traits_t<UT>::spec); | |||||||
2026 | KD_TRACE(10, (buff, gtid, sh->u.s.num_done))if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, sh ->u.s.num_done); }; | |||||||
2027 | __kmp_str_free(&buff); | |||||||
2028 | } | |||||||
2029 | #endif | |||||||
2030 | ||||||||
2031 | #if KMP_USE_HIER_SCHED0 | |||||||
2032 | pr->flags.use_hier = FALSE0; | |||||||
2033 | #endif | |||||||
2034 | if ((ST)num_done == th->th.th_team_nproc - 1) { | |||||||
2035 | #if (KMP_STATIC_STEAL_ENABLED1) | |||||||
2036 | if (pr->schedule == kmp_sch_static_steal && | |||||||
2037 | traits_t<T>::type_size > 4) { | |||||||
2038 | int i; | |||||||
2039 | kmp_info_t **other_threads = team->t.t_threads; | |||||||
2040 | // loop complete, safe to destroy locks used for stealing | |||||||
2041 | for (i = 0; i < th->th.th_team_nproc; ++i) { | |||||||
2042 | kmp_lock_t *lck = other_threads[i]->th.th_dispatch->th_steal_lock; | |||||||
2043 | KMP_ASSERT(lck != NULL)if (!(lck != __null)) { __kmp_debug_assert("lck != NULL", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 2043); }; | |||||||
2044 | __kmp_destroy_lock(lck); | |||||||
2045 | __kmp_free(lck)___kmp_free((lck), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 2045); | |||||||
2046 | other_threads[i]->th.th_dispatch->th_steal_lock = NULL__null; | |||||||
2047 | } | |||||||
2048 | } | |||||||
2049 | #endif | |||||||
2050 | /* NOTE: release this buffer to be reused */ | |||||||
2051 | ||||||||
2052 | KMP_MB(); /* Flush all pending memory write invalidates. */ | |||||||
2053 | ||||||||
2054 | sh->u.s.num_done = 0; | |||||||
2055 | sh->u.s.iteration = 0; | |||||||
2056 | ||||||||
2057 | /* TODO replace with general release procedure? */ | |||||||
2058 | if (pr->flags.ordered) { | |||||||
2059 | sh->u.s.ordered_iteration = 0; | |||||||
2060 | } | |||||||
2061 | ||||||||
2062 | KMP_MB(); /* Flush all pending memory write invalidates. */ | |||||||
2063 | ||||||||
2064 | sh->buffer_index += __kmp_dispatch_num_buffers; | |||||||
2065 | KD_TRACE(100, ("__kmp_dispatch_next: T#%d change buffer_index:%d\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next: T#%d change buffer_index:%d\n" , gtid, sh->buffer_index); } | |||||||
2066 | gtid, sh->buffer_index))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next: T#%d change buffer_index:%d\n" , gtid, sh->buffer_index); }; | |||||||
2067 | ||||||||
2068 | KMP_MB(); /* Flush all pending memory write invalidates. */ | |||||||
2069 | ||||||||
2070 | } // if | |||||||
2071 | if (__kmp_env_consistency_check) { | |||||||
2072 | if (pr->pushed_ws != ct_none) { | |||||||
2073 | pr->pushed_ws = __kmp_pop_workshare(gtid, pr->pushed_ws, loc); | |||||||
2074 | } | |||||||
2075 | } | |||||||
2076 | ||||||||
2077 | th->th.th_dispatch->th_deo_fcn = NULL__null; | |||||||
2078 | th->th.th_dispatch->th_dxo_fcn = NULL__null; | |||||||
2079 | th->th.th_dispatch->th_dispatch_sh_current = NULL__null; | |||||||
2080 | th->th.th_dispatch->th_dispatch_pr_current = NULL__null; | |||||||
2081 | } // if (status == 0) | |||||||
2082 | #if KMP_OS_WINDOWS0 | |||||||
2083 | else if (last) { | |||||||
2084 | pr->u.p.last_upper = pr->u.p.ub; | |||||||
2085 | } | |||||||
2086 | #endif /* KMP_OS_WINDOWS */ | |||||||
2087 | if (p_last != NULL__null && status != 0) | |||||||
2088 | *p_last = last; | |||||||
2089 | } // if | |||||||
2090 | ||||||||
2091 | #ifdef KMP_DEBUG1 | |||||||
2092 | { | |||||||
2093 | char *buff; | |||||||
2094 | // create format specifiers before the debug output | |||||||
2095 | buff = __kmp_str_format( | |||||||
2096 | "__kmp_dispatch_next: T#%%d normal case: " | |||||||
2097 | "p_lb:%%%s p_ub:%%%s p_st:%%%s p_last:%%p (%%d) returning:%%d\n", | |||||||
2098 | traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec); | |||||||
2099 | KD_TRACE(10, (buff, gtid, *p_lb, *p_ub, p_st ? *p_st : 0, p_last,if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, * p_lb, *p_ub, p_st ? *p_st : 0, p_last, (p_last ? *p_last : 0) , status); } | |||||||
2100 | (p_last ? *p_last : 0), status))if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, * p_lb, *p_ub, p_st ? *p_st : 0, p_last, (p_last ? *p_last : 0) , status); }; | |||||||
2101 | __kmp_str_free(&buff); | |||||||
2102 | } | |||||||
2103 | #endif | |||||||
2104 | #if INCLUDE_SSC_MARKS(1 && 1) | |||||||
2105 | SSC_MARK_DISPATCH_NEXT()__asm__ __volatile__("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 " ::"i"(0xd697) : "%ebx"); | |||||||
2106 | #endif | |||||||
2107 | OMPT_LOOP_ENDif (status == 0) { if (ompt_enabled.ompt_callback_work) { ompt_team_info_t *team_info = __ompt_get_teaminfo(0, __null); ompt_task_info_t *task_info = __ompt_get_task_info_object(0); ompt_callbacks. ompt_callback_work_callback( ompt_work_loop, ompt_scope_end, & (team_info->parallel_data), &(task_info->task_data) , 0, codeptr); } }; | |||||||
2108 | KMP_STATS_LOOP_END; | |||||||
2109 | return status; | |||||||
2110 | } | |||||||
2111 | ||||||||
2112 | template <typename T> | |||||||
2113 | static void __kmp_dist_get_bounds(ident_t *loc, kmp_int32 gtid, | |||||||
2114 | kmp_int32 *plastiter, T *plower, T *pupper, | |||||||
2115 | typename traits_t<T>::signed_t incr) { | |||||||
2116 | typedef typename traits_t<T>::unsigned_t UT; | |||||||
2117 | kmp_uint32 team_id; | |||||||
2118 | kmp_uint32 nteams; | |||||||
2119 | UT trip_count; | |||||||
2120 | kmp_team_t *team; | |||||||
2121 | kmp_info_t *th; | |||||||
2122 | ||||||||
2123 | KMP_DEBUG_ASSERT(plastiter && plower && pupper)if (!(plastiter && plower && pupper)) { __kmp_debug_assert ("plastiter && plower && pupper", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 2123); }; | |||||||
2124 | KE_TRACE(10, ("__kmpc_dist_get_bounds called (%d)\n", gtid))if (kmp_e_debug >= 10) { __kmp_debug_printf ("__kmpc_dist_get_bounds called (%d)\n" , gtid); }; | |||||||
2125 | #ifdef KMP_DEBUG1 | |||||||
2126 | typedef typename traits_t<T>::signed_t ST; | |||||||
2127 | { | |||||||
2128 | char *buff; | |||||||
2129 | // create format specifiers before the debug output | |||||||
2130 | buff = __kmp_str_format("__kmpc_dist_get_bounds: T#%%d liter=%%d " | |||||||
2131 | "iter=(%%%s, %%%s, %%%s) signed?<%s>\n", | |||||||
2132 | traits_t<T>::spec, traits_t<T>::spec, | |||||||
2133 | traits_t<ST>::spec, traits_t<T>::spec); | |||||||
2134 | KD_TRACE(100, (buff, gtid, *plastiter, *plower, *pupper, incr))if (kmp_d_debug >= 100) { __kmp_debug_printf (buff, gtid, * plastiter, *plower, *pupper, incr); }; | |||||||
2135 | __kmp_str_free(&buff); | |||||||
2136 | } | |||||||
2137 | #endif | |||||||
2138 | ||||||||
2139 | if (__kmp_env_consistency_check) { | |||||||
2140 | if (incr == 0) { | |||||||
2141 | __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, | |||||||
2142 | loc); | |||||||
2143 | } | |||||||
2144 | if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) { | |||||||
2145 | // The loop is illegal. | |||||||
2146 | // Some zero-trip loops maintained by compiler, e.g.: | |||||||
2147 | // for(i=10;i<0;++i) // lower >= upper - run-time check | |||||||
2148 | // for(i=0;i>10;--i) // lower <= upper - run-time check | |||||||
2149 | // for(i=0;i>10;++i) // incr > 0 - compile-time check | |||||||
2150 | // for(i=10;i<0;--i) // incr < 0 - compile-time check | |||||||
2151 | // Compiler does not check the following illegal loops: | |||||||
2152 | // for(i=0;i<10;i+=incr) // where incr<0 | |||||||
2153 | // for(i=10;i>0;i-=incr) // where incr<0 | |||||||
2154 | __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc); | |||||||
2155 | } | |||||||
2156 | } | |||||||
2157 | th = __kmp_threads[gtid]; | |||||||
2158 | team = th->th.th_team; | |||||||
2159 | #if OMP_40_ENABLED(50 >= 40) | |||||||
2160 | KMP_DEBUG_ASSERT(th->th.th_teams_microtask)if (!(th->th.th_teams_microtask)) { __kmp_debug_assert("th->th.th_teams_microtask" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 2160); }; // we are in the teams construct | |||||||
2161 | nteams = th->th.th_teams_size.nteams; | |||||||
2162 | #endif | |||||||
2163 | team_id = team->t.t_master_tid; | |||||||
2164 | KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc)if (!(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc )) { __kmp_debug_assert("nteams == (kmp_uint32)team->t.t_parent->t.t_nproc" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 2164); }; | |||||||
2165 | ||||||||
2166 | // compute global trip count | |||||||
2167 | if (incr == 1) { | |||||||
2168 | trip_count = *pupper - *plower + 1; | |||||||
2169 | } else if (incr == -1) { | |||||||
2170 | trip_count = *plower - *pupper + 1; | |||||||
2171 | } else if (incr > 0) { | |||||||
2172 | // upper-lower can exceed the limit of signed type | |||||||
2173 | trip_count = (UT)(*pupper - *plower) / incr + 1; | |||||||
2174 | } else { | |||||||
2175 | trip_count = (UT)(*plower - *pupper) / (-incr) + 1; | |||||||
2176 | } | |||||||
2177 | ||||||||
2178 | if (trip_count <= nteams) { | |||||||
2179 | KMP_DEBUG_ASSERT(if (!(__kmp_static == kmp_sch_static_greedy || __kmp_static == kmp_sch_static_balanced)) { __kmp_debug_assert("__kmp_static == kmp_sch_static_greedy || __kmp_static == kmp_sch_static_balanced" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 2182); } | |||||||
2180 | __kmp_static == kmp_sch_static_greedy ||if (!(__kmp_static == kmp_sch_static_greedy || __kmp_static == kmp_sch_static_balanced)) { __kmp_debug_assert("__kmp_static == kmp_sch_static_greedy || __kmp_static == kmp_sch_static_balanced" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 2182); } | |||||||
2181 | __kmp_static ==if (!(__kmp_static == kmp_sch_static_greedy || __kmp_static == kmp_sch_static_balanced)) { __kmp_debug_assert("__kmp_static == kmp_sch_static_greedy || __kmp_static == kmp_sch_static_balanced" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 2182); } | |||||||
2182 | kmp_sch_static_balanced)if (!(__kmp_static == kmp_sch_static_greedy || __kmp_static == kmp_sch_static_balanced)) { __kmp_debug_assert("__kmp_static == kmp_sch_static_greedy || __kmp_static == kmp_sch_static_balanced" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 2182); }; // Unknown static scheduling type. | |||||||
2183 | // only some teams get single iteration, others get nothing | |||||||
2184 | if (team_id < trip_count) { | |||||||
2185 | *pupper = *plower = *plower + team_id * incr; | |||||||
2186 | } else { | |||||||
2187 | *plower = *pupper + incr; // zero-trip loop | |||||||
2188 | } | |||||||
2189 | if (plastiter != NULL__null) | |||||||
2190 | *plastiter = (team_id == trip_count - 1); | |||||||
2191 | } else { | |||||||
2192 | if (__kmp_static == kmp_sch_static_balanced) { | |||||||
2193 | UT chunk = trip_count / nteams; | |||||||
2194 | UT extras = trip_count % nteams; | |||||||
2195 | *plower += | |||||||
2196 | incr * (team_id * chunk + (team_id < extras ? team_id : extras)); | |||||||
2197 | *pupper = *plower + chunk * incr - (team_id < extras ? 0 : incr); | |||||||
2198 | if (plastiter != NULL__null) | |||||||
2199 | *plastiter = (team_id == nteams - 1); | |||||||
2200 | } else { | |||||||
2201 | T chunk_inc_count = | |||||||
2202 | (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr; | |||||||
2203 | T upper = *pupper; | |||||||
2204 | KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy)if (!(__kmp_static == kmp_sch_static_greedy)) { __kmp_debug_assert ("__kmp_static == kmp_sch_static_greedy", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 2204); }; | |||||||
2205 | // Unknown static scheduling type. | |||||||
2206 | *plower += team_id * chunk_inc_count; | |||||||
2207 | *pupper = *plower + chunk_inc_count - incr; | |||||||
2208 | // Check/correct bounds if needed | |||||||
2209 | if (incr > 0) { | |||||||
2210 | if (*pupper < *plower) | |||||||
2211 | *pupper = traits_t<T>::max_value; | |||||||
2212 | if (plastiter != NULL__null) | |||||||
2213 | *plastiter = *plower <= upper && *pupper > upper - incr; | |||||||
2214 | if (*pupper > upper) | |||||||
2215 | *pupper = upper; // tracker C73258 | |||||||
2216 | } else { | |||||||
2217 | if (*pupper > *plower) | |||||||
2218 | *pupper = traits_t<T>::min_value; | |||||||
2219 | if (plastiter != NULL__null) | |||||||
2220 | *plastiter = *plower >= upper && *pupper < upper - incr; | |||||||
2221 | if (*pupper < upper) | |||||||
2222 | *pupper = upper; // tracker C73258 | |||||||
2223 | } | |||||||
2224 | } | |||||||
2225 | } | |||||||
2226 | } | |||||||
2227 | ||||||||
2228 | //----------------------------------------------------------------------------- | |||||||
2229 | // Dispatch routines | |||||||
2230 | // Transfer call to template< type T > | |||||||
2231 | // __kmp_dispatch_init( ident_t *loc, int gtid, enum sched_type schedule, | |||||||
2232 | // T lb, T ub, ST st, ST chunk ) | |||||||
2233 | extern "C" { | |||||||
2234 | ||||||||
2235 | /*! | |||||||
2236 | @ingroup WORK_SHARING | |||||||
2237 | @{ | |||||||
2238 | @param loc Source location | |||||||
2239 | @param gtid Global thread id | |||||||
2240 | @param schedule Schedule type | |||||||
2241 | @param lb Lower bound | |||||||
2242 | @param ub Upper bound | |||||||
2243 | @param st Step (or increment if you prefer) | |||||||
2244 | @param chunk The chunk size to block with | |||||||
2245 | ||||||||
2246 | This function prepares the runtime to start a dynamically scheduled for loop, | |||||||
2247 | saving the loop arguments. | |||||||
2248 | These functions are all identical apart from the types of the arguments. | |||||||
2249 | */ | |||||||
2250 | ||||||||
2251 | void __kmpc_dispatch_init_4(ident_t *loc, kmp_int32 gtid, | |||||||
2252 | enum sched_type schedule, kmp_int32 lb, | |||||||
2253 | kmp_int32 ub, kmp_int32 st, kmp_int32 chunk) { | |||||||
2254 | KMP_DEBUG_ASSERT(__kmp_init_serial)if (!(__kmp_init_serial)) { __kmp_debug_assert("__kmp_init_serial" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 2254); }; | |||||||
2255 | #if OMPT_SUPPORT1 && OMPT_OPTIONAL1 | |||||||
2256 | OMPT_STORE_RETURN_ADDRESS(gtid)if (ompt_enabled.enabled && gtid >= 0 && __kmp_threads [gtid] && !__kmp_threads[gtid]->th.ompt_thread_info .return_address) __kmp_threads[gtid]->th.ompt_thread_info. return_address = __builtin_return_address(0); | |||||||
2257 | #endif | |||||||
2258 | __kmp_dispatch_init<kmp_int32>(loc, gtid, schedule, lb, ub, st, chunk, true); | |||||||
2259 | } | |||||||
2260 | /*! | |||||||
2261 | See @ref __kmpc_dispatch_init_4 | |||||||
2262 | */ | |||||||
2263 | void __kmpc_dispatch_init_4u(ident_t *loc, kmp_int32 gtid, | |||||||
2264 | enum sched_type schedule, kmp_uint32 lb, | |||||||
2265 | kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk) { | |||||||
2266 | KMP_DEBUG_ASSERT(__kmp_init_serial)if (!(__kmp_init_serial)) { __kmp_debug_assert("__kmp_init_serial" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 2266); }; | |||||||
2267 | #if OMPT_SUPPORT1 && OMPT_OPTIONAL1 | |||||||
2268 | OMPT_STORE_RETURN_ADDRESS(gtid)if (ompt_enabled.enabled && gtid >= 0 && __kmp_threads [gtid] && !__kmp_threads[gtid]->th.ompt_thread_info .return_address) __kmp_threads[gtid]->th.ompt_thread_info. return_address = __builtin_return_address(0); | |||||||
2269 | #endif | |||||||
2270 | __kmp_dispatch_init<kmp_uint32>(loc, gtid, schedule, lb, ub, st, chunk, true); | |||||||
2271 | } | |||||||
2272 | ||||||||
2273 | /*! | |||||||
2274 | See @ref __kmpc_dispatch_init_4 | |||||||
2275 | */ | |||||||
2276 | void __kmpc_dispatch_init_8(ident_t *loc, kmp_int32 gtid, | |||||||
2277 | enum sched_type schedule, kmp_int64 lb, | |||||||
2278 | kmp_int64 ub, kmp_int64 st, kmp_int64 chunk) { | |||||||
2279 | KMP_DEBUG_ASSERT(__kmp_init_serial)if (!(__kmp_init_serial)) { __kmp_debug_assert("__kmp_init_serial" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 2279); }; | |||||||
2280 | #if OMPT_SUPPORT1 && OMPT_OPTIONAL1 | |||||||
2281 | OMPT_STORE_RETURN_ADDRESS(gtid)if (ompt_enabled.enabled && gtid >= 0 && __kmp_threads [gtid] && !__kmp_threads[gtid]->th.ompt_thread_info .return_address) __kmp_threads[gtid]->th.ompt_thread_info. return_address = __builtin_return_address(0); | |||||||
2282 | #endif | |||||||
2283 | __kmp_dispatch_init<kmp_int64>(loc, gtid, schedule, lb, ub, st, chunk, true); | |||||||
2284 | } | |||||||
2285 | ||||||||
2286 | /*! | |||||||
2287 | See @ref __kmpc_dispatch_init_4 | |||||||
2288 | */ | |||||||
2289 | void __kmpc_dispatch_init_8u(ident_t *loc, kmp_int32 gtid, | |||||||
2290 | enum sched_type schedule, kmp_uint64 lb, | |||||||
2291 | kmp_uint64 ub, kmp_int64 st, kmp_int64 chunk) { | |||||||
2292 | KMP_DEBUG_ASSERT(__kmp_init_serial)if (!(__kmp_init_serial)) { __kmp_debug_assert("__kmp_init_serial" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 2292); }; | |||||||
2293 | #if OMPT_SUPPORT1 && OMPT_OPTIONAL1 | |||||||
2294 | OMPT_STORE_RETURN_ADDRESS(gtid)if (ompt_enabled.enabled && gtid >= 0 && __kmp_threads [gtid] && !__kmp_threads[gtid]->th.ompt_thread_info .return_address) __kmp_threads[gtid]->th.ompt_thread_info. return_address = __builtin_return_address(0); | |||||||
2295 | #endif | |||||||
2296 | __kmp_dispatch_init<kmp_uint64>(loc, gtid, schedule, lb, ub, st, chunk, true); | |||||||
2297 | } | |||||||
2298 | ||||||||
2299 | /*! | |||||||
2300 | See @ref __kmpc_dispatch_init_4 | |||||||
2301 | ||||||||
2302 | Difference from __kmpc_dispatch_init set of functions is these functions | |||||||
2303 | are called for composite distribute parallel for construct. Thus before | |||||||
2304 | regular iterations dispatching we need to calc per-team iteration space. | |||||||
2305 | ||||||||
2306 | These functions are all identical apart from the types of the arguments. | |||||||
2307 | */ | |||||||
2308 | void __kmpc_dist_dispatch_init_4(ident_t *loc, kmp_int32 gtid, | |||||||
2309 | enum sched_type schedule, kmp_int32 *p_last, | |||||||
2310 | kmp_int32 lb, kmp_int32 ub, kmp_int32 st, | |||||||
2311 | kmp_int32 chunk) { | |||||||
2312 | KMP_DEBUG_ASSERT(__kmp_init_serial)if (!(__kmp_init_serial)) { __kmp_debug_assert("__kmp_init_serial" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 2312); }; | |||||||
2313 | #if OMPT_SUPPORT1 && OMPT_OPTIONAL1 | |||||||
2314 | OMPT_STORE_RETURN_ADDRESS(gtid)if (ompt_enabled.enabled && gtid >= 0 && __kmp_threads [gtid] && !__kmp_threads[gtid]->th.ompt_thread_info .return_address) __kmp_threads[gtid]->th.ompt_thread_info. return_address = __builtin_return_address(0); | |||||||
2315 | #endif | |||||||
2316 | __kmp_dist_get_bounds<kmp_int32>(loc, gtid, p_last, &lb, &ub, st); | |||||||
2317 | __kmp_dispatch_init<kmp_int32>(loc, gtid, schedule, lb, ub, st, chunk, true); | |||||||
2318 | } | |||||||
2319 | ||||||||
2320 | void __kmpc_dist_dispatch_init_4u(ident_t *loc, kmp_int32 gtid, | |||||||
2321 | enum sched_type schedule, kmp_int32 *p_last, | |||||||
2322 | kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st, | |||||||
2323 | kmp_int32 chunk) { | |||||||
2324 | KMP_DEBUG_ASSERT(__kmp_init_serial)if (!(__kmp_init_serial)) { __kmp_debug_assert("__kmp_init_serial" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 2324); }; | |||||||
2325 | #if OMPT_SUPPORT1 && OMPT_OPTIONAL1 | |||||||
2326 | OMPT_STORE_RETURN_ADDRESS(gtid)if (ompt_enabled.enabled && gtid >= 0 && __kmp_threads [gtid] && !__kmp_threads[gtid]->th.ompt_thread_info .return_address) __kmp_threads[gtid]->th.ompt_thread_info. return_address = __builtin_return_address(0); | |||||||
2327 | #endif | |||||||
2328 | __kmp_dist_get_bounds<kmp_uint32>(loc, gtid, p_last, &lb, &ub, st); | |||||||
2329 | __kmp_dispatch_init<kmp_uint32>(loc, gtid, schedule, lb, ub, st, chunk, true); | |||||||
2330 | } | |||||||
2331 | ||||||||
2332 | void __kmpc_dist_dispatch_init_8(ident_t *loc, kmp_int32 gtid, | |||||||
2333 | enum sched_type schedule, kmp_int32 *p_last, | |||||||
2334 | kmp_int64 lb, kmp_int64 ub, kmp_int64 st, | |||||||
2335 | kmp_int64 chunk) { | |||||||
2336 | KMP_DEBUG_ASSERT(__kmp_init_serial)if (!(__kmp_init_serial)) { __kmp_debug_assert("__kmp_init_serial" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 2336); }; | |||||||
2337 | #if OMPT_SUPPORT1 && OMPT_OPTIONAL1 | |||||||
2338 | OMPT_STORE_RETURN_ADDRESS(gtid)if (ompt_enabled.enabled && gtid >= 0 && __kmp_threads [gtid] && !__kmp_threads[gtid]->th.ompt_thread_info .return_address) __kmp_threads[gtid]->th.ompt_thread_info. return_address = __builtin_return_address(0); | |||||||
2339 | #endif | |||||||
2340 | __kmp_dist_get_bounds<kmp_int64>(loc, gtid, p_last, &lb, &ub, st); | |||||||
2341 | __kmp_dispatch_init<kmp_int64>(loc, gtid, schedule, lb, ub, st, chunk, true); | |||||||
2342 | } | |||||||
2343 | ||||||||
2344 | void __kmpc_dist_dispatch_init_8u(ident_t *loc, kmp_int32 gtid, | |||||||
2345 | enum sched_type schedule, kmp_int32 *p_last, | |||||||
2346 | kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st, | |||||||
2347 | kmp_int64 chunk) { | |||||||
2348 | KMP_DEBUG_ASSERT(__kmp_init_serial)if (!(__kmp_init_serial)) { __kmp_debug_assert("__kmp_init_serial" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_dispatch.cpp" , 2348); }; | |||||||
2349 | #if OMPT_SUPPORT1 && OMPT_OPTIONAL1 | |||||||
2350 | OMPT_STORE_RETURN_ADDRESS(gtid)if (ompt_enabled.enabled && gtid >= 0 && __kmp_threads [gtid] && !__kmp_threads[gtid]->th.ompt_thread_info .return_address) __kmp_threads[gtid]->th.ompt_thread_info. return_address = __builtin_return_address(0); | |||||||
2351 | #endif | |||||||
2352 | __kmp_dist_get_bounds<kmp_uint64>(loc, gtid, p_last, &lb, &ub, st); | |||||||
2353 | __kmp_dispatch_init<kmp_uint64>(loc, gtid, schedule, lb, ub, st, chunk, true); | |||||||
2354 | } | |||||||
2355 | ||||||||
2356 | /*! | |||||||
2357 | @param loc Source code location | |||||||
2358 | @param gtid Global thread id | |||||||
2359 | @param p_last Pointer to a flag set to one if this is the last chunk or zero | |||||||
2360 | otherwise | |||||||
2361 | @param p_lb Pointer to the lower bound for the next chunk of work | |||||||
2362 | @param p_ub Pointer to the upper bound for the next chunk of work | |||||||
2363 | @param p_st Pointer to the stride for the next chunk of work | |||||||
2364 | @return one if there is work to be done, zero otherwise | |||||||
2365 | ||||||||
2366 | Get the next dynamically allocated chunk of work for this thread. | |||||||
2367 | If there is no more work, then the lb,ub and stride need not be modified. | |||||||
2368 | */ | |||||||
2369 | int __kmpc_dispatch_next_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, | |||||||
2370 | kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st) { | |||||||
2371 | #if OMPT_SUPPORT1 && OMPT_OPTIONAL1 | |||||||
2372 | OMPT_STORE_RETURN_ADDRESS(gtid)if (ompt_enabled.enabled && gtid >= 0 && __kmp_threads [gtid] && !__kmp_threads[gtid]->th.ompt_thread_info .return_address) __kmp_threads[gtid]->th.ompt_thread_info. return_address = __builtin_return_address(0); | |||||||
2373 | #endif | |||||||
2374 | return __kmp_dispatch_next<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st | |||||||
2375 | #if OMPT_SUPPORT1 && OMPT_OPTIONAL1 | |||||||
2376 | , | |||||||
2377 | OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid) | |||||||
2378 | #endif | |||||||
2379 | ); | |||||||
2380 | } | |||||||
2381 | ||||||||
2382 | /*! | |||||||
2383 | See @ref __kmpc_dispatch_next_4 | |||||||
2384 | */ | |||||||
2385 | int __kmpc_dispatch_next_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, | |||||||
2386 | kmp_uint32 *p_lb, kmp_uint32 *p_ub, | |||||||
2387 | kmp_int32 *p_st) { | |||||||
2388 | #if OMPT_SUPPORT1 && OMPT_OPTIONAL1 | |||||||
2389 | OMPT_STORE_RETURN_ADDRESS(gtid)if (ompt_enabled.enabled && gtid >= 0 && __kmp_threads [gtid] && !__kmp_threads[gtid]->th.ompt_thread_info .return_address) __kmp_threads[gtid]->th.ompt_thread_info. return_address = __builtin_return_address(0); | |||||||
2390 | #endif | |||||||
2391 | return __kmp_dispatch_next<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st | |||||||
2392 | #if OMPT_SUPPORT1 && OMPT_OPTIONAL1 | |||||||
2393 | , | |||||||
2394 | OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid) | |||||||
2395 | #endif | |||||||
2396 | ); | |||||||
2397 | } | |||||||
2398 | ||||||||
2399 | /*! | |||||||
2400 | See @ref __kmpc_dispatch_next_4 | |||||||
2401 | */ | |||||||
2402 | int __kmpc_dispatch_next_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, | |||||||
2403 | kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st) { | |||||||
2404 | #if OMPT_SUPPORT1 && OMPT_OPTIONAL1 | |||||||
2405 | OMPT_STORE_RETURN_ADDRESS(gtid)if (ompt_enabled.enabled && gtid >= 0 && __kmp_threads [gtid] && !__kmp_threads[gtid]->th.ompt_thread_info .return_address) __kmp_threads[gtid]->th.ompt_thread_info. return_address = __builtin_return_address(0); | |||||||
2406 | #endif | |||||||
2407 | return __kmp_dispatch_next<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st | |||||||
2408 | #if OMPT_SUPPORT1 && OMPT_OPTIONAL1 | |||||||
2409 | , | |||||||
2410 | OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid) | |||||||
2411 | #endif | |||||||
2412 | ); | |||||||
2413 | } | |||||||
2414 | ||||||||
2415 | /*! | |||||||
2416 | See @ref __kmpc_dispatch_next_4 | |||||||
2417 | */ | |||||||
2418 | int __kmpc_dispatch_next_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, | |||||||
2419 | kmp_uint64 *p_lb, kmp_uint64 *p_ub, | |||||||
2420 | kmp_int64 *p_st) { | |||||||
2421 | #if OMPT_SUPPORT1 && OMPT_OPTIONAL1 | |||||||
2422 | OMPT_STORE_RETURN_ADDRESS(gtid)if (ompt_enabled.enabled && gtid >= 0 && __kmp_threads [gtid] && !__kmp_threads[gtid]->th.ompt_thread_info .return_address) __kmp_threads[gtid]->th.ompt_thread_info. return_address = __builtin_return_address(0); | |||||||
2423 | #endif | |||||||
2424 | return __kmp_dispatch_next<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st | |||||||
| ||||||||
2425 | #if OMPT_SUPPORT1 && OMPT_OPTIONAL1 | |||||||
2426 | , | |||||||
2427 | OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid) | |||||||
2428 | #endif | |||||||
2429 | ); | |||||||
2430 | } | |||||||
2431 | ||||||||
2432 | /*! | |||||||
2433 | @param loc Source code location | |||||||
2434 | @param gtid Global thread id | |||||||
2435 | ||||||||
2436 | Mark the end of a dynamic loop. | |||||||
2437 | */ | |||||||
2438 | void __kmpc_dispatch_fini_4(ident_t *loc, kmp_int32 gtid) { | |||||||
2439 | __kmp_dispatch_finish<kmp_uint32>(gtid, loc); | |||||||
2440 | } | |||||||
2441 | ||||||||
2442 | /*! | |||||||
2443 | See @ref __kmpc_dispatch_fini_4 | |||||||
2444 | */ | |||||||
2445 | void __kmpc_dispatch_fini_8(ident_t *loc, kmp_int32 gtid) { | |||||||
2446 | __kmp_dispatch_finish<kmp_uint64>(gtid, loc); | |||||||
2447 | } | |||||||
2448 | ||||||||
2449 | /*! | |||||||
2450 | See @ref __kmpc_dispatch_fini_4 | |||||||
2451 | */ | |||||||
2452 | void __kmpc_dispatch_fini_4u(ident_t *loc, kmp_int32 gtid) { | |||||||
2453 | __kmp_dispatch_finish<kmp_uint32>(gtid, loc); | |||||||
2454 | } | |||||||
2455 | ||||||||
2456 | /*! | |||||||
2457 | See @ref __kmpc_dispatch_fini_4 | |||||||
2458 | */ | |||||||
2459 | void __kmpc_dispatch_fini_8u(ident_t *loc, kmp_int32 gtid) { | |||||||
2460 | __kmp_dispatch_finish<kmp_uint64>(gtid, loc); | |||||||
2461 | } | |||||||
2462 | /*! @} */ | |||||||
2463 | ||||||||
2464 | //----------------------------------------------------------------------------- | |||||||
2465 | // Non-template routines from kmp_dispatch.cpp used in other sources | |||||||
2466 | ||||||||
2467 | kmp_uint32 __kmp_eq_4(kmp_uint32 value, kmp_uint32 checker) { | |||||||
2468 | return value == checker; | |||||||
2469 | } | |||||||
2470 | ||||||||
2471 | kmp_uint32 __kmp_neq_4(kmp_uint32 value, kmp_uint32 checker) { | |||||||
2472 | return value != checker; | |||||||
2473 | } | |||||||
2474 | ||||||||
2475 | kmp_uint32 __kmp_lt_4(kmp_uint32 value, kmp_uint32 checker) { | |||||||
2476 | return value < checker; | |||||||
2477 | } | |||||||
2478 | ||||||||
2479 | kmp_uint32 __kmp_ge_4(kmp_uint32 value, kmp_uint32 checker) { | |||||||
2480 | return value >= checker; | |||||||
2481 | } | |||||||
2482 | ||||||||
2483 | kmp_uint32 __kmp_le_4(kmp_uint32 value, kmp_uint32 checker) { | |||||||
2484 | return value <= checker; | |||||||
2485 | } | |||||||
2486 | ||||||||
2487 | kmp_uint32 | |||||||
2488 | __kmp_wait_yield_4(volatile kmp_uint32 *spinner, kmp_uint32 checker, | |||||||
2489 | kmp_uint32 (*pred)(kmp_uint32, kmp_uint32), | |||||||
2490 | void *obj // Higher-level synchronization object, or NULL. | |||||||
2491 | ) { | |||||||
2492 | // note: we may not belong to a team at this point | |||||||
2493 | volatile kmp_uint32 *spin = spinner; | |||||||
2494 | kmp_uint32 check = checker; | |||||||
2495 | kmp_uint32 spins; | |||||||
2496 | kmp_uint32 (*f)(kmp_uint32, kmp_uint32) = pred; | |||||||
2497 | kmp_uint32 r; | |||||||
2498 | ||||||||
2499 | KMP_FSYNC_SPIN_INIT(obj, CCAST(kmp_uint32 *, spin))int sync_iters = 0; if (__kmp_itt_fsync_prepare_ptr__3_0) { if (obj == __null) { obj = const_cast<kmp_uint32 *>(spin) ; } } __asm__ __volatile__("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 " ::"i"(0x4376) : "%ebx"); | |||||||
2500 | KMP_INIT_YIELD(spins){ (spins) = __kmp_yield_init; }; | |||||||
2501 | // main wait spin loop | |||||||
2502 | while (!f(r = TCR_4(*spin)(*spin), check)) { | |||||||
2503 | KMP_FSYNC_SPIN_PREPARE(obj)do { if (__kmp_itt_fsync_prepare_ptr__3_0 && sync_iters < __kmp_itt_prepare_delay) { ++sync_iters; if (sync_iters >= __kmp_itt_prepare_delay) { (!__kmp_itt_fsync_prepare_ptr__3_0 ) ? (void)0 : __kmp_itt_fsync_prepare_ptr__3_0((void *)((void *)obj)); } } } while (0); | |||||||
2504 | /* GEH - remove this since it was accidentally introduced when kmp_wait was | |||||||
2505 | split. It causes problems with infinite recursion because of exit lock */ | |||||||
2506 | /* if ( TCR_4(__kmp_global.g.g_done) && __kmp_global.g.g_abort) | |||||||
2507 | __kmp_abort_thread(); */ | |||||||
2508 | ||||||||
2509 | /* if we have waited a bit, or are oversubscribed, yield */ | |||||||
2510 | /* pause is in the following code */ | |||||||
2511 | KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc){ __kmp_x86_pause(); __kmp_yield(((__kmp_nth) > __kmp_avail_proc )); }; | |||||||
2512 | KMP_YIELD_SPIN(spins){ __kmp_x86_pause(); (spins) -= 2; if (!(spins)) { __kmp_yield (1); (spins) = __kmp_yield_next; } }; | |||||||
2513 | } | |||||||
2514 | KMP_FSYNC_SPIN_ACQUIRED(obj)do { __asm__ __volatile__("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 " ::"i"(0x4377) : "%ebx"); if (sync_iters >= __kmp_itt_prepare_delay ) { (!__kmp_itt_fsync_acquired_ptr__3_0) ? (void)0 : __kmp_itt_fsync_acquired_ptr__3_0 ((void *)((void *)obj)); } } while (0); | |||||||
2515 | return r; | |||||||
2516 | } | |||||||
2517 | ||||||||
2518 | void __kmp_wait_yield_4_ptr( | |||||||
2519 | void *spinner, kmp_uint32 checker, kmp_uint32 (*pred)(void *, kmp_uint32), | |||||||
2520 | void *obj // Higher-level synchronization object, or NULL. | |||||||
2521 | ) { | |||||||
2522 | // note: we may not belong to a team at this point | |||||||
2523 | void *spin = spinner; | |||||||
2524 | kmp_uint32 check = checker; | |||||||
2525 | kmp_uint32 spins; | |||||||
2526 | kmp_uint32 (*f)(void *, kmp_uint32) = pred; | |||||||
2527 | ||||||||
2528 | KMP_FSYNC_SPIN_INIT(obj, spin)int sync_iters = 0; if (__kmp_itt_fsync_prepare_ptr__3_0) { if (obj == __null) { obj = spin; } } __asm__ __volatile__("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 " ::"i"(0x4376) : "%ebx"); | |||||||
2529 | KMP_INIT_YIELD(spins){ (spins) = __kmp_yield_init; }; | |||||||
2530 | // main wait spin loop | |||||||
2531 | while (!f(spin, check)) { | |||||||
2532 | KMP_FSYNC_SPIN_PREPARE(obj)do { if (__kmp_itt_fsync_prepare_ptr__3_0 && sync_iters < __kmp_itt_prepare_delay) { ++sync_iters; if (sync_iters >= __kmp_itt_prepare_delay) { (!__kmp_itt_fsync_prepare_ptr__3_0 ) ? (void)0 : __kmp_itt_fsync_prepare_ptr__3_0((void *)((void *)obj)); } } } while (0); | |||||||
2533 | /* if we have waited a bit, or are oversubscribed, yield */ | |||||||
2534 | /* pause is in the following code */ | |||||||
2535 | KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc){ __kmp_x86_pause(); __kmp_yield(((__kmp_nth) > __kmp_avail_proc )); }; | |||||||
2536 | KMP_YIELD_SPIN(spins){ __kmp_x86_pause(); (spins) -= 2; if (!(spins)) { __kmp_yield (1); (spins) = __kmp_yield_next; } }; | |||||||
2537 | } | |||||||
2538 | KMP_FSYNC_SPIN_ACQUIRED(obj)do { __asm__ __volatile__("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 " ::"i"(0x4377) : "%ebx"); if (sync_iters >= __kmp_itt_prepare_delay ) { (!__kmp_itt_fsync_acquired_ptr__3_0) ? (void)0 : __kmp_itt_fsync_acquired_ptr__3_0 ((void *)((void *)obj)); } } while (0); | |||||||
2539 | } | |||||||
2540 | ||||||||
2541 | } // extern "C" | |||||||
2542 | ||||||||
2543 | #ifdef KMP_GOMP_COMPAT | |||||||
2544 | ||||||||
2545 | void __kmp_aux_dispatch_init_4(ident_t *loc, kmp_int32 gtid, | |||||||
2546 | enum sched_type schedule, kmp_int32 lb, | |||||||
2547 | kmp_int32 ub, kmp_int32 st, kmp_int32 chunk, | |||||||
2548 | int push_ws) { | |||||||
2549 | __kmp_dispatch_init<kmp_int32>(loc, gtid, schedule, lb, ub, st, chunk, | |||||||
2550 | push_ws); | |||||||
2551 | } | |||||||
2552 | ||||||||
2553 | void __kmp_aux_dispatch_init_4u(ident_t *loc, kmp_int32 gtid, | |||||||
2554 | enum sched_type schedule, kmp_uint32 lb, | |||||||
2555 | kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk, | |||||||
2556 | int push_ws) { | |||||||
2557 | __kmp_dispatch_init<kmp_uint32>(loc, gtid, schedule, lb, ub, st, chunk, | |||||||
2558 | push_ws); | |||||||
2559 | } | |||||||
2560 | ||||||||
2561 | void __kmp_aux_dispatch_init_8(ident_t *loc, kmp_int32 gtid, | |||||||
2562 | enum sched_type schedule, kmp_int64 lb, | |||||||
2563 | kmp_int64 ub, kmp_int64 st, kmp_int64 chunk, | |||||||
2564 | int push_ws) { | |||||||
2565 | __kmp_dispatch_init<kmp_int64>(loc, gtid, schedule, lb, ub, st, chunk, | |||||||
2566 | push_ws); | |||||||
2567 | } | |||||||
2568 | ||||||||
2569 | void __kmp_aux_dispatch_init_8u(ident_t *loc, kmp_int32 gtid, | |||||||
2570 | enum sched_type schedule, kmp_uint64 lb, | |||||||
2571 | kmp_uint64 ub, kmp_int64 st, kmp_int64 chunk, | |||||||
2572 | int push_ws) { | |||||||
2573 | __kmp_dispatch_init<kmp_uint64>(loc, gtid, schedule, lb, ub, st, chunk, | |||||||
2574 | push_ws); | |||||||
2575 | } | |||||||
2576 | ||||||||
2577 | void __kmp_aux_dispatch_fini_chunk_4(ident_t *loc, kmp_int32 gtid) { | |||||||
2578 | __kmp_dispatch_finish_chunk<kmp_uint32>(gtid, loc); | |||||||
2579 | } | |||||||
2580 | ||||||||
2581 | void __kmp_aux_dispatch_fini_chunk_8(ident_t *loc, kmp_int32 gtid) { | |||||||
2582 | __kmp_dispatch_finish_chunk<kmp_uint64>(gtid, loc); | |||||||
2583 | } | |||||||
2584 | ||||||||
2585 | void __kmp_aux_dispatch_fini_chunk_4u(ident_t *loc, kmp_int32 gtid) { | |||||||
2586 | __kmp_dispatch_finish_chunk<kmp_uint32>(gtid, loc); | |||||||
2587 | } | |||||||
2588 | ||||||||
2589 | void __kmp_aux_dispatch_fini_chunk_8u(ident_t *loc, kmp_int32 gtid) { | |||||||
2590 | __kmp_dispatch_finish_chunk<kmp_uint64>(gtid, loc); | |||||||
2591 | } | |||||||
2592 | ||||||||
2593 | #endif /* KMP_GOMP_COMPAT */ | |||||||
2594 | ||||||||
2595 | /* ------------------------------------------------------------------------ */ |