File: build/source/openmp/runtime/src/kmp_sched.cpp
Static-analyzer warning: line 322, column 37 — Division by zero (trip_count / nth).
Review note: this looks like a false positive — the nth == 1 case returns before this
point, and nth is the team's thread count (presumably >= 1); confirm t_nproc can never
be 0 on this path before suppressing.
1 | /* | |||
2 | * kmp_sched.cpp -- static scheduling -- iteration initialization | |||
3 | */ | |||
4 | ||||
5 | //===----------------------------------------------------------------------===// | |||
6 | // | |||
7 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
8 | // See https://llvm.org/LICENSE.txt for license information. | |||
9 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
10 | // | |||
11 | //===----------------------------------------------------------------------===// | |||
12 | ||||
13 | /* Static scheduling initialization. | |||
14 | ||||
15 | NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however | |||
16 | it may change values between parallel regions. __kmp_max_nth | |||
17 | is the largest value __kmp_nth may take, 1 is the smallest. */ | |||
18 | ||||
19 | #include "kmp.h" | |||
20 | #include "kmp_error.h" | |||
21 | #include "kmp_i18n.h" | |||
22 | #include "kmp_itt.h" | |||
23 | #include "kmp_stats.h" | |||
24 | #include "kmp_str.h" | |||
25 | ||||
26 | #if OMPT_SUPPORT1 | |||
27 | #include "ompt-specific.h" | |||
28 | #endif | |||
29 | ||||
#ifdef KMP_DEBUG
//-------------------------------------------------------------------------
// template for debug prints specification ( d, u, lld, llu )
// printf-style format specifiers, one per loop-index type instantiated by
// the templated scheduling routines below; used only to build the KD_TRACE
// debug format strings.
char const *traits_t<int>::spec = "d";
char const *traits_t<unsigned int>::spec = "u";
char const *traits_t<long long>::spec = "lld";
char const *traits_t<unsigned long long>::spec = "llu";
char const *traits_t<long>::spec = "ld";
//-------------------------------------------------------------------------
#endif
40 | ||||
#if KMP_STATS_ENABLED
// Record the number of iterations this thread executed for statistic `stat`
// and pop the partitioned timer pushed at loop entry.  The count is
// recomputed from the (already narrowed) per-thread bounds *plower/*pupper
// and the increment `incr`, all of which must be in scope at the expansion
// site.  The four branches mirror the trip-count computation below:
// fast paths for incr == +/-1, general division otherwise (incr is nonzero
// here; incr == 0 is rejected earlier by the consistency check).
#define KMP_STATS_LOOP_END(stat)                                               \
  {                                                                            \
    kmp_int64 t;                                                               \
    kmp_int64 u = (kmp_int64)(*pupper);                                        \
    kmp_int64 l = (kmp_int64)(*plower);                                        \
    kmp_int64 i = (kmp_int64)incr;                                             \
    if (i == 1) {                                                              \
      t = u - l + 1;                                                           \
    } else if (i == -1) {                                                      \
      t = l - u + 1;                                                           \
    } else if (i > 0) {                                                        \
      t = (u - l) / i + 1;                                                     \
    } else {                                                                   \
      t = (l - u) / (-i) + 1;                                                  \
    }                                                                          \
    KMP_COUNT_VALUE(stat, t);                                                  \
    KMP_POP_PARTITIONED_TIMER();                                               \
  }
#else
#define KMP_STATS_LOOP_END(stat) /* Nothing */
#endif
63 | ||||
#if USE_ITT_BUILD || defined KMP_DEBUG
// Placeholder source-location record for call sites that passed no ident_t.
static ident_t loc_stub = {0, KMP_IDENT_KMPC, 0, 0, ";unknown;unknown;0;0;;"};
// Substitute the stub when 'loc' is NULL so location info can still be
// reported (e.g. to ittnotify).
static inline void check_loc(ident_t *&loc) {
  if (!loc) {
    loc = &loc_stub;
  }
}
#endif
71 | ||||
// Compute the per-thread bounds of a statically scheduled worksharing loop
// (also used for sections and "distribute" lowered to a static schedule).
//
// On entry *plower/*pupper describe the full iteration space and incr is the
// loop increment (incr == 0 is rejected by the consistency check); on exit
// they are narrowed to the chunk this thread executes, *pstride is set so
// the compiler-generated loop can step between chunks, and *plastiter tells
// the caller whether this thread runs the sequentially last iteration
// (needed for lastprivate).  T is the loop-index type; UT/ST are its
// unsigned/signed counterparts from traits_t.
template <typename T>
static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
                                  kmp_int32 schedtype, kmp_int32 *plastiter,
                                  T *plower, T *pupper,
                                  typename traits_t<T>::signed_t *pstride,
                                  typename traits_t<T>::signed_t incr,
                                  typename traits_t<T>::signed_t chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                  ,
                                  void *codeptr
#endif
) {
  KMP_COUNT_BLOCK(OMP_LOOP_STATIC);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static_scheduling);

  // Clear monotonic/nonmonotonic bits (ignore it)
  schedtype = SCHEDULE_WITHOUT_MODIFIERS(schedtype);

  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  /* this all has to be changed back to TID and such.. */
  kmp_int32 gtid = global_tid;
  kmp_uint32 tid;
  kmp_uint32 nth;
  UT trip_count;
  kmp_team_t *team;
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_team_info_t *team_info = NULL;
  ompt_task_info_t *task_info = NULL;
  ompt_work_t ompt_work_type = ompt_work_loop;

  static kmp_int8 warn = 0;

  if (ompt_enabled.ompt_callback_work || ompt_enabled.ompt_callback_dispatch) {
    // Only fully initialize variables needed by OMPT if OMPT is enabled.
    team_info = __ompt_get_teaminfo(0, NULL);
    task_info = __ompt_get_task_info_object(0);
    // Determine workshare type from the ident_t flags; compilers predating
    // these flags get a one-time warning (the CAS on 'warn' ensures only the
    // first thread to get here reports it).
    if (loc != NULL) {
      if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
        ompt_work_type = ompt_work_loop;
      } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
        ompt_work_type = ompt_work_sections;
      } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
        ompt_work_type = ompt_work_distribute;
      } else {
        kmp_int8 bool_res =
            KMP_COMPARE_AND_STORE_ACQ8(&warn, (kmp_int8)0, (kmp_int8)1);
        if (bool_res)
          KMP_WARNING(OmptOutdatedWorkshare);
      }
      KMP_DEBUG_ASSERT(ompt_work_type);
    }
  }
#endif

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pstride);
  KE_TRACE(10, ("__kmpc_for_static_init called (%d)\n", global_tid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s,"
        " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, global_tid, schedtype, *plastiter, *plower, *pupper,
                   *pstride, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(global_tid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
  }
  /* special handling for zero-trip loops */
  if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
    if (plastiter != NULL)
      *plastiter = FALSE;
    /* leave pupper and plower set to entire iteration space */
    *pstride = incr; /* value should never be used */
// *plower = *pupper - incr;
// let compiler bypass the illegal loop (like for(i=1;i<10;i--))
// THE LINE COMMENTED ABOVE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE
// ON A ZERO-TRIP LOOP (lower=1, upper=0,stride=1) - JPH June 23, 2009.
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init:(ZERO TRIP) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s "
                              "signed?<%s>, loc = %%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec, traits_t<T>::spec);
      check_loc(loc);
      KD_TRACE(100,
               (buff, *plastiter, *plower, *pupper, *pstride, loc->psource));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), 0, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }

  // Although there are schedule enumerations above kmp_ord_upper which are not
  // schedules for "distribute", the only ones which are useful are dynamic, so
  // cannot be seen here, since this codepath is only executed for static
  // schedules.
  if (schedtype > kmp_ord_upper) {
    // we are in DISTRIBUTE construct
    schedtype += kmp_sch_static -
                 kmp_distribute_static; // AC: convert to usual schedule type
    if (th->th.th_team->t.t_serialized > 1) {
      tid = 0;
      team = th->th.th_team;
    } else {
      // distribute partitions across the league: use the master tid within
      // the parent (teams-level) team
      tid = th->th.th_team->t.t_master_tid;
      team = th->th.th_team->t.t_parent;
    }
  } else {
    tid = __kmp_tid_from_gtid(global_tid);
    team = th->th.th_team;
  }

  /* determine if "for" loop is an active worksharing construct */
  if (team->t.t_serialized) {
    /* serialized parallel, each thread executes whole iteration space */
    if (plastiter != NULL)
      *plastiter = TRUE;
    /* leave pupper and plower set to entire iteration space */
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));

#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }
  nth = team->t.t_nproc;
  // Single-thread team: the whole iteration space belongs to this thread.
  if (nth == 1) {
    if (plastiter != NULL)
      *plastiter = TRUE;
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }
  // NOTE(review): nth == 1 returned above, so the trip_count / nth divisions
  // below are safe provided t_nproc >= 1 — presumably always true; the static
  // analyzer's division-by-zero report here assumes t_nproc == 0. Confirm.

  /* compute trip count */
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

#if KMP_STATS_ENABLED
  if (KMP_MASTER_GTID(gtid)) {
    KMP_COUNT_VALUE(OMP_loop_static_total_iterations, trip_count);
  }
#endif

  if (__kmp_env_consistency_check) {
    /* tripcount overflow? */
    if (trip_count == 0 && *pupper != *plower) {
      __kmp_error_construct(kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo,
                            loc);
    }
  }

  /* compute remaining parameters */
  switch (schedtype) {
  case kmp_sch_static: {
    // Block scheduling: each thread gets one contiguous chunk of roughly
    // trip_count / nth iterations.
    if (trip_count < nth) {
      KMP_DEBUG_ASSERT(
          __kmp_static == kmp_sch_static_greedy ||
          __kmp_static ==
              kmp_sch_static_balanced); // Unknown static scheduling type.
      if (tid < trip_count) {
        // fewer iterations than threads: one iteration per active thread
        *pupper = *plower = *plower + tid * incr;
      } else {
        // set bounds so non-active threads execute no iterations
        *plower = *pupper + (incr > 0 ? 1 : -1);
      }
      if (plastiter != NULL)
        *plastiter = (tid == trip_count - 1);
    } else {
      if (__kmp_static == kmp_sch_static_balanced) {
        // balanced: first `extras` threads get one extra iteration each
        UT small_chunk = trip_count / nth;
        UT extras = trip_count % nth;
        *plower += incr * (tid * small_chunk + (tid < extras ? tid : extras));
        *pupper = *plower + small_chunk * incr - (tid < extras ? 0 : incr);
        if (plastiter != NULL)
          *plastiter = (tid == nth - 1);
      } else {
        // greedy: ceil(trip_count / nth) per thread; the last chunk may be
        // short, so clamp against the saved original upper bound
        T big_chunk_inc_count =
            (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
        T old_upper = *pupper;

        KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
        // Unknown static scheduling type.

        *plower += tid * big_chunk_inc_count;
        *pupper = *plower + big_chunk_inc_count - incr;
        if (incr > 0) {
          // saturate on overflow before clamping back to old_upper
          if (*pupper < *plower)
            *pupper = traits_t<T>::max_value;
          if (plastiter != NULL)
            *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
          if (*pupper > old_upper)
            *pupper = old_upper; // tracker C73258
        } else {
          if (*pupper > *plower)
            *pupper = traits_t<T>::min_value;
          if (plastiter != NULL)
            *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
          if (*pupper < old_upper)
            *pupper = old_upper; // tracker C73258
        }
      }
    }
    *pstride = trip_count;
    break;
  }
  case kmp_sch_static_chunked: {
    // Round-robin chunks of size `chunk`: thread tid owns chunks
    // tid, tid+nth, tid+2*nth, ...
    ST span;
    UT nchunks;
    if (chunk < 1)
      chunk = 1;
    else if ((UT)chunk > trip_count)
      chunk = trip_count;
    nchunks = (trip_count) / (UT)chunk + (trip_count % (UT)chunk ? 1 : 0);
    span = chunk * incr;
    if (nchunks < nth) {
      *pstride = span * nchunks;
      if (tid < nchunks) {
        *plower = *plower + (span * tid);
        *pupper = *plower + span - incr;
      } else {
        // no chunk for this thread: empty range
        *plower = *pupper + (incr > 0 ? 1 : -1);
      }
    } else {
      *pstride = span * nth;
      *plower = *plower + (span * tid);
      *pupper = *plower + span - incr;
    }
    if (plastiter != NULL)
      *plastiter = (tid == (nchunks - 1) % nth);
    break;
  }
  case kmp_sch_static_balanced_chunked: {
    T old_upper = *pupper;
    // round up to make sure the chunk is enough to cover all iterations
    UT span = (trip_count + nth - 1) / nth;

    // perform chunk adjustment
    // NOTE(review): the mask form assumes `chunk` is a power of two here —
    // presumably guaranteed by the caller; confirm.
    chunk = (span + chunk - 1) & ~(chunk - 1);

    span = chunk * incr;
    *plower = *plower + (span * tid);
    *pupper = *plower + span - incr;
    if (incr > 0) {
      if (*pupper > old_upper)
        *pupper = old_upper;
    } else if (*pupper < old_upper)
      *pupper = old_upper;

    if (plastiter != NULL)
      *plastiter = (tid == ((trip_count - 1) / (UT)chunk));
    break;
  }
  default:
    KMP_ASSERT2(0, "__kmpc_for_static_init: unknown scheduling type");
    break;
  }

#if USE_ITT_BUILD
  // Report loop metadata
  if (KMP_MASTER_TID(tid) && __itt_metadata_add_ptr &&
      __kmp_forkjoin_frames_mode == 3 && th->th.th_teams_microtask == NULL &&
      team->t.t_active_level == 1) {
    kmp_uint64 cur_chunk = chunk;
    check_loc(loc);
    // Calculate chunk in case it was not specified; it is specified for
    // kmp_sch_static_chunked
    if (schedtype == kmp_sch_static) {
      cur_chunk = trip_count / nth + ((trip_count % nth) ? 1 : 0);
    }
    // 0 - "static" schedule
    __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
  }
#endif
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmpc_for_static_init: liter=%%d lower=%%%s "
                            "upper=%%%s stride = %%%s signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
        &(task_info->task_data), trip_count, codeptr);
  }
  if (ompt_enabled.ompt_callback_dispatch) {
    ompt_dispatch_t dispatch_type;
    ompt_data_t instance = ompt_data_none;
    ompt_dispatch_chunk_t dispatch_chunk;
    if (ompt_work_type == ompt_work_sections) {
      dispatch_type = ompt_dispatch_section;
      instance.ptr = codeptr;
    } else {
      OMPT_GET_DISPATCH_CHUNK(dispatch_chunk, *plower, *pupper, incr);
      dispatch_type = (ompt_work_type == ompt_work_distribute)
                          ? ompt_dispatch_distribute_chunk
                          : ompt_dispatch_ws_loop_chunk;
      instance.ptr = &dispatch_chunk;
    }
    ompt_callbacks.ompt_callback(ompt_callback_dispatch)(
        &(team_info->parallel_data), &(task_info->task_data), dispatch_type,
        instance);
  }
#endif

  KMP_STATS_LOOP_END(OMP_loop_static_iterations);
  return;
}
469 | ||||
470 | template <typename T> | |||
471 | static void __kmp_dist_for_static_init(ident_t *loc, kmp_int32 gtid, | |||
472 | kmp_int32 schedule, kmp_int32 *plastiter, | |||
473 | T *plower, T *pupper, T *pupperDist, | |||
474 | typename traits_t<T>::signed_t *pstride, | |||
475 | typename traits_t<T>::signed_t incr, | |||
476 | typename traits_t<T>::signed_t chunk | |||
477 | #if OMPT_SUPPORT1 && OMPT_OPTIONAL1 | |||
478 | , | |||
479 | void *codeptr | |||
480 | #endif | |||
481 | ) { | |||
482 | KMP_COUNT_BLOCK(OMP_DISTRIBUTE)((void)0); | |||
483 | KMP_PUSH_PARTITIONED_TIMER(OMP_distribute)((void)0); | |||
484 | KMP_PUSH_PARTITIONED_TIMER(OMP_distribute_scheduling)((void)0); | |||
485 | typedef typename traits_t<T>::unsigned_t UT; | |||
486 | typedef typename traits_t<T>::signed_t ST; | |||
487 | kmp_uint32 tid; | |||
488 | kmp_uint32 nth; | |||
489 | kmp_uint32 team_id; | |||
490 | kmp_uint32 nteams; | |||
491 | UT trip_count; | |||
492 | kmp_team_t *team; | |||
493 | kmp_info_t *th; | |||
494 | ||||
495 | KMP_DEBUG_ASSERT(plastiter && plower && pupper && pupperDist && pstride)if (!(plastiter && plower && pupper && pupperDist && pstride)) { __kmp_debug_assert("plastiter && plower && pupper && pupperDist && pstride" , "openmp/runtime/src/kmp_sched.cpp", 495); }; | |||
496 | KE_TRACE(10, ("__kmpc_dist_for_static_init called (%d)\n", gtid))if (kmp_e_debug >= 10) { __kmp_debug_printf ("__kmpc_dist_for_static_init called (%d)\n" , gtid); }; | |||
497 | __kmp_assert_valid_gtid(gtid); | |||
498 | #ifdef KMP_DEBUG1 | |||
499 | { | |||
500 | char *buff; | |||
501 | // create format specifiers before the debug output | |||
502 | buff = __kmp_str_format( | |||
503 | "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d " | |||
504 | "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n", | |||
505 | traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec, | |||
506 | traits_t<ST>::spec, traits_t<T>::spec); | |||
507 | KD_TRACE(100,if (kmp_d_debug >= 100) { __kmp_debug_printf (buff, gtid, schedule , *plastiter, *plower, *pupper, incr, chunk); } | |||
508 | (buff, gtid, schedule, *plastiter, *plower, *pupper, incr, chunk))if (kmp_d_debug >= 100) { __kmp_debug_printf (buff, gtid, schedule , *plastiter, *plower, *pupper, incr, chunk); }; | |||
509 | __kmp_str_free(&buff); | |||
510 | } | |||
511 | #endif | |||
512 | ||||
513 | if (__kmp_env_consistency_check) { | |||
514 | __kmp_push_workshare(gtid, ct_pdo, loc); | |||
515 | if (incr == 0) { | |||
516 | __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, | |||
517 | loc); | |||
518 | } | |||
519 | if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) { | |||
520 | // The loop is illegal. | |||
521 | // Some zero-trip loops maintained by compiler, e.g.: | |||
522 | // for(i=10;i<0;++i) // lower >= upper - run-time check | |||
523 | // for(i=0;i>10;--i) // lower <= upper - run-time check | |||
524 | // for(i=0;i>10;++i) // incr > 0 - compile-time check | |||
525 | // for(i=10;i<0;--i) // incr < 0 - compile-time check | |||
526 | // Compiler does not check the following illegal loops: | |||
527 | // for(i=0;i<10;i+=incr) // where incr<0 | |||
528 | // for(i=10;i>0;i-=incr) // where incr<0 | |||
529 | __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc); | |||
530 | } | |||
531 | } | |||
532 | tid = __kmp_tid_from_gtid(gtid); | |||
533 | th = __kmp_threads[gtid]; | |||
534 | nth = th->th.th_team_nproc; | |||
535 | team = th->th.th_team; | |||
536 | KMP_DEBUG_ASSERT(th->th.th_teams_microtask)if (!(th->th.th_teams_microtask)) { __kmp_debug_assert("th->th.th_teams_microtask" , "openmp/runtime/src/kmp_sched.cpp", 536); }; // we are in the teams construct | |||
537 | nteams = th->th.th_teams_size.nteams; | |||
538 | team_id = team->t.t_master_tid; | |||
539 | KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc)if (!(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc )) { __kmp_debug_assert("nteams == (kmp_uint32)team->t.t_parent->t.t_nproc" , "openmp/runtime/src/kmp_sched.cpp", 539); }; | |||
540 | ||||
541 | // compute global trip count | |||
542 | if (incr == 1) { | |||
543 | trip_count = *pupper - *plower + 1; | |||
544 | } else if (incr == -1) { | |||
545 | trip_count = *plower - *pupper + 1; | |||
546 | } else if (incr > 0) { | |||
547 | // upper-lower can exceed the limit of signed type | |||
548 | trip_count = (UT)(*pupper - *plower) / incr + 1; | |||
549 | } else { | |||
550 | trip_count = (UT)(*plower - *pupper) / (-incr) + 1; | |||
551 | } | |||
552 | ||||
553 | *pstride = *pupper - *plower; // just in case (can be unused) | |||
554 | if (trip_count <= nteams) { | |||
555 | KMP_DEBUG_ASSERT(if (!(__kmp_static == kmp_sch_static_greedy || __kmp_static == kmp_sch_static_balanced)) { __kmp_debug_assert("__kmp_static == kmp_sch_static_greedy || __kmp_static == kmp_sch_static_balanced" , "openmp/runtime/src/kmp_sched.cpp", 558); } | |||
556 | __kmp_static == kmp_sch_static_greedy ||if (!(__kmp_static == kmp_sch_static_greedy || __kmp_static == kmp_sch_static_balanced)) { __kmp_debug_assert("__kmp_static == kmp_sch_static_greedy || __kmp_static == kmp_sch_static_balanced" , "openmp/runtime/src/kmp_sched.cpp", 558); } | |||
557 | __kmp_static ==if (!(__kmp_static == kmp_sch_static_greedy || __kmp_static == kmp_sch_static_balanced)) { __kmp_debug_assert("__kmp_static == kmp_sch_static_greedy || __kmp_static == kmp_sch_static_balanced" , "openmp/runtime/src/kmp_sched.cpp", 558); } | |||
558 | kmp_sch_static_balanced)if (!(__kmp_static == kmp_sch_static_greedy || __kmp_static == kmp_sch_static_balanced)) { __kmp_debug_assert("__kmp_static == kmp_sch_static_greedy || __kmp_static == kmp_sch_static_balanced" , "openmp/runtime/src/kmp_sched.cpp", 558); }; // Unknown static scheduling type. | |||
559 | // only primary threads of some teams get single iteration, other threads | |||
560 | // get nothing | |||
561 | if (team_id < trip_count && tid == 0) { | |||
562 | *pupper = *pupperDist = *plower = *plower + team_id * incr; | |||
563 | } else { | |||
564 | *pupperDist = *pupper; | |||
565 | *plower = *pupper + incr; // compiler should skip loop body | |||
566 | } | |||
567 | if (plastiter != NULL__null) | |||
568 | *plastiter = (tid == 0 && team_id == trip_count - 1); | |||
569 | } else { | |||
570 | // Get the team's chunk first (each team gets at most one chunk) | |||
571 | if (__kmp_static == kmp_sch_static_balanced) { | |||
572 | UT chunkD = trip_count / nteams; | |||
573 | UT extras = trip_count % nteams; | |||
574 | *plower += | |||
575 | incr * (team_id * chunkD + (team_id < extras ? team_id : extras)); | |||
576 | *pupperDist = *plower + chunkD * incr - (team_id < extras ? 0 : incr); | |||
577 | if (plastiter != NULL__null) | |||
578 | *plastiter = (team_id == nteams - 1); | |||
579 | } else { | |||
580 | T chunk_inc_count = | |||
581 | (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr; | |||
582 | T upper = *pupper; | |||
583 | KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy)if (!(__kmp_static == kmp_sch_static_greedy)) { __kmp_debug_assert ("__kmp_static == kmp_sch_static_greedy", "openmp/runtime/src/kmp_sched.cpp" , 583); }; | |||
584 | // Unknown static scheduling type. | |||
585 | *plower += team_id * chunk_inc_count; | |||
586 | *pupperDist = *plower + chunk_inc_count - incr; | |||
587 | // Check/correct bounds if needed | |||
588 | if (incr > 0) { | |||
589 | if (*pupperDist < *plower) | |||
590 | *pupperDist = traits_t<T>::max_value; | |||
591 | if (plastiter != NULL__null) | |||
592 | *plastiter = *plower <= upper && *pupperDist > upper - incr; | |||
593 | if (*pupperDist > upper) | |||
594 | *pupperDist = upper; // tracker C73258 | |||
595 | if (*plower > *pupperDist) { | |||
596 | *pupper = *pupperDist; // no iterations available for the team | |||
597 | goto end; | |||
598 | } | |||
599 | } else { | |||
600 | if (*pupperDist > *plower) | |||
601 | *pupperDist = traits_t<T>::min_value; | |||
602 | if (plastiter != NULL__null) | |||
603 | *plastiter = *plower >= upper && *pupperDist < upper - incr; | |||
604 | if (*pupperDist < upper) | |||
605 | *pupperDist = upper; // tracker C73258 | |||
606 | if (*plower < *pupperDist) { | |||
607 | *pupper = *pupperDist; // no iterations available for the team | |||
608 | goto end; | |||
609 | } | |||
610 | } | |||
611 | } | |||
612 | // Get the parallel loop chunk now (for thread) | |||
613 | // compute trip count for team's chunk | |||
614 | if (incr == 1) { | |||
615 | trip_count = *pupperDist - *plower + 1; | |||
616 | } else if (incr == -1) { | |||
617 | trip_count = *plower - *pupperDist + 1; | |||
618 | } else if (incr > 1) { | |||
619 | // upper-lower can exceed the limit of signed type | |||
620 | trip_count = (UT)(*pupperDist - *plower) / incr + 1; | |||
621 | } else { | |||
622 | trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1; | |||
623 | } | |||
624 | KMP_DEBUG_ASSERT(trip_count)if (!(trip_count)) { __kmp_debug_assert("trip_count", "openmp/runtime/src/kmp_sched.cpp" , 624); }; | |||
625 | switch (schedule) { | |||
626 | case kmp_sch_static: { | |||
627 | if (trip_count <= nth) { | |||
628 | KMP_DEBUG_ASSERT(if (!(__kmp_static == kmp_sch_static_greedy || __kmp_static == kmp_sch_static_balanced)) { __kmp_debug_assert("__kmp_static == kmp_sch_static_greedy || __kmp_static == kmp_sch_static_balanced" , "openmp/runtime/src/kmp_sched.cpp", 631); } | |||
629 | __kmp_static == kmp_sch_static_greedy ||if (!(__kmp_static == kmp_sch_static_greedy || __kmp_static == kmp_sch_static_balanced)) { __kmp_debug_assert("__kmp_static == kmp_sch_static_greedy || __kmp_static == kmp_sch_static_balanced" , "openmp/runtime/src/kmp_sched.cpp", 631); } | |||
630 | __kmp_static ==if (!(__kmp_static == kmp_sch_static_greedy || __kmp_static == kmp_sch_static_balanced)) { __kmp_debug_assert("__kmp_static == kmp_sch_static_greedy || __kmp_static == kmp_sch_static_balanced" , "openmp/runtime/src/kmp_sched.cpp", 631); } | |||
631 | kmp_sch_static_balanced)if (!(__kmp_static == kmp_sch_static_greedy || __kmp_static == kmp_sch_static_balanced)) { __kmp_debug_assert("__kmp_static == kmp_sch_static_greedy || __kmp_static == kmp_sch_static_balanced" , "openmp/runtime/src/kmp_sched.cpp", 631); }; // Unknown static scheduling type. | |||
632 | if (tid < trip_count) | |||
633 | *pupper = *plower = *plower + tid * incr; | |||
634 | else | |||
635 | *plower = *pupper + incr; // no iterations available | |||
636 | if (plastiter != NULL__null) | |||
637 | if (*plastiter != 0 && !(tid == trip_count - 1)) | |||
638 | *plastiter = 0; | |||
639 | } else { | |||
640 | if (__kmp_static == kmp_sch_static_balanced) { | |||
641 | UT chunkL = trip_count / nth; | |||
642 | UT extras = trip_count % nth; | |||
643 | *plower += incr * (tid * chunkL + (tid < extras ? tid : extras)); | |||
644 | *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr); | |||
645 | if (plastiter != NULL__null) | |||
646 | if (*plastiter != 0 && !(tid == nth - 1)) | |||
647 | *plastiter = 0; | |||
648 | } else { | |||
649 | T chunk_inc_count = | |||
650 | (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr; | |||
651 | T upper = *pupperDist; | |||
652 | KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy)if (!(__kmp_static == kmp_sch_static_greedy)) { __kmp_debug_assert ("__kmp_static == kmp_sch_static_greedy", "openmp/runtime/src/kmp_sched.cpp" , 652); }; | |||
653 | // Unknown static scheduling type. | |||
654 | *plower += tid * chunk_inc_count; | |||
655 | *pupper = *plower + chunk_inc_count - incr; | |||
656 | if (incr > 0) { | |||
657 | if (*pupper < *plower) | |||
658 | *pupper = traits_t<T>::max_value; | |||
659 | if (plastiter != NULL__null) | |||
660 | if (*plastiter != 0 && | |||
661 | !(*plower <= upper && *pupper > upper - incr)) | |||
662 | *plastiter = 0; | |||
663 | if (*pupper > upper) | |||
664 | *pupper = upper; // tracker C73258 | |||
665 | } else { | |||
666 | if (*pupper > *plower) | |||
667 | *pupper = traits_t<T>::min_value; | |||
668 | if (plastiter != NULL__null) | |||
669 | if (*plastiter != 0 && | |||
670 | !(*plower >= upper && *pupper < upper - incr)) | |||
671 | *plastiter = 0; | |||
672 | if (*pupper < upper) | |||
673 | *pupper = upper; // tracker C73258 | |||
674 | } | |||
675 | } | |||
676 | } | |||
677 | break; | |||
678 | } | |||
679 | case kmp_sch_static_chunked: { | |||
680 | ST span; | |||
681 | if (chunk < 1) | |||
682 | chunk = 1; | |||
683 | span = chunk * incr; | |||
684 | *pstride = span * nth; | |||
685 | *plower = *plower + (span * tid); | |||
686 | *pupper = *plower + span - incr; | |||
687 | if (plastiter != NULL__null) | |||
688 | if (*plastiter != 0 && !(tid == ((trip_count - 1) / (UT)chunk) % nth)) | |||
689 | *plastiter = 0; | |||
690 | break; | |||
691 | } | |||
692 | default: | |||
693 | KMP_ASSERT2(0,if (!(0)) { __kmp_debug_assert(("__kmpc_dist_for_static_init: unknown loop scheduling type" ), "openmp/runtime/src/kmp_sched.cpp", 694); } | |||
694 | "__kmpc_dist_for_static_init: unknown loop scheduling type")if (!(0)) { __kmp_debug_assert(("__kmpc_dist_for_static_init: unknown loop scheduling type" ), "openmp/runtime/src/kmp_sched.cpp", 694); }; | |||
695 | break; | |||
696 | } | |||
697 | } | |||
698 | end:; | |||
699 | #ifdef KMP_DEBUG1 | |||
700 | { | |||
701 | char *buff; | |||
702 | // create format specifiers before the debug output | |||
703 | buff = __kmp_str_format( | |||
704 | "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s " | |||
705 | "stride=%%%s signed?<%s>\n", | |||
706 | traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec, | |||
707 | traits_t<ST>::spec, traits_t<T>::spec); | |||
708 | KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pupperDist, *pstride))if (kmp_d_debug >= 100) { __kmp_debug_printf (buff, *plastiter , *plower, *pupper, *pupperDist, *pstride); }; | |||
709 | __kmp_str_free(&buff); | |||
710 | } | |||
711 | #endif | |||
712 | KE_TRACE(10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid))if (kmp_e_debug >= 10) { __kmp_debug_printf ("__kmpc_dist_for_static_init: T#%d return\n" , gtid); }; | |||
713 | #if OMPT_SUPPORT1 && OMPT_OPTIONAL1 | |||
714 | if (ompt_enabled.ompt_callback_work || ompt_enabled.ompt_callback_dispatch) { | |||
715 | ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL__null); | |||
716 | ompt_task_info_t *task_info = __ompt_get_task_info_object(0); | |||
717 | if (ompt_enabled.ompt_callback_work) { | |||
718 | ompt_callbacks.ompt_callback(ompt_callback_work)ompt_callback_work_callback( | |||
719 | ompt_work_distribute, ompt_scope_begin, &(team_info->parallel_data), | |||
720 | &(task_info->task_data), 0, codeptr); | |||
721 | } | |||
722 | if (ompt_enabled.ompt_callback_dispatch) { | |||
723 | ompt_data_t instance = ompt_data_none{0}; | |||
724 | ompt_dispatch_chunk_t dispatch_chunk; | |||
725 | OMPT_GET_DISPATCH_CHUNK(dispatch_chunk, *plower, *pupperDist, incr)do { if (incr > 0) { dispatch_chunk.start = static_cast< uint64_t>(*plower); dispatch_chunk.iterations = static_cast <uint64_t>(((*pupperDist) - (*plower)) / (incr) + 1); } else { dispatch_chunk.start = static_cast<uint64_t>(*pupperDist ); dispatch_chunk.iterations = static_cast<uint64_t>((( *plower) - (*pupperDist)) / -(incr) + 1); } } while (0); | |||
726 | instance.ptr = &dispatch_chunk; | |||
727 | ompt_callbacks.ompt_callback(ompt_callback_dispatch)ompt_callback_dispatch_callback( | |||
728 | &(team_info->parallel_data), &(task_info->task_data), | |||
729 | ompt_dispatch_distribute_chunk, instance); | |||
730 | } | |||
731 | } | |||
732 | #endif // OMPT_SUPPORT && OMPT_OPTIONAL | |||
733 | KMP_STATS_LOOP_END(OMP_distribute_iterations); | |||
734 | return; | |||
735 | } | |||
736 | ||||
737 | template <typename T> | |||
738 | static void __kmp_team_static_init(ident_t *loc, kmp_int32 gtid, | |||
739 | kmp_int32 *p_last, T *p_lb, T *p_ub, | |||
740 | typename traits_t<T>::signed_t *p_st, | |||
741 | typename traits_t<T>::signed_t incr, | |||
742 | typename traits_t<T>::signed_t chunk) { | |||
743 | // The routine returns the first chunk distributed to the team and | |||
744 | // stride for next chunks calculation. | |||
745 | // Last iteration flag set for the team that will execute | |||
746 | // the last iteration of the loop. | |||
747 | // The routine is called for dist_schedule(static,chunk) only. | |||
748 | typedef typename traits_t<T>::unsigned_t UT; | |||
749 | typedef typename traits_t<T>::signed_t ST; | |||
750 | kmp_uint32 team_id; | |||
751 | kmp_uint32 nteams; | |||
752 | UT trip_count; | |||
753 | T lower; | |||
754 | T upper; | |||
755 | ST span; | |||
756 | kmp_team_t *team; | |||
757 | kmp_info_t *th; | |||
758 | ||||
759 | KMP_DEBUG_ASSERT(p_last && p_lb && p_ub && p_st)if (!(p_last && p_lb && p_ub && p_st) ) { __kmp_debug_assert("p_last && p_lb && p_ub && p_st" , "openmp/runtime/src/kmp_sched.cpp", 759); }; | |||
760 | KE_TRACE(10, ("__kmp_team_static_init called (%d)\n", gtid))if (kmp_e_debug >= 10) { __kmp_debug_printf ("__kmp_team_static_init called (%d)\n" , gtid); }; | |||
761 | __kmp_assert_valid_gtid(gtid); | |||
762 | #ifdef KMP_DEBUG1 | |||
763 | { | |||
764 | char *buff; | |||
765 | // create format specifiers before the debug output | |||
766 | buff = __kmp_str_format("__kmp_team_static_init enter: T#%%d liter=%%d " | |||
767 | "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n", | |||
768 | traits_t<T>::spec, traits_t<T>::spec, | |||
769 | traits_t<ST>::spec, traits_t<ST>::spec, | |||
770 | traits_t<T>::spec); | |||
771 | KD_TRACE(100, (buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk))if (kmp_d_debug >= 100) { __kmp_debug_printf (buff, gtid, * p_last, *p_lb, *p_ub, *p_st, chunk); }; | |||
772 | __kmp_str_free(&buff); | |||
773 | } | |||
774 | #endif | |||
775 | ||||
776 | lower = *p_lb; | |||
777 | upper = *p_ub; | |||
778 | if (__kmp_env_consistency_check) { | |||
779 | if (incr == 0) { | |||
780 | __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, | |||
781 | loc); | |||
782 | } | |||
783 | if (incr > 0 ? (upper < lower) : (lower < upper)) { | |||
784 | // The loop is illegal. | |||
785 | // Some zero-trip loops maintained by compiler, e.g.: | |||
786 | // for(i=10;i<0;++i) // lower >= upper - run-time check | |||
787 | // for(i=0;i>10;--i) // lower <= upper - run-time check | |||
788 | // for(i=0;i>10;++i) // incr > 0 - compile-time check | |||
789 | // for(i=10;i<0;--i) // incr < 0 - compile-time check | |||
790 | // Compiler does not check the following illegal loops: | |||
791 | // for(i=0;i<10;i+=incr) // where incr<0 | |||
792 | // for(i=10;i>0;i-=incr) // where incr<0 | |||
793 | __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc); | |||
794 | } | |||
795 | } | |||
796 | th = __kmp_threads[gtid]; | |||
797 | team = th->th.th_team; | |||
798 | KMP_DEBUG_ASSERT(th->th.th_teams_microtask)if (!(th->th.th_teams_microtask)) { __kmp_debug_assert("th->th.th_teams_microtask" , "openmp/runtime/src/kmp_sched.cpp", 798); }; // we are in the teams construct | |||
799 | nteams = th->th.th_teams_size.nteams; | |||
800 | team_id = team->t.t_master_tid; | |||
801 | KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc)if (!(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc )) { __kmp_debug_assert("nteams == (kmp_uint32)team->t.t_parent->t.t_nproc" , "openmp/runtime/src/kmp_sched.cpp", 801); }; | |||
802 | ||||
803 | // compute trip count | |||
804 | if (incr == 1) { | |||
805 | trip_count = upper - lower + 1; | |||
806 | } else if (incr == -1) { | |||
807 | trip_count = lower - upper + 1; | |||
808 | } else if (incr > 0) { | |||
809 | // upper-lower can exceed the limit of signed type | |||
810 | trip_count = (UT)(upper - lower) / incr + 1; | |||
811 | } else { | |||
812 | trip_count = (UT)(lower - upper) / (-incr) + 1; | |||
813 | } | |||
814 | if (chunk < 1) | |||
815 | chunk = 1; | |||
816 | span = chunk * incr; | |||
817 | *p_st = span * nteams; | |||
818 | *p_lb = lower + (span * team_id); | |||
819 | *p_ub = *p_lb + span - incr; | |||
820 | if (p_last != NULL__null) | |||
821 | *p_last = (team_id == ((trip_count - 1) / (UT)chunk) % nteams); | |||
822 | // Correct upper bound if needed | |||
823 | if (incr > 0) { | |||
824 | if (*p_ub < *p_lb) // overflow? | |||
825 | *p_ub = traits_t<T>::max_value; | |||
826 | if (*p_ub > upper) | |||
827 | *p_ub = upper; // tracker C73258 | |||
828 | } else { // incr < 0 | |||
829 | if (*p_ub > *p_lb) | |||
830 | *p_ub = traits_t<T>::min_value; | |||
831 | if (*p_ub < upper) | |||
832 | *p_ub = upper; // tracker C73258 | |||
833 | } | |||
834 | #ifdef KMP_DEBUG1 | |||
835 | { | |||
836 | char *buff; | |||
837 | // create format specifiers before the debug output | |||
838 | buff = | |||
839 | __kmp_str_format("__kmp_team_static_init exit: T#%%d team%%u liter=%%d " | |||
840 | "iter=(%%%s, %%%s, %%%s) chunk %%%s\n", | |||
841 | traits_t<T>::spec, traits_t<T>::spec, | |||
842 | traits_t<ST>::spec, traits_t<ST>::spec); | |||
843 | KD_TRACE(100, (buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk))if (kmp_d_debug >= 100) { __kmp_debug_printf (buff, gtid, team_id , *p_last, *p_lb, *p_ub, *p_st, chunk); }; | |||
844 | __kmp_str_free(&buff); | |||
845 | } | |||
846 | #endif | |||
847 | } | |||
848 | ||||
849 | //------------------------------------------------------------------------------ | |||
850 | extern "C" { | |||
851 | /*! | |||
852 | @ingroup WORK_SHARING | |||
853 | @param loc Source code location | |||
854 | @param gtid Global thread id of this thread | |||
855 | @param schedtype Scheduling type | |||
856 | @param plastiter Pointer to the "last iteration" flag | |||
857 | @param plower Pointer to the lower bound | |||
858 | @param pupper Pointer to the upper bound | |||
859 | @param pstride Pointer to the stride | |||
860 | @param incr Loop increment | |||
861 | @param chunk The chunk size | |||
862 | ||||
863 | Each of the four functions here are identical apart from the argument types. | |||
864 | ||||
865 | The functions compute the upper and lower bounds and stride to be used for the | |||
866 | set of iterations to be executed by the current thread from the statically | |||
867 | scheduled loop that is described by the initial values of the bounds, stride, | |||
868 | increment and chunk size. | |||
869 | ||||
870 | @{ | |||
871 | */ | |||
872 | void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, | |||
873 | kmp_int32 *plastiter, kmp_int32 *plower, | |||
874 | kmp_int32 *pupper, kmp_int32 *pstride, | |||
875 | kmp_int32 incr, kmp_int32 chunk) { | |||
876 | __kmp_for_static_init<kmp_int32>(loc, gtid, schedtype, plastiter, plower, | |||
877 | pupper, pstride, incr, chunk | |||
878 | #if OMPT_SUPPORT1 && OMPT_OPTIONAL1 | |||
879 | , | |||
880 | OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0) | |||
881 | #endif | |||
882 | ); | |||
883 | } | |||
884 | ||||
885 | /*! | |||
886 | See @ref __kmpc_for_static_init_4 | |||
887 | */ | |||
888 | void __kmpc_for_static_init_4u(ident_t *loc, kmp_int32 gtid, | |||
889 | kmp_int32 schedtype, kmp_int32 *plastiter, | |||
890 | kmp_uint32 *plower, kmp_uint32 *pupper, | |||
891 | kmp_int32 *pstride, kmp_int32 incr, | |||
892 | kmp_int32 chunk) { | |||
893 | __kmp_for_static_init<kmp_uint32>(loc, gtid, schedtype, plastiter, plower, | |||
894 | pupper, pstride, incr, chunk | |||
895 | #if OMPT_SUPPORT1 && OMPT_OPTIONAL1 | |||
896 | , | |||
897 | OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0) | |||
898 | #endif | |||
899 | ); | |||
900 | } | |||
901 | ||||
902 | /*! | |||
903 | See @ref __kmpc_for_static_init_4 | |||
904 | */ | |||
905 | void __kmpc_for_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, | |||
906 | kmp_int32 *plastiter, kmp_int64 *plower, | |||
907 | kmp_int64 *pupper, kmp_int64 *pstride, | |||
908 | kmp_int64 incr, kmp_int64 chunk) { | |||
909 | __kmp_for_static_init<kmp_int64>(loc, gtid, schedtype, plastiter, plower, | |||
910 | pupper, pstride, incr, chunk | |||
911 | #if OMPT_SUPPORT1 && OMPT_OPTIONAL1 | |||
912 | , | |||
913 | OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0) | |||
914 | #endif | |||
915 | ); | |||
916 | } | |||
917 | ||||
918 | /*! | |||
919 | See @ref __kmpc_for_static_init_4 | |||
920 | */ | |||
921 | void __kmpc_for_static_init_8u(ident_t *loc, kmp_int32 gtid, | |||
922 | kmp_int32 schedtype, kmp_int32 *plastiter, | |||
923 | kmp_uint64 *plower, kmp_uint64 *pupper, | |||
924 | kmp_int64 *pstride, kmp_int64 incr, | |||
925 | kmp_int64 chunk) { | |||
926 | __kmp_for_static_init<kmp_uint64>(loc, gtid, schedtype, plastiter, plower, | |||
927 | pupper, pstride, incr, chunk | |||
928 | #if OMPT_SUPPORT1 && OMPT_OPTIONAL1 | |||
929 | , | |||
930 | OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0) | |||
931 | #endif | |||
932 | ); | |||
933 | } | |||
934 | /*! | |||
935 | @} | |||
936 | */ | |||
937 | ||||
// Optional trailing codeptr argument for the dist-init templates: expands to
// the caller's return address when OMPT is enabled, and to nothing otherwise.
#if OMPT_SUPPORT && OMPT_OPTIONAL
#define OMPT_CODEPTR_ARG , OMPT_GET_RETURN_ADDRESS(0)
#else
#define OMPT_CODEPTR_ARG
#endif
943 | ||||
944 | /*! | |||
945 | @ingroup WORK_SHARING | |||
946 | @param loc Source code location | |||
947 | @param gtid Global thread id of this thread | |||
948 | @param schedule Scheduling type for the parallel loop | |||
949 | @param plastiter Pointer to the "last iteration" flag | |||
950 | @param plower Pointer to the lower bound | |||
951 | @param pupper Pointer to the upper bound of loop chunk | |||
952 | @param pupperD Pointer to the upper bound of dist_chunk | |||
953 | @param pstride Pointer to the stride for parallel loop | |||
954 | @param incr Loop increment | |||
955 | @param chunk The chunk size for the parallel loop | |||
956 | ||||
957 | Each of the four functions here are identical apart from the argument types. | |||
958 | ||||
959 | The functions compute the upper and lower bounds and strides to be used for the | |||
960 | set of iterations to be executed by the current thread from the statically | |||
961 | scheduled loop that is described by the initial values of the bounds, strides, | |||
962 | increment and chunks for parallel loop and distribute constructs. | |||
963 | ||||
964 | @{ | |||
965 | */ | |||
966 | void __kmpc_dist_for_static_init_4(ident_t *loc, kmp_int32 gtid, | |||
967 | kmp_int32 schedule, kmp_int32 *plastiter, | |||
968 | kmp_int32 *plower, kmp_int32 *pupper, | |||
969 | kmp_int32 *pupperD, kmp_int32 *pstride, | |||
970 | kmp_int32 incr, kmp_int32 chunk) { | |||
971 | __kmp_dist_for_static_init<kmp_int32>(loc, gtid, schedule, plastiter, plower, | |||
972 | pupper, pupperD, pstride, incr, | |||
973 | chunk OMPT_CODEPTR_ARG, __builtin_return_address(0)); | |||
974 | } | |||
975 | ||||
976 | /*! | |||
977 | See @ref __kmpc_dist_for_static_init_4 | |||
978 | */ | |||
979 | void __kmpc_dist_for_static_init_4u(ident_t *loc, kmp_int32 gtid, | |||
980 | kmp_int32 schedule, kmp_int32 *plastiter, | |||
981 | kmp_uint32 *plower, kmp_uint32 *pupper, | |||
982 | kmp_uint32 *pupperD, kmp_int32 *pstride, | |||
983 | kmp_int32 incr, kmp_int32 chunk) { | |||
984 | __kmp_dist_for_static_init<kmp_uint32>(loc, gtid, schedule, plastiter, plower, | |||
985 | pupper, pupperD, pstride, incr, | |||
986 | chunk OMPT_CODEPTR_ARG, __builtin_return_address(0)); | |||
987 | } | |||
988 | ||||
989 | /*! | |||
990 | See @ref __kmpc_dist_for_static_init_4 | |||
991 | */ | |||
992 | void __kmpc_dist_for_static_init_8(ident_t *loc, kmp_int32 gtid, | |||
993 | kmp_int32 schedule, kmp_int32 *plastiter, | |||
994 | kmp_int64 *plower, kmp_int64 *pupper, | |||
995 | kmp_int64 *pupperD, kmp_int64 *pstride, | |||
996 | kmp_int64 incr, kmp_int64 chunk) { | |||
997 | __kmp_dist_for_static_init<kmp_int64>(loc, gtid, schedule, plastiter, plower, | |||
998 | pupper, pupperD, pstride, incr, | |||
999 | chunk OMPT_CODEPTR_ARG, __builtin_return_address(0)); | |||
1000 | } | |||
1001 | ||||
1002 | /*! | |||
1003 | See @ref __kmpc_dist_for_static_init_4 | |||
1004 | */ | |||
1005 | void __kmpc_dist_for_static_init_8u(ident_t *loc, kmp_int32 gtid, | |||
1006 | kmp_int32 schedule, kmp_int32 *plastiter, | |||
1007 | kmp_uint64 *plower, kmp_uint64 *pupper, | |||
1008 | kmp_uint64 *pupperD, kmp_int64 *pstride, | |||
1009 | kmp_int64 incr, kmp_int64 chunk) { | |||
1010 | __kmp_dist_for_static_init<kmp_uint64>(loc, gtid, schedule, plastiter, plower, | |||
1011 | pupper, pupperD, pstride, incr, | |||
1012 | chunk OMPT_CODEPTR_ARG, __builtin_return_address(0)); | |||
1013 | } | |||
1014 | /*! | |||
1015 | @} | |||
1016 | */ | |||
1017 | ||||
1018 | //------------------------------------------------------------------------------ | |||
1019 | // Auxiliary routines for Distribute Parallel Loop construct implementation | |||
1020 | // Transfer call to template< type T > | |||
1021 | // __kmp_team_static_init( ident_t *loc, int gtid, | |||
1022 | // int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk ) | |||
1023 | ||||
1024 | /*! | |||
1025 | @ingroup WORK_SHARING | |||
1026 | @{ | |||
1027 | @param loc Source location | |||
1028 | @param gtid Global thread id | |||
1029 | @param p_last pointer to last iteration flag | |||
1030 | @param p_lb pointer to Lower bound | |||
1031 | @param p_ub pointer to Upper bound | |||
1032 | @param p_st Step (or increment if you prefer) | |||
1033 | @param incr Loop increment | |||
1034 | @param chunk The chunk size to block with | |||
1035 | ||||
1036 | The functions compute the upper and lower bounds and stride to be used for the | |||
1037 | set of iterations to be executed by the current team from the statically | |||
1038 | scheduled loop that is described by the initial values of the bounds, stride, | |||
1039 | increment and chunk for the distribute construct as part of composite distribute | |||
1040 | parallel loop construct. These functions are all identical apart from the types | |||
1041 | of the arguments. | |||
1042 | */ | |||
1043 | ||||
1044 | void __kmpc_team_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, | |||
1045 | kmp_int32 *p_lb, kmp_int32 *p_ub, | |||
1046 | kmp_int32 *p_st, kmp_int32 incr, | |||
1047 | kmp_int32 chunk) { | |||
1048 | KMP_DEBUG_ASSERT(__kmp_init_serial)if (!(__kmp_init_serial)) { __kmp_debug_assert("__kmp_init_serial" , "openmp/runtime/src/kmp_sched.cpp", 1048); }; | |||
1049 | __kmp_team_static_init<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr, | |||
1050 | chunk); | |||
1051 | } | |||
1052 | ||||
1053 | /*! | |||
1054 | See @ref __kmpc_team_static_init_4 | |||
1055 | */ | |||
1056 | void __kmpc_team_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, | |||
1057 | kmp_uint32 *p_lb, kmp_uint32 *p_ub, | |||
1058 | kmp_int32 *p_st, kmp_int32 incr, | |||
1059 | kmp_int32 chunk) { | |||
1060 | KMP_DEBUG_ASSERT(__kmp_init_serial)if (!(__kmp_init_serial)) { __kmp_debug_assert("__kmp_init_serial" , "openmp/runtime/src/kmp_sched.cpp", 1060); }; | |||
1061 | __kmp_team_static_init<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr, | |||
1062 | chunk); | |||
1063 | } | |||
1064 | ||||
1065 | /*! | |||
1066 | See @ref __kmpc_team_static_init_4 | |||
1067 | */ | |||
1068 | void __kmpc_team_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, | |||
1069 | kmp_int64 *p_lb, kmp_int64 *p_ub, | |||
1070 | kmp_int64 *p_st, kmp_int64 incr, | |||
1071 | kmp_int64 chunk) { | |||
1072 | KMP_DEBUG_ASSERT(__kmp_init_serial)if (!(__kmp_init_serial)) { __kmp_debug_assert("__kmp_init_serial" , "openmp/runtime/src/kmp_sched.cpp", 1072); }; | |||
1073 | __kmp_team_static_init<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr, | |||
1074 | chunk); | |||
1075 | } | |||
1076 | ||||
1077 | /*! | |||
1078 | See @ref __kmpc_team_static_init_4 | |||
1079 | */ | |||
1080 | void __kmpc_team_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, | |||
1081 | kmp_uint64 *p_lb, kmp_uint64 *p_ub, | |||
1082 | kmp_int64 *p_st, kmp_int64 incr, | |||
1083 | kmp_int64 chunk) { | |||
1084 | KMP_DEBUG_ASSERT(__kmp_init_serial)if (!(__kmp_init_serial)) { __kmp_debug_assert("__kmp_init_serial" , "openmp/runtime/src/kmp_sched.cpp", 1084); }; | |||
1085 | __kmp_team_static_init<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr, | |||
1086 | chunk); | |||
1087 | } | |||
1088 | /*! | |||
1089 | @} | |||
1090 | */ | |||
1091 | ||||
1092 | } // extern "C" |