File: projects/openmp/runtime/src/kmp_taskq.cpp
Warning: line 1346, column 13: Access to field 'tq_shareds' results in a dereference of a null pointer (loaded from field 'tq_root')
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | /* | |||||
2 | * kmp_taskq.cpp -- TASKQ support for OpenMP. | |||||
3 | */ | |||||
4 | ||||||
5 | //===----------------------------------------------------------------------===// | |||||
6 | // | |||||
7 | // The LLVM Compiler Infrastructure | |||||
8 | // | |||||
9 | // This file is dual licensed under the MIT and the University of Illinois Open | |||||
10 | // Source Licenses. See LICENSE.txt for details. | |||||
11 | // | |||||
12 | //===----------------------------------------------------------------------===// | |||||
13 | ||||||
14 | #include "kmp.h" | |||||
15 | #include "kmp_error.h" | |||||
16 | #include "kmp_i18n.h" | |||||
17 | #include "kmp_io.h" | |||||
18 | ||||||
/* Maximum length of a formatted debug/trace message. */
#define MAX_MESSAGE 512

/* Taskq routines and global variables */

/* Trace reference-count transitions at forced-debug level 1. */
#define KMP_DEBUG_REF_CTS(x) KF_TRACE(1, x);

/* Allocate taskq storage from the per-thread allocator rather than plain
   malloc (see __kmp_taskq_allocate). */
#define THREAD_ALLOC_FOR_TASKQ
26 | ||||||
27 | static int in_parallel_context(kmp_team_t *team) { | |||||
28 | return !team->t.t_serialized; | |||||
29 | } | |||||
30 | ||||||
31 | static void __kmp_taskq_eo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) { | |||||
32 | int gtid = *gtid_ref; | |||||
33 | int tid = __kmp_tid_from_gtid(gtid); | |||||
34 | kmp_uint32 my_token; | |||||
35 | kmpc_task_queue_t *taskq; | |||||
36 | kmp_taskq_t *tq = &__kmp_threads[gtid]->th.th_team->t.t_taskq; | |||||
37 | ||||||
38 | if (__kmp_env_consistency_check) | |||||
39 | #if KMP_USE_DYNAMIC_LOCK1 | |||||
40 | __kmp_push_sync(gtid, ct_ordered_in_taskq, loc_ref, NULL__null, 0); | |||||
41 | #else | |||||
42 | __kmp_push_sync(gtid, ct_ordered_in_taskq, loc_ref, NULL__null); | |||||
43 | #endif | |||||
44 | ||||||
45 | if (!__kmp_threads[gtid]->th.th_team->t.t_serialized) { | |||||
46 | KMP_MB(); /* Flush all pending memory write invalidates. */ | |||||
47 | ||||||
48 | /* GEH - need check here under stats to make sure */ | |||||
49 | /* inside task (curr_thunk[*tid_ref] != NULL) */ | |||||
50 | ||||||
51 | my_token = tq->tq_curr_thunk[tid]->th_tasknum; | |||||
52 | ||||||
53 | taskq = tq->tq_curr_thunk[tid]->th.th_shareds->sv_queue; | |||||
54 | ||||||
55 | KMP_WAIT_YIELD__kmp_wait_yield_4(&taskq->tq_tasknum_serving, my_token, KMP_EQ__kmp_eq_4, NULL__null); | |||||
56 | KMP_MB(); | |||||
57 | } | |||||
58 | } | |||||
59 | ||||||
60 | static void __kmp_taskq_xo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) { | |||||
61 | int gtid = *gtid_ref; | |||||
62 | int tid = __kmp_tid_from_gtid(gtid); | |||||
63 | kmp_uint32 my_token; | |||||
64 | kmp_taskq_t *tq = &__kmp_threads[gtid]->th.th_team->t.t_taskq; | |||||
65 | ||||||
66 | if (__kmp_env_consistency_check) | |||||
67 | __kmp_pop_sync(gtid, ct_ordered_in_taskq, loc_ref); | |||||
68 | ||||||
69 | if (!__kmp_threads[gtid]->th.th_team->t.t_serialized) { | |||||
70 | KMP_MB(); /* Flush all pending memory write invalidates. */ | |||||
71 | ||||||
72 | /* GEH - need check here under stats to make sure */ | |||||
73 | /* inside task (curr_thunk[tid] != NULL) */ | |||||
74 | ||||||
75 | my_token = tq->tq_curr_thunk[tid]->th_tasknum; | |||||
76 | ||||||
77 | KMP_MB(); /* Flush all pending memory write invalidates. */ | |||||
78 | ||||||
79 | tq->tq_curr_thunk[tid]->th.th_shareds->sv_queue->tq_tasknum_serving = | |||||
80 | my_token + 1; | |||||
81 | ||||||
82 | KMP_MB(); /* Flush all pending memory write invalidates. */ | |||||
83 | } | |||||
84 | } | |||||
85 | ||||||
86 | static void __kmp_taskq_check_ordered(kmp_int32 gtid, kmpc_thunk_t *thunk) { | |||||
87 | kmp_uint32 my_token; | |||||
88 | kmpc_task_queue_t *taskq; | |||||
89 | ||||||
90 | /* assume we are always called from an active parallel context */ | |||||
91 | ||||||
92 | KMP_MB(); /* Flush all pending memory write invalidates. */ | |||||
93 | ||||||
94 | my_token = thunk->th_tasknum; | |||||
95 | ||||||
96 | taskq = thunk->th.th_shareds->sv_queue; | |||||
97 | ||||||
98 | if (taskq->tq_tasknum_serving <= my_token) { | |||||
99 | KMP_WAIT_YIELD__kmp_wait_yield_4(&taskq->tq_tasknum_serving, my_token, KMP_GE__kmp_ge_4, NULL__null); | |||||
100 | KMP_MB(); | |||||
101 | taskq->tq_tasknum_serving = my_token + 1; | |||||
102 | KMP_MB(); | |||||
103 | } | |||||
104 | } | |||||
105 | ||||||
106 | #ifdef KMP_DEBUG1 | |||||
107 | ||||||
108 | static void __kmp_dump_TQF(kmp_int32 flags) { | |||||
109 | if (flags & TQF_IS_ORDERED0x0001) | |||||
110 | __kmp_printf("ORDERED "); | |||||
111 | if (flags & TQF_IS_LASTPRIVATE0x0002) | |||||
112 | __kmp_printf("LAST_PRIV "); | |||||
113 | if (flags & TQF_IS_NOWAIT0x0004) | |||||
114 | __kmp_printf("NOWAIT "); | |||||
115 | if (flags & TQF_HEURISTICS0x0008) | |||||
116 | __kmp_printf("HEURIST "); | |||||
117 | if (flags & TQF_INTERFACE_RESERVED10x0010) | |||||
118 | __kmp_printf("RESERV1 "); | |||||
119 | if (flags & TQF_INTERFACE_RESERVED20x0020) | |||||
120 | __kmp_printf("RESERV2 "); | |||||
121 | if (flags & TQF_INTERFACE_RESERVED30x0040) | |||||
122 | __kmp_printf("RESERV3 "); | |||||
123 | if (flags & TQF_INTERFACE_RESERVED40x0080) | |||||
124 | __kmp_printf("RESERV4 "); | |||||
125 | if (flags & TQF_IS_LAST_TASK0x0100) | |||||
126 | __kmp_printf("LAST_TASK "); | |||||
127 | if (flags & TQF_TASKQ_TASK0x0200) | |||||
128 | __kmp_printf("TASKQ_TASK "); | |||||
129 | if (flags & TQF_RELEASE_WORKERS0x0400) | |||||
130 | __kmp_printf("RELEASE "); | |||||
131 | if (flags & TQF_ALL_TASKS_QUEUED0x0800) | |||||
132 | __kmp_printf("ALL_QUEUED "); | |||||
133 | if (flags & TQF_PARALLEL_CONTEXT0x1000) | |||||
134 | __kmp_printf("PARALLEL "); | |||||
135 | if (flags & TQF_DEALLOCATED0x2000) | |||||
136 | __kmp_printf("DEALLOC "); | |||||
137 | if (!(flags & (TQF_INTERNAL_FLAGS0x3f00 | TQF_INTERFACE_FLAGS0x00ff))) | |||||
138 | __kmp_printf("(NONE)"); | |||||
139 | } | |||||
140 | ||||||
141 | static void __kmp_dump_thunk(kmp_taskq_t *tq, kmpc_thunk_t *thunk, | |||||
142 | kmp_int32 global_tid) { | |||||
143 | int i; | |||||
144 | int nproc = __kmp_threads[global_tid]->th.th_team->t.t_nproc; | |||||
145 | ||||||
146 | __kmp_printf("\tThunk at %p on (%d): ", thunk, global_tid); | |||||
147 | ||||||
148 | if (thunk != NULL__null) { | |||||
149 | for (i = 0; i < nproc; i++) { | |||||
150 | if (tq->tq_curr_thunk[i] == thunk) { | |||||
151 | __kmp_printf("[%i] ", i); | |||||
152 | } | |||||
153 | } | |||||
154 | __kmp_printf("th_shareds=%p, ", thunk->th.th_shareds); | |||||
155 | __kmp_printf("th_task=%p, ", thunk->th_task); | |||||
156 | __kmp_printf("th_encl_thunk=%p, ", thunk->th_encl_thunk); | |||||
157 | __kmp_printf("th_status=%d, ", thunk->th_status); | |||||
158 | __kmp_printf("th_tasknum=%u, ", thunk->th_tasknum); | |||||
159 | __kmp_printf("th_flags="); | |||||
160 | __kmp_dump_TQF(thunk->th_flags); | |||||
161 | } | |||||
162 | ||||||
163 | __kmp_printf("\n"); | |||||
164 | } | |||||
165 | ||||||
166 | static void __kmp_dump_thunk_stack(kmpc_thunk_t *thunk, kmp_int32 thread_num) { | |||||
167 | kmpc_thunk_t *th; | |||||
168 | ||||||
169 | __kmp_printf(" Thunk stack for T#%d: ", thread_num); | |||||
170 | ||||||
171 | for (th = thunk; th != NULL__null; th = th->th_encl_thunk) | |||||
172 | __kmp_printf("%p ", th); | |||||
173 | ||||||
174 | __kmp_printf("\n"); | |||||
175 | } | |||||
176 | ||||||
177 | static void __kmp_dump_task_queue(kmp_taskq_t *tq, kmpc_task_queue_t *queue, | |||||
178 | kmp_int32 global_tid) { | |||||
179 | int qs, count, i; | |||||
180 | kmpc_thunk_t *thunk; | |||||
181 | kmpc_task_queue_t *taskq; | |||||
182 | ||||||
183 | __kmp_printf("Task Queue at %p on (%d):\n", queue, global_tid); | |||||
184 | ||||||
185 | if (queue != NULL__null) { | |||||
186 | int in_parallel = queue->tq_flags & TQF_PARALLEL_CONTEXT0x1000; | |||||
187 | ||||||
188 | if (__kmp_env_consistency_check) { | |||||
189 | __kmp_printf(" tq_loc : "); | |||||
190 | } | |||||
191 | if (in_parallel) { | |||||
192 | ||||||
193 | // if (queue->tq.tq_parent != 0) | |||||
194 | //__kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid); | |||||
195 | ||||||
196 | //__kmp_acquire_lock(& queue->tq_link_lck, global_tid); | |||||
197 | ||||||
198 | // Make sure data structures are in consistent state before querying them | |||||
199 | // Seems to work without this for digital/alpha, needed for IBM/RS6000 | |||||
200 | KMP_MB(); | |||||
201 | ||||||
202 | __kmp_printf(" tq_parent : %p\n", queue->tq.tq_parent); | |||||
203 | __kmp_printf(" tq_first_child : %p\n", queue->tq_first_child); | |||||
204 | __kmp_printf(" tq_next_child : %p\n", queue->tq_next_child); | |||||
205 | __kmp_printf(" tq_prev_child : %p\n", queue->tq_prev_child); | |||||
206 | __kmp_printf(" tq_ref_count : %d\n", queue->tq_ref_count); | |||||
207 | ||||||
208 | //__kmp_release_lock(& queue->tq_link_lck, global_tid); | |||||
209 | ||||||
210 | // if (queue->tq.tq_parent != 0) | |||||
211 | //__kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid); | |||||
212 | ||||||
213 | //__kmp_acquire_lock(& queue->tq_free_thunks_lck, global_tid); | |||||
214 | //__kmp_acquire_lock(& queue->tq_queue_lck, global_tid); | |||||
215 | ||||||
216 | // Make sure data structures are in consistent state before querying them | |||||
217 | // Seems to work without this for digital/alpha, needed for IBM/RS6000 | |||||
218 | KMP_MB(); | |||||
219 | } | |||||
220 | ||||||
221 | __kmp_printf(" tq_shareds : "); | |||||
222 | for (i = 0; i < ((queue == tq->tq_root) ? queue->tq_nproc : 1); i++) | |||||
223 | __kmp_printf("%p ", queue->tq_shareds[i].ai_data); | |||||
224 | __kmp_printf("\n"); | |||||
225 | ||||||
226 | if (in_parallel) { | |||||
227 | __kmp_printf(" tq_tasknum_queuing : %u\n", queue->tq_tasknum_queuing); | |||||
228 | __kmp_printf(" tq_tasknum_serving : %u\n", queue->tq_tasknum_serving); | |||||
229 | } | |||||
230 | ||||||
231 | __kmp_printf(" tq_queue : %p\n", queue->tq_queue); | |||||
232 | __kmp_printf(" tq_thunk_space : %p\n", queue->tq_thunk_space); | |||||
233 | __kmp_printf(" tq_taskq_slot : %p\n", queue->tq_taskq_slot); | |||||
234 | ||||||
235 | __kmp_printf(" tq_free_thunks : "); | |||||
236 | for (thunk = queue->tq_free_thunks; thunk != NULL__null; | |||||
237 | thunk = thunk->th.th_next_free) | |||||
238 | __kmp_printf("%p ", thunk); | |||||
239 | __kmp_printf("\n"); | |||||
240 | ||||||
241 | __kmp_printf(" tq_nslots : %d\n", queue->tq_nslots); | |||||
242 | __kmp_printf(" tq_head : %d\n", queue->tq_head); | |||||
243 | __kmp_printf(" tq_tail : %d\n", queue->tq_tail); | |||||
244 | __kmp_printf(" tq_nfull : %d\n", queue->tq_nfull); | |||||
245 | __kmp_printf(" tq_hiwat : %d\n", queue->tq_hiwat); | |||||
246 | __kmp_printf(" tq_flags : "); | |||||
247 | __kmp_dump_TQF(queue->tq_flags); | |||||
248 | __kmp_printf("\n"); | |||||
249 | ||||||
250 | if (in_parallel) { | |||||
251 | __kmp_printf(" tq_th_thunks : "); | |||||
252 | for (i = 0; i < queue->tq_nproc; i++) { | |||||
253 | __kmp_printf("%d ", queue->tq_th_thunks[i].ai_data); | |||||
254 | } | |||||
255 | __kmp_printf("\n"); | |||||
256 | } | |||||
257 | ||||||
258 | __kmp_printf("\n"); | |||||
259 | __kmp_printf(" Queue slots:\n"); | |||||
260 | ||||||
261 | qs = queue->tq_tail; | |||||
262 | for (count = 0; count < queue->tq_nfull; ++count) { | |||||
263 | __kmp_printf("(%d)", qs); | |||||
264 | __kmp_dump_thunk(tq, queue->tq_queue[qs].qs_thunk, global_tid); | |||||
265 | qs = (qs + 1) % queue->tq_nslots; | |||||
266 | } | |||||
267 | ||||||
268 | __kmp_printf("\n"); | |||||
269 | ||||||
270 | if (in_parallel) { | |||||
271 | if (queue->tq_taskq_slot != NULL__null) { | |||||
272 | __kmp_printf(" TaskQ slot:\n"); | |||||
273 | __kmp_dump_thunk(tq, CCAST(kmpc_thunk_t *, queue->tq_taskq_slot)const_cast<kmpc_thunk_t *>(queue->tq_taskq_slot), | |||||
274 | global_tid); | |||||
275 | __kmp_printf("\n"); | |||||
276 | } | |||||
277 | //__kmp_release_lock(& queue->tq_queue_lck, global_tid); | |||||
278 | //__kmp_release_lock(& queue->tq_free_thunks_lck, global_tid); | |||||
279 | } | |||||
280 | } | |||||
281 | ||||||
282 | __kmp_printf(" Taskq freelist: "); | |||||
283 | ||||||
284 | //__kmp_acquire_lock( & tq->tq_freelist_lck, global_tid ); | |||||
285 | ||||||
286 | // Make sure data structures are in consistent state before querying them | |||||
287 | // Seems to work without this call for digital/alpha, needed for IBM/RS6000 | |||||
288 | KMP_MB(); | |||||
289 | ||||||
290 | for (taskq = tq->tq_freelist; taskq != NULL__null; taskq = taskq->tq.tq_next_free) | |||||
291 | __kmp_printf("%p ", taskq); | |||||
292 | ||||||
293 | //__kmp_release_lock( & tq->tq_freelist_lck, global_tid ); | |||||
294 | ||||||
295 | __kmp_printf("\n\n"); | |||||
296 | } | |||||
297 | ||||||
298 | static void __kmp_aux_dump_task_queue_tree(kmp_taskq_t *tq, | |||||
299 | kmpc_task_queue_t *curr_queue, | |||||
300 | kmp_int32 level, | |||||
301 | kmp_int32 global_tid) { | |||||
302 | int i, count, qs; | |||||
303 | int nproc = __kmp_threads[global_tid]->th.th_team->t.t_nproc; | |||||
304 | kmpc_task_queue_t *queue = curr_queue; | |||||
305 | ||||||
306 | if (curr_queue == NULL__null) | |||||
307 | return; | |||||
308 | ||||||
309 | __kmp_printf(" "); | |||||
310 | ||||||
311 | for (i = 0; i < level; i++) | |||||
312 | __kmp_printf(" "); | |||||
313 | ||||||
314 | __kmp_printf("%p", curr_queue); | |||||
315 | ||||||
316 | for (i = 0; i < nproc; i++) { | |||||
317 | if (tq->tq_curr_thunk[i] && | |||||
318 | tq->tq_curr_thunk[i]->th.th_shareds->sv_queue == curr_queue) { | |||||
319 | __kmp_printf(" [%i]", i); | |||||
320 | } | |||||
321 | } | |||||
322 | ||||||
323 | __kmp_printf(":"); | |||||
324 | ||||||
325 | //__kmp_acquire_lock(& curr_queue->tq_queue_lck, global_tid); | |||||
326 | ||||||
327 | // Make sure data structures are in consistent state before querying them | |||||
328 | // Seems to work without this call for digital/alpha, needed for IBM/RS6000 | |||||
329 | KMP_MB(); | |||||
330 | ||||||
331 | qs = curr_queue->tq_tail; | |||||
332 | ||||||
333 | for (count = 0; count < curr_queue->tq_nfull; ++count) { | |||||
334 | __kmp_printf("%p ", curr_queue->tq_queue[qs].qs_thunk); | |||||
335 | qs = (qs + 1) % curr_queue->tq_nslots; | |||||
336 | } | |||||
337 | ||||||
338 | //__kmp_release_lock(& curr_queue->tq_queue_lck, global_tid); | |||||
339 | ||||||
340 | __kmp_printf("\n"); | |||||
341 | ||||||
342 | if (curr_queue->tq_first_child) { | |||||
343 | //__kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid); | |||||
344 | ||||||
345 | // Make sure data structures are in consistent state before querying them | |||||
346 | // Seems to work without this call for digital/alpha, needed for IBM/RS6000 | |||||
347 | KMP_MB(); | |||||
348 | ||||||
349 | if (curr_queue->tq_first_child) { | |||||
350 | for (queue = CCAST(kmpc_task_queue_t *, curr_queue->tq_first_child)const_cast<kmpc_task_queue_t *>(curr_queue->tq_first_child ); | |||||
351 | queue != NULL__null; queue = queue->tq_next_child) { | |||||
352 | __kmp_aux_dump_task_queue_tree(tq, queue, level + 1, global_tid); | |||||
353 | } | |||||
354 | } | |||||
355 | ||||||
356 | //__kmp_release_lock(& curr_queue->tq_link_lck, global_tid); | |||||
357 | } | |||||
358 | } | |||||
359 | ||||||
360 | static void __kmp_dump_task_queue_tree(kmp_taskq_t *tq, | |||||
361 | kmpc_task_queue_t *tqroot, | |||||
362 | kmp_int32 global_tid) { | |||||
363 | __kmp_printf("TaskQ Tree at root %p on (%d):\n", tqroot, global_tid); | |||||
364 | ||||||
365 | __kmp_aux_dump_task_queue_tree(tq, tqroot, 0, global_tid); | |||||
366 | ||||||
367 | __kmp_printf("\n"); | |||||
368 | } | |||||
369 | #endif | |||||
370 | ||||||
371 | /* New taskq storage routines that try to minimize overhead of mallocs but | |||||
372 | still provide cache line alignment. */ | |||||
373 | static void *__kmp_taskq_allocate(size_t size, kmp_int32 global_tid) { | |||||
374 | void *addr, *orig_addr; | |||||
375 | size_t bytes; | |||||
376 | ||||||
377 | KB_TRACE(5, ("__kmp_taskq_allocate: called size=%d, gtid=%d\n", (int)size,if (kmp_b_debug >= 5) { __kmp_debug_printf ("__kmp_taskq_allocate: called size=%d, gtid=%d\n" , (int)size, global_tid); } | |||||
378 | global_tid))if (kmp_b_debug >= 5) { __kmp_debug_printf ("__kmp_taskq_allocate: called size=%d, gtid=%d\n" , (int)size, global_tid); }; | |||||
379 | ||||||
380 | bytes = sizeof(void *) + CACHE_LINE64 + size; | |||||
381 | ||||||
382 | #ifdef THREAD_ALLOC_FOR_TASKQ | |||||
383 | orig_addr = | |||||
384 | (void *)__kmp_thread_malloc(__kmp_thread_from_gtid(global_tid), bytes)___kmp_thread_malloc((__kmp_thread_from_gtid(global_tid)), (bytes ), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_taskq.cpp" , 384); | |||||
385 | #else | |||||
386 | KE_TRACE(10, ("%%%%%% MALLOC( %d )\n", bytes))if (kmp_e_debug >= 10) { __kmp_debug_printf ("%%%%%% MALLOC( %d )\n" , bytes); }; | |||||
387 | orig_addr = (void *)KMP_INTERNAL_MALLOC(bytes)malloc(bytes); | |||||
388 | #endif /* THREAD_ALLOC_FOR_TASKQ */ | |||||
389 | ||||||
390 | if (orig_addr == 0) | |||||
391 | KMP_FATAL(OutOfHeapMemory)__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_OutOfHeapMemory), __kmp_msg_null ); | |||||
392 | ||||||
393 | addr = orig_addr; | |||||
394 | ||||||
395 | if (((kmp_uintptr_t)addr & (CACHE_LINE64 - 1)) != 0) { | |||||
396 | KB_TRACE(50, ("__kmp_taskq_allocate: adjust for cache alignment\n"))if (kmp_b_debug >= 50) { __kmp_debug_printf ("__kmp_taskq_allocate: adjust for cache alignment\n" ); }; | |||||
397 | addr = (void *)(((kmp_uintptr_t)addr + CACHE_LINE64) & ~(CACHE_LINE64 - 1)); | |||||
398 | } | |||||
399 | ||||||
400 | (*(void **)addr) = orig_addr; | |||||
401 | ||||||
402 | KB_TRACE(10,if (kmp_b_debug >= 10) { __kmp_debug_printf ("__kmp_taskq_allocate: allocate: %p, use: %p - %p, size: %d, " "gtid: %d\n", orig_addr, ((void **)addr) + 1, ((char *)(((void **)addr) + 1)) + size - 1, (int)size, global_tid); } | |||||
403 | ("__kmp_taskq_allocate: allocate: %p, use: %p - %p, size: %d, "if (kmp_b_debug >= 10) { __kmp_debug_printf ("__kmp_taskq_allocate: allocate: %p, use: %p - %p, size: %d, " "gtid: %d\n", orig_addr, ((void **)addr) + 1, ((char *)(((void **)addr) + 1)) + size - 1, (int)size, global_tid); } | |||||
404 | "gtid: %d\n",if (kmp_b_debug >= 10) { __kmp_debug_printf ("__kmp_taskq_allocate: allocate: %p, use: %p - %p, size: %d, " "gtid: %d\n", orig_addr, ((void **)addr) + 1, ((char *)(((void **)addr) + 1)) + size - 1, (int)size, global_tid); } | |||||
405 | orig_addr, ((void **)addr) + 1,if (kmp_b_debug >= 10) { __kmp_debug_printf ("__kmp_taskq_allocate: allocate: %p, use: %p - %p, size: %d, " "gtid: %d\n", orig_addr, ((void **)addr) + 1, ((char *)(((void **)addr) + 1)) + size - 1, (int)size, global_tid); } | |||||
406 | ((char *)(((void **)addr) + 1)) + size - 1, (int)size, global_tid))if (kmp_b_debug >= 10) { __kmp_debug_printf ("__kmp_taskq_allocate: allocate: %p, use: %p - %p, size: %d, " "gtid: %d\n", orig_addr, ((void **)addr) + 1, ((char *)(((void **)addr) + 1)) + size - 1, (int)size, global_tid); }; | |||||
407 | ||||||
408 | return (((void **)addr) + 1); | |||||
409 | } | |||||
410 | ||||||
411 | static void __kmpc_taskq_free(void *p, kmp_int32 global_tid) { | |||||
412 | KB_TRACE(5, ("__kmpc_taskq_free: called addr=%p, gtid=%d\n", p, global_tid))if (kmp_b_debug >= 5) { __kmp_debug_printf ("__kmpc_taskq_free: called addr=%p, gtid=%d\n" , p, global_tid); }; | |||||
413 | ||||||
414 | KB_TRACE(10, ("__kmpc_taskq_free: freeing: %p, gtid: %d\n",if (kmp_b_debug >= 10) { __kmp_debug_printf ("__kmpc_taskq_free: freeing: %p, gtid: %d\n" , (*(((void **)p) - 1)), global_tid); } | |||||
415 | (*(((void **)p) - 1)), global_tid))if (kmp_b_debug >= 10) { __kmp_debug_printf ("__kmpc_taskq_free: freeing: %p, gtid: %d\n" , (*(((void **)p) - 1)), global_tid); }; | |||||
416 | ||||||
417 | #ifdef THREAD_ALLOC_FOR_TASKQ | |||||
418 | __kmp_thread_free(__kmp_thread_from_gtid(global_tid), *(((void **)p) - 1))___kmp_thread_free((__kmp_thread_from_gtid(global_tid)), (*(( (void **)p) - 1)), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_taskq.cpp" , 418); | |||||
419 | #else | |||||
420 | KMP_INTERNAL_FREE(*(((void **)p) - 1))free(*(((void **)p) - 1)); | |||||
421 | #endif /* THREAD_ALLOC_FOR_TASKQ */ | |||||
422 | } | |||||
423 | ||||||
424 | /* Keep freed kmpc_task_queue_t on an internal freelist and recycle since | |||||
425 | they're of constant size. */ | |||||
426 | ||||||
427 | static kmpc_task_queue_t * | |||||
428 | __kmp_alloc_taskq(kmp_taskq_t *tq, int in_parallel, kmp_int32 nslots, | |||||
429 | kmp_int32 nthunks, kmp_int32 nshareds, kmp_int32 nproc, | |||||
430 | size_t sizeof_thunk, size_t sizeof_shareds, | |||||
431 | kmpc_thunk_t **new_taskq_thunk, kmp_int32 global_tid) { | |||||
432 | kmp_int32 i; | |||||
433 | size_t bytes; | |||||
434 | kmpc_task_queue_t *new_queue; | |||||
435 | kmpc_aligned_shared_vars_t *shared_var_array; | |||||
436 | char *shared_var_storage; | |||||
437 | char *pt; /* for doing byte-adjusted address computations */ | |||||
438 | ||||||
439 | __kmp_acquire_lock(&tq->tq_freelist_lck, global_tid); | |||||
440 | ||||||
441 | // Make sure data structures are in consistent state before querying them | |||||
442 | // Seems to work without this call for digital/alpha, needed for IBM/RS6000 | |||||
443 | KMP_MB(); | |||||
444 | ||||||
445 | if (tq->tq_freelist) { | |||||
446 | new_queue = tq->tq_freelist; | |||||
447 | tq->tq_freelist = tq->tq_freelist->tq.tq_next_free; | |||||
448 | ||||||
449 | KMP_DEBUG_ASSERT(new_queue->tq_flags & TQF_DEALLOCATED)if (!(new_queue->tq_flags & 0x2000)) { __kmp_debug_assert ("new_queue->tq_flags & 0x2000", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_taskq.cpp" , 449); }; | |||||
450 | ||||||
451 | new_queue->tq_flags = 0; | |||||
452 | ||||||
453 | __kmp_release_lock(&tq->tq_freelist_lck, global_tid); | |||||
454 | } else { | |||||
455 | __kmp_release_lock(&tq->tq_freelist_lck, global_tid); | |||||
456 | ||||||
457 | new_queue = (kmpc_task_queue_t *)__kmp_taskq_allocate( | |||||
458 | sizeof(kmpc_task_queue_t), global_tid); | |||||
459 | new_queue->tq_flags = 0; | |||||
460 | } | |||||
461 | ||||||
462 | /* space in the task queue for queue slots (allocate as one big chunk */ | |||||
463 | /* of storage including new_taskq_task space) */ | |||||
464 | ||||||
465 | sizeof_thunk += | |||||
466 | (CACHE_LINE64 - (sizeof_thunk % CACHE_LINE64)); /* pad to cache line size */ | |||||
467 | pt = (char *)__kmp_taskq_allocate(nthunks * sizeof_thunk, global_tid); | |||||
468 | new_queue->tq_thunk_space = (kmpc_thunk_t *)pt; | |||||
469 | *new_taskq_thunk = (kmpc_thunk_t *)(pt + (nthunks - 1) * sizeof_thunk); | |||||
470 | ||||||
471 | /* chain the allocated thunks into a freelist for this queue */ | |||||
472 | ||||||
473 | new_queue->tq_free_thunks = (kmpc_thunk_t *)pt; | |||||
474 | ||||||
475 | for (i = 0; i < (nthunks - 2); i++) { | |||||
476 | ((kmpc_thunk_t *)(pt + i * sizeof_thunk))->th.th_next_free = | |||||
477 | (kmpc_thunk_t *)(pt + (i + 1) * sizeof_thunk); | |||||
478 | #ifdef KMP_DEBUG1 | |||||
479 | ((kmpc_thunk_t *)(pt + i * sizeof_thunk))->th_flags = TQF_DEALLOCATED0x2000; | |||||
480 | #endif | |||||
481 | } | |||||
482 | ||||||
483 | ((kmpc_thunk_t *)(pt + (nthunks - 2) * sizeof_thunk))->th.th_next_free = NULL__null; | |||||
484 | #ifdef KMP_DEBUG1 | |||||
485 | ((kmpc_thunk_t *)(pt + (nthunks - 2) * sizeof_thunk))->th_flags = | |||||
486 | TQF_DEALLOCATED0x2000; | |||||
487 | #endif | |||||
488 | ||||||
489 | /* initialize the locks */ | |||||
490 | ||||||
491 | if (in_parallel) { | |||||
492 | __kmp_init_lock(&new_queue->tq_link_lck); | |||||
493 | __kmp_init_lock(&new_queue->tq_free_thunks_lck); | |||||
494 | __kmp_init_lock(&new_queue->tq_queue_lck); | |||||
495 | } | |||||
496 | ||||||
497 | /* now allocate the slots */ | |||||
498 | ||||||
499 | bytes = nslots * sizeof(kmpc_aligned_queue_slot_t); | |||||
500 | new_queue->tq_queue = | |||||
501 | (kmpc_aligned_queue_slot_t *)__kmp_taskq_allocate(bytes, global_tid); | |||||
502 | ||||||
503 | /* space for array of pointers to shared variable structures */ | |||||
504 | sizeof_shareds += sizeof(kmpc_task_queue_t *); | |||||
505 | sizeof_shareds += | |||||
506 | (CACHE_LINE64 - (sizeof_shareds % CACHE_LINE64)); /* pad to cache line size */ | |||||
507 | ||||||
508 | bytes = nshareds * sizeof(kmpc_aligned_shared_vars_t); | |||||
509 | shared_var_array = | |||||
510 | (kmpc_aligned_shared_vars_t *)__kmp_taskq_allocate(bytes, global_tid); | |||||
511 | ||||||
512 | bytes = nshareds * sizeof_shareds; | |||||
513 | shared_var_storage = (char *)__kmp_taskq_allocate(bytes, global_tid); | |||||
514 | ||||||
515 | for (i = 0; i < nshareds; i++) { | |||||
516 | shared_var_array[i].ai_data = | |||||
517 | (kmpc_shared_vars_t *)(shared_var_storage + i * sizeof_shareds); | |||||
518 | shared_var_array[i].ai_data->sv_queue = new_queue; | |||||
519 | } | |||||
520 | new_queue->tq_shareds = shared_var_array; | |||||
521 | ||||||
522 | /* array for number of outstanding thunks per thread */ | |||||
523 | ||||||
524 | if (in_parallel) { | |||||
525 | bytes = nproc * sizeof(kmpc_aligned_int32_t); | |||||
526 | new_queue->tq_th_thunks = | |||||
527 | (kmpc_aligned_int32_t *)__kmp_taskq_allocate(bytes, global_tid); | |||||
528 | new_queue->tq_nproc = nproc; | |||||
529 | ||||||
530 | for (i = 0; i < nproc; i++) | |||||
531 | new_queue->tq_th_thunks[i].ai_data = 0; | |||||
532 | } | |||||
533 | ||||||
534 | return new_queue; | |||||
535 | } | |||||
536 | ||||||
537 | static void __kmp_free_taskq(kmp_taskq_t *tq, kmpc_task_queue_t *p, | |||||
538 | int in_parallel, kmp_int32 global_tid) { | |||||
539 | __kmpc_taskq_free(p->tq_thunk_space, global_tid); | |||||
540 | __kmpc_taskq_free(p->tq_queue, global_tid); | |||||
541 | ||||||
542 | /* free shared var structure storage */ | |||||
543 | __kmpc_taskq_free(CCAST(kmpc_shared_vars_t *, p->tq_shareds[0].ai_data)const_cast<kmpc_shared_vars_t *>(p->tq_shareds[0].ai_data ), | |||||
544 | global_tid); | |||||
545 | /* free array of pointers to shared vars storage */ | |||||
546 | __kmpc_taskq_free(p->tq_shareds, global_tid); | |||||
547 | ||||||
548 | #ifdef KMP_DEBUG1 | |||||
549 | p->tq_first_child = NULL__null; | |||||
550 | p->tq_next_child = NULL__null; | |||||
551 | p->tq_prev_child = NULL__null; | |||||
552 | p->tq_ref_count = -10; | |||||
553 | p->tq_shareds = NULL__null; | |||||
554 | p->tq_tasknum_queuing = 0; | |||||
555 | p->tq_tasknum_serving = 0; | |||||
556 | p->tq_queue = NULL__null; | |||||
557 | p->tq_thunk_space = NULL__null; | |||||
558 | p->tq_taskq_slot = NULL__null; | |||||
559 | p->tq_free_thunks = NULL__null; | |||||
560 | p->tq_nslots = 0; | |||||
561 | p->tq_head = 0; | |||||
562 | p->tq_tail = 0; | |||||
563 | p->tq_nfull = 0; | |||||
564 | p->tq_hiwat = 0; | |||||
565 | ||||||
566 | if (in_parallel) { | |||||
567 | int i; | |||||
568 | ||||||
569 | for (i = 0; i < p->tq_nproc; i++) | |||||
570 | p->tq_th_thunks[i].ai_data = 0; | |||||
571 | } | |||||
572 | if (__kmp_env_consistency_check) | |||||
573 | p->tq_loc = NULL__null; | |||||
574 | KMP_DEBUG_ASSERT(p->tq_flags & TQF_DEALLOCATED)if (!(p->tq_flags & 0x2000)) { __kmp_debug_assert("p->tq_flags & 0x2000" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_taskq.cpp" , 574); }; | |||||
575 | p->tq_flags = TQF_DEALLOCATED0x2000; | |||||
576 | #endif /* KMP_DEBUG */ | |||||
577 | ||||||
578 | if (in_parallel) { | |||||
579 | __kmpc_taskq_free(p->tq_th_thunks, global_tid); | |||||
580 | __kmp_destroy_lock(&p->tq_link_lck); | |||||
581 | __kmp_destroy_lock(&p->tq_queue_lck); | |||||
582 | __kmp_destroy_lock(&p->tq_free_thunks_lck); | |||||
583 | } | |||||
584 | #ifdef KMP_DEBUG1 | |||||
585 | p->tq_th_thunks = NULL__null; | |||||
586 | #endif /* KMP_DEBUG */ | |||||
587 | ||||||
588 | // Make sure data structures are in consistent state before querying them | |||||
589 | // Seems to work without this call for digital/alpha, needed for IBM/RS6000 | |||||
590 | KMP_MB(); | |||||
591 | ||||||
592 | __kmp_acquire_lock(&tq->tq_freelist_lck, global_tid); | |||||
593 | p->tq.tq_next_free = tq->tq_freelist; | |||||
594 | ||||||
595 | tq->tq_freelist = p; | |||||
596 | __kmp_release_lock(&tq->tq_freelist_lck, global_tid); | |||||
597 | } | |||||
598 | ||||||
599 | /* Once a group of thunks has been allocated for use in a particular queue, | |||||
600 | these are managed via a per-queue freelist. | |||||
601 | We force a check that there's always a thunk free if we need one. */ | |||||
602 | ||||||
603 | static kmpc_thunk_t *__kmp_alloc_thunk(kmpc_task_queue_t *queue, | |||||
604 | int in_parallel, kmp_int32 global_tid) { | |||||
605 | kmpc_thunk_t *fl; | |||||
606 | ||||||
607 | if (in_parallel) { | |||||
608 | __kmp_acquire_lock(&queue->tq_free_thunks_lck, global_tid); | |||||
609 | // Make sure data structures are in consistent state before querying them | |||||
610 | // Seems to work without this call for digital/alpha, needed for IBM/RS6000 | |||||
611 | KMP_MB(); | |||||
612 | } | |||||
613 | ||||||
614 | fl = queue->tq_free_thunks; | |||||
615 | ||||||
616 | KMP_DEBUG_ASSERT(fl != NULL)if (!(fl != __null)) { __kmp_debug_assert("fl != __null", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_taskq.cpp" , 616); }; | |||||
617 | ||||||
618 | queue->tq_free_thunks = fl->th.th_next_free; | |||||
619 | fl->th_flags = 0; | |||||
620 | ||||||
621 | if (in_parallel) | |||||
622 | __kmp_release_lock(&queue->tq_free_thunks_lck, global_tid); | |||||
623 | ||||||
624 | return fl; | |||||
625 | } | |||||
626 | ||||||
627 | static void __kmp_free_thunk(kmpc_task_queue_t *queue, kmpc_thunk_t *p, | |||||
628 | int in_parallel, kmp_int32 global_tid) { | |||||
629 | #ifdef KMP_DEBUG1 | |||||
630 | p->th_task = 0; | |||||
631 | p->th_encl_thunk = 0; | |||||
632 | p->th_status = 0; | |||||
633 | p->th_tasknum = 0; | |||||
634 | /* Also could zero pointers to private vars */ | |||||
635 | #endif | |||||
636 | ||||||
637 | if (in_parallel) { | |||||
638 | __kmp_acquire_lock(&queue->tq_free_thunks_lck, global_tid); | |||||
639 | // Make sure data structures are in consistent state before querying them | |||||
640 | // Seems to work without this call for digital/alpha, needed for IBM/RS6000 | |||||
641 | KMP_MB(); | |||||
642 | } | |||||
643 | ||||||
644 | p->th.th_next_free = queue->tq_free_thunks; | |||||
645 | queue->tq_free_thunks = p; | |||||
646 | ||||||
647 | #ifdef KMP_DEBUG1 | |||||
648 | p->th_flags = TQF_DEALLOCATED0x2000; | |||||
649 | #endif | |||||
650 | ||||||
651 | if (in_parallel) | |||||
652 | __kmp_release_lock(&queue->tq_free_thunks_lck, global_tid); | |||||
653 | } | |||||
654 | ||||||
655 | /* returns nonzero if the queue just became full after the enqueue */ | |||||
656 | static kmp_int32 __kmp_enqueue_task(kmp_taskq_t *tq, kmp_int32 global_tid, | |||||
657 | kmpc_task_queue_t *queue, | |||||
658 | kmpc_thunk_t *thunk, int in_parallel) { | |||||
659 | kmp_int32 ret; | |||||
660 | ||||||
661 | /* dkp: can we get around the lock in the TQF_RELEASE_WORKERS case (only the | |||||
662 | * master is executing then) */ | |||||
663 | if (in_parallel) { | |||||
664 | __kmp_acquire_lock(&queue->tq_queue_lck, global_tid); | |||||
665 | // Make sure data structures are in consistent state before querying them | |||||
666 | // Seems to work without this call for digital/alpha, needed for IBM/RS6000 | |||||
667 | KMP_MB(); | |||||
668 | } | |||||
669 | ||||||
670 | KMP_DEBUG_ASSERT(queue->tq_nfull < queue->tq_nslots)if (!(queue->tq_nfull < queue->tq_nslots)) { __kmp_debug_assert ("queue->tq_nfull < queue->tq_nslots", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_taskq.cpp" , 670); }; // check queue not full | |||||
671 | ||||||
672 | queue->tq_queue[(queue->tq_head)++].qs_thunk = thunk; | |||||
673 | ||||||
674 | if (queue->tq_head >= queue->tq_nslots) | |||||
675 | queue->tq_head = 0; | |||||
676 | ||||||
677 | (queue->tq_nfull)++; | |||||
678 | ||||||
679 | KMP_MB(); /* to assure that nfull is seen to increase before | |||||
680 | TQF_ALL_TASKS_QUEUED is set */ | |||||
681 | ||||||
682 | ret = (in_parallel) ? (queue->tq_nfull == queue->tq_nslots) : FALSE0; | |||||
683 | ||||||
684 | if (in_parallel) { | |||||
685 | /* don't need to wait until workers are released before unlocking */ | |||||
686 | __kmp_release_lock(&queue->tq_queue_lck, global_tid); | |||||
687 | ||||||
688 | if (tq->tq_global_flags & TQF_RELEASE_WORKERS0x0400) { | |||||
689 | // If just creating the root queue, the worker threads are waiting at a | |||||
690 | // join barrier until now, when there's something in the queue for them to | |||||
691 | // do; release them now to do work. This should only be done when this is | |||||
692 | // the first task enqueued, so reset the flag here also. | |||||
693 | tq->tq_global_flags &= ~TQF_RELEASE_WORKERS0x0400; /* no lock needed, workers | |||||
694 | are still in spin mode */ | |||||
695 | // avoid releasing barrier twice if taskq_task switches threads | |||||
696 | KMP_MB(); | |||||
697 | ||||||
698 | __kmpc_end_barrier_master(NULL__null, global_tid); | |||||
699 | } | |||||
700 | } | |||||
701 | ||||||
702 | return ret; | |||||
703 | } | |||||
704 | ||||||
705 | static kmpc_thunk_t *__kmp_dequeue_task(kmp_int32 global_tid, | |||||
706 | kmpc_task_queue_t *queue, | |||||
707 | int in_parallel) { | |||||
708 | kmpc_thunk_t *pt; | |||||
709 | int tid = __kmp_tid_from_gtid(global_tid); | |||||
710 | ||||||
711 | KMP_DEBUG_ASSERT(queue->tq_nfull > 0)if (!(queue->tq_nfull > 0)) { __kmp_debug_assert("queue->tq_nfull > 0" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_taskq.cpp" , 711); }; /* check queue not empty */ | |||||
712 | ||||||
713 | if (queue->tq.tq_parent != NULL__null && in_parallel) { | |||||
714 | int ct; | |||||
715 | __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid); | |||||
716 | ct = ++(queue->tq_ref_count); | |||||
717 | __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid); | |||||
718 | KMP_DEBUG_REF_CTS(if (kmp_f_debug >= 1) { __kmp_debug_printf ("line %d gtid %d: Q %p inc %d\n" , 719, global_tid, queue, ct); }; | |||||
719 | ("line %d gtid %d: Q %p inc %d\n", __LINE__, global_tid, queue, ct))if (kmp_f_debug >= 1) { __kmp_debug_printf ("line %d gtid %d: Q %p inc %d\n" , 719, global_tid, queue, ct); };; | |||||
720 | } | |||||
721 | ||||||
722 | pt = queue->tq_queue[(queue->tq_tail)++].qs_thunk; | |||||
723 | ||||||
724 | if (queue->tq_tail >= queue->tq_nslots) | |||||
725 | queue->tq_tail = 0; | |||||
726 | ||||||
727 | if (in_parallel) { | |||||
728 | queue->tq_th_thunks[tid].ai_data++; | |||||
729 | ||||||
730 | KMP_MB(); /* necessary so ai_data increment is propagated to other threads | |||||
731 | immediately (digital) */ | |||||
732 | ||||||
733 | KF_TRACE(200, ("__kmp_dequeue_task: T#%d(:%d) now has %d outstanding "if (kmp_f_debug >= 200) { __kmp_debug_printf ("__kmp_dequeue_task: T#%d(:%d) now has %d outstanding " "thunks from queue %p\n", global_tid, tid, queue->tq_th_thunks [tid].ai_data, queue); } | |||||
734 | "thunks from queue %p\n",if (kmp_f_debug >= 200) { __kmp_debug_printf ("__kmp_dequeue_task: T#%d(:%d) now has %d outstanding " "thunks from queue %p\n", global_tid, tid, queue->tq_th_thunks [tid].ai_data, queue); } | |||||
735 | global_tid, tid, queue->tq_th_thunks[tid].ai_data, queue))if (kmp_f_debug >= 200) { __kmp_debug_printf ("__kmp_dequeue_task: T#%d(:%d) now has %d outstanding " "thunks from queue %p\n", global_tid, tid, queue->tq_th_thunks [tid].ai_data, queue); }; | |||||
736 | } | |||||
737 | ||||||
738 | (queue->tq_nfull)--; | |||||
739 | ||||||
740 | #ifdef KMP_DEBUG1 | |||||
741 | KMP_MB(); | |||||
742 | ||||||
743 | /* necessary so (queue->tq_nfull > 0) above succeeds after tq_nfull is | |||||
744 | * decremented */ | |||||
745 | ||||||
746 | KMP_DEBUG_ASSERT(queue->tq_nfull >= 0)if (!(queue->tq_nfull >= 0)) { __kmp_debug_assert("queue->tq_nfull >= 0" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_taskq.cpp" , 746); }; | |||||
747 | ||||||
748 | if (in_parallel) { | |||||
749 | KMP_DEBUG_ASSERT(queue->tq_th_thunks[tid].ai_data <=if (!(queue->tq_th_thunks[tid].ai_data <= 1)) { __kmp_debug_assert ("queue->tq_th_thunks[tid].ai_data <= 1", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_taskq.cpp" , 750); } | |||||
750 | __KMP_TASKQ_THUNKS_PER_TH)if (!(queue->tq_th_thunks[tid].ai_data <= 1)) { __kmp_debug_assert ("queue->tq_th_thunks[tid].ai_data <= 1", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_taskq.cpp" , 750); }; | |||||
751 | } | |||||
752 | #endif | |||||
753 | ||||||
754 | return pt; | |||||
755 | } | |||||
756 | ||||||
757 | /* Find the next (non-null) task to dequeue and return it. | |||||
758 | * This is never called unless in_parallel=TRUE | |||||
759 | * | |||||
760 | * Here are the rules for deciding which queue to take the task from: | |||||
761 | * 1. Walk up the task queue tree from the current queue's parent and look | |||||
762 | * on the way up (for loop, below). | |||||
763 | * 2. Do a depth-first search back down the tree from the root and | |||||
764 | * look (find_task_in_descendant_queue()). | |||||
765 | * | |||||
766 | * Here are the rules for deciding which task to take from a queue | |||||
767 | * (__kmp_find_task_in_queue ()): | |||||
768 | * 1. Never take the last task from a queue if TQF_IS_LASTPRIVATE; this task | |||||
769 | * must be staged to make sure we execute the last one with | |||||
770 | * TQF_IS_LAST_TASK at the end of task queue execution. | |||||
771 | * 2. If the queue length is below some high water mark and the taskq task | |||||
772 | * is enqueued, prefer running the taskq task. | |||||
773 | * 3. Otherwise, take a (normal) task from the queue. | |||||
774 | * | |||||
775 | * If we do all this and return pt == NULL at the bottom of this routine, | |||||
776 | * this means there are no more tasks to execute (except possibly for | |||||
777 | * TQF_IS_LASTPRIVATE). | |||||
778 | */ | |||||
779 | ||||||
780 | static kmpc_thunk_t *__kmp_find_task_in_queue(kmp_int32 global_tid, | |||||
781 | kmpc_task_queue_t *queue) { | |||||
782 | kmpc_thunk_t *pt = NULL__null; | |||||
783 | int tid = __kmp_tid_from_gtid(global_tid); | |||||
784 | ||||||
785 | /* To prevent deadlock from tq_queue_lck if queue already deallocated */ | |||||
786 | if (!(queue->tq_flags & TQF_DEALLOCATED0x2000)) { | |||||
787 | ||||||
788 | __kmp_acquire_lock(&queue->tq_queue_lck, global_tid); | |||||
789 | ||||||
790 | /* Check again to avoid race in __kmpc_end_taskq() */ | |||||
791 | if (!(queue->tq_flags & TQF_DEALLOCATED0x2000)) { | |||||
792 | // Make sure data structures are in consistent state before querying them | |||||
793 | // Seems to work without this for digital/alpha, needed for IBM/RS6000 | |||||
794 | KMP_MB(); | |||||
795 | ||||||
796 | if ((queue->tq_taskq_slot != NULL__null) && | |||||
797 | (queue->tq_nfull <= queue->tq_hiwat)) { | |||||
798 | /* if there's enough room in the queue and the dispatcher */ | |||||
799 | /* (taskq task) is available, schedule more tasks */ | |||||
800 | pt = CCAST(kmpc_thunk_t *, queue->tq_taskq_slot)const_cast<kmpc_thunk_t *>(queue->tq_taskq_slot); | |||||
801 | queue->tq_taskq_slot = NULL__null; | |||||
802 | } else if (queue->tq_nfull == 0 || | |||||
803 | queue->tq_th_thunks[tid].ai_data >= | |||||
804 | __KMP_TASKQ_THUNKS_PER_TH1) { | |||||
805 | /* do nothing if no thunks available or this thread can't */ | |||||
806 | /* run any because it already is executing too many */ | |||||
807 | pt = NULL__null; | |||||
808 | } else if (queue->tq_nfull > 1) { | |||||
809 | /* always safe to schedule a task even if TQF_IS_LASTPRIVATE */ | |||||
810 | ||||||
811 | pt = __kmp_dequeue_task(global_tid, queue, TRUE(!0)); | |||||
812 | } else if (!(queue->tq_flags & TQF_IS_LASTPRIVATE0x0002)) { | |||||
813 | // one thing in queue, always safe to schedule if !TQF_IS_LASTPRIVATE | |||||
814 | pt = __kmp_dequeue_task(global_tid, queue, TRUE(!0)); | |||||
815 | } else if (queue->tq_flags & TQF_IS_LAST_TASK0x0100) { | |||||
816 | /* TQF_IS_LASTPRIVATE, one thing in queue, kmpc_end_taskq_task() */ | |||||
817 | /* has been run so this is last task, run with TQF_IS_LAST_TASK so */ | |||||
818 | /* instrumentation does copy-out. */ | |||||
819 | pt = __kmp_dequeue_task(global_tid, queue, TRUE(!0)); | |||||
820 | pt->th_flags |= | |||||
821 | TQF_IS_LAST_TASK0x0100; /* don't need test_then_or since already locked */ | |||||
822 | } | |||||
823 | } | |||||
824 | ||||||
825 | /* GEH - What happens here if is lastprivate, but not last task? */ | |||||
826 | __kmp_release_lock(&queue->tq_queue_lck, global_tid); | |||||
827 | } | |||||
828 | ||||||
829 | return pt; | |||||
830 | } | |||||
831 | ||||||
832 | /* Walk a tree of queues starting at queue's first child and return a non-NULL | |||||
833 | thunk if one can be scheduled. Must only be called when in_parallel=TRUE */ | |||||
834 | ||||||
835 | static kmpc_thunk_t * | |||||
836 | __kmp_find_task_in_descendant_queue(kmp_int32 global_tid, | |||||
837 | kmpc_task_queue_t *curr_queue) { | |||||
838 | kmpc_thunk_t *pt = NULL__null; | |||||
839 | kmpc_task_queue_t *queue = curr_queue; | |||||
840 | ||||||
841 | if (curr_queue->tq_first_child != NULL__null) { | |||||
842 | __kmp_acquire_lock(&curr_queue->tq_link_lck, global_tid); | |||||
843 | // Make sure data structures are in consistent state before querying them | |||||
844 | // Seems to work without this call for digital/alpha, needed for IBM/RS6000 | |||||
845 | KMP_MB(); | |||||
846 | ||||||
847 | queue = CCAST(kmpc_task_queue_t *, curr_queue->tq_first_child)const_cast<kmpc_task_queue_t *>(curr_queue->tq_first_child ); | |||||
848 | if (queue == NULL__null) { | |||||
849 | __kmp_release_lock(&curr_queue->tq_link_lck, global_tid); | |||||
850 | return NULL__null; | |||||
851 | } | |||||
852 | ||||||
853 | while (queue != NULL__null) { | |||||
854 | int ct; | |||||
855 | kmpc_task_queue_t *next; | |||||
856 | ||||||
857 | ct = ++(queue->tq_ref_count); | |||||
858 | __kmp_release_lock(&curr_queue->tq_link_lck, global_tid); | |||||
859 | KMP_DEBUG_REF_CTS(if (kmp_f_debug >= 1) { __kmp_debug_printf ("line %d gtid %d: Q %p inc %d\n" , 860, global_tid, queue, ct); }; | |||||
860 | ("line %d gtid %d: Q %p inc %d\n", __LINE__, global_tid, queue, ct))if (kmp_f_debug >= 1) { __kmp_debug_printf ("line %d gtid %d: Q %p inc %d\n" , 860, global_tid, queue, ct); };; | |||||
861 | ||||||
862 | pt = __kmp_find_task_in_queue(global_tid, queue); | |||||
863 | ||||||
864 | if (pt != NULL__null) { | |||||
865 | int ct; | |||||
866 | ||||||
867 | __kmp_acquire_lock(&curr_queue->tq_link_lck, global_tid); | |||||
868 | // Make sure data structures in consistent state before querying them | |||||
869 | // Seems to work without this for digital/alpha, needed for IBM/RS6000 | |||||
870 | KMP_MB(); | |||||
871 | ||||||
872 | ct = --(queue->tq_ref_count); | |||||
873 | KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", __LINE__,if (kmp_f_debug >= 1) { __kmp_debug_printf ("line %d gtid %d: Q %p dec %d\n" , 873, global_tid, queue, ct); }; | |||||
874 | global_tid, queue, ct))if (kmp_f_debug >= 1) { __kmp_debug_printf ("line %d gtid %d: Q %p dec %d\n" , 873, global_tid, queue, ct); };; | |||||
875 | KMP_DEBUG_ASSERT(queue->tq_ref_count >= 0)if (!(queue->tq_ref_count >= 0)) { __kmp_debug_assert("queue->tq_ref_count >= 0" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_taskq.cpp" , 875); }; | |||||
876 | ||||||
877 | __kmp_release_lock(&curr_queue->tq_link_lck, global_tid); | |||||
878 | ||||||
879 | return pt; | |||||
880 | } | |||||
881 | ||||||
882 | /* although reference count stays active during descendant walk, shouldn't | |||||
883 | matter since if children still exist, reference counts aren't being | |||||
884 | monitored anyway */ | |||||
885 | ||||||
886 | pt = __kmp_find_task_in_descendant_queue(global_tid, queue); | |||||
887 | ||||||
888 | if (pt != NULL__null) { | |||||
889 | int ct; | |||||
890 | ||||||
891 | __kmp_acquire_lock(&curr_queue->tq_link_lck, global_tid); | |||||
892 | // Make sure data structures in consistent state before querying them | |||||
893 | // Seems to work without this for digital/alpha, needed for IBM/RS6000 | |||||
894 | KMP_MB(); | |||||
895 | ||||||
896 | ct = --(queue->tq_ref_count); | |||||
897 | KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", __LINE__,if (kmp_f_debug >= 1) { __kmp_debug_printf ("line %d gtid %d: Q %p dec %d\n" , 897, global_tid, queue, ct); }; | |||||
898 | global_tid, queue, ct))if (kmp_f_debug >= 1) { __kmp_debug_printf ("line %d gtid %d: Q %p dec %d\n" , 897, global_tid, queue, ct); };; | |||||
899 | KMP_DEBUG_ASSERT(ct >= 0)if (!(ct >= 0)) { __kmp_debug_assert("ct >= 0", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_taskq.cpp" , 899); }; | |||||
900 | ||||||
901 | __kmp_release_lock(&curr_queue->tq_link_lck, global_tid); | |||||
902 | ||||||
903 | return pt; | |||||
904 | } | |||||
905 | ||||||
906 | __kmp_acquire_lock(&curr_queue->tq_link_lck, global_tid); | |||||
907 | // Make sure data structures in consistent state before querying them | |||||
908 | // Seems to work without this for digital/alpha, needed for IBM/RS6000 | |||||
909 | KMP_MB(); | |||||
910 | ||||||
911 | next = queue->tq_next_child; | |||||
912 | ||||||
913 | ct = --(queue->tq_ref_count); | |||||
914 | KMP_DEBUG_REF_CTS(if (kmp_f_debug >= 1) { __kmp_debug_printf ("line %d gtid %d: Q %p dec %d\n" , 915, global_tid, queue, ct); }; | |||||
915 | ("line %d gtid %d: Q %p dec %d\n", __LINE__, global_tid, queue, ct))if (kmp_f_debug >= 1) { __kmp_debug_printf ("line %d gtid %d: Q %p dec %d\n" , 915, global_tid, queue, ct); };; | |||||
916 | KMP_DEBUG_ASSERT(ct >= 0)if (!(ct >= 0)) { __kmp_debug_assert("ct >= 0", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_taskq.cpp" , 916); }; | |||||
917 | ||||||
918 | queue = next; | |||||
919 | } | |||||
920 | ||||||
921 | __kmp_release_lock(&curr_queue->tq_link_lck, global_tid); | |||||
922 | } | |||||
923 | ||||||
924 | return pt; | |||||
925 | } | |||||
926 | ||||||
927 | /* Walk up the taskq tree looking for a task to execute. If we get to the root, | |||||
928 | search the tree for a descendent queue task. Must only be called when | |||||
929 | in_parallel=TRUE */ | |||||
930 | static kmpc_thunk_t * | |||||
931 | __kmp_find_task_in_ancestor_queue(kmp_taskq_t *tq, kmp_int32 global_tid, | |||||
932 | kmpc_task_queue_t *curr_queue) { | |||||
933 | kmpc_task_queue_t *queue; | |||||
934 | kmpc_thunk_t *pt; | |||||
935 | ||||||
936 | pt = NULL__null; | |||||
937 | ||||||
938 | if (curr_queue->tq.tq_parent != NULL__null) { | |||||
939 | queue = curr_queue->tq.tq_parent; | |||||
940 | ||||||
941 | while (queue != NULL__null) { | |||||
942 | if (queue->tq.tq_parent != NULL__null) { | |||||
943 | int ct; | |||||
944 | __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid); | |||||
945 | // Make sure data structures in consistent state before querying them | |||||
946 | // Seems to work without this for digital/alpha, needed for IBM/RS6000 | |||||
947 | KMP_MB(); | |||||
948 | ||||||
949 | ct = ++(queue->tq_ref_count); | |||||
950 | __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid); | |||||
951 | KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p inc %d\n", __LINE__,if (kmp_f_debug >= 1) { __kmp_debug_printf ("line %d gtid %d: Q %p inc %d\n" , 951, global_tid, queue, ct); }; | |||||
952 | global_tid, queue, ct))if (kmp_f_debug >= 1) { __kmp_debug_printf ("line %d gtid %d: Q %p inc %d\n" , 951, global_tid, queue, ct); };; | |||||
953 | } | |||||
954 | ||||||
955 | pt = __kmp_find_task_in_queue(global_tid, queue); | |||||
956 | if (pt != NULL__null) { | |||||
957 | if (queue->tq.tq_parent != NULL__null) { | |||||
958 | int ct; | |||||
959 | __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid); | |||||
960 | // Make sure data structures in consistent state before querying them | |||||
961 | // Seems to work without this for digital/alpha, needed for IBM/RS6000 | |||||
962 | KMP_MB(); | |||||
963 | ||||||
964 | ct = --(queue->tq_ref_count); | |||||
965 | KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", __LINE__,if (kmp_f_debug >= 1) { __kmp_debug_printf ("line %d gtid %d: Q %p dec %d\n" , 965, global_tid, queue, ct); }; | |||||
966 | global_tid, queue, ct))if (kmp_f_debug >= 1) { __kmp_debug_printf ("line %d gtid %d: Q %p dec %d\n" , 965, global_tid, queue, ct); };; | |||||
967 | KMP_DEBUG_ASSERT(ct >= 0)if (!(ct >= 0)) { __kmp_debug_assert("ct >= 0", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_taskq.cpp" , 967); }; | |||||
968 | ||||||
969 | __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid); | |||||
970 | } | |||||
971 | ||||||
972 | return pt; | |||||
973 | } | |||||
974 | ||||||
975 | if (queue->tq.tq_parent != NULL__null) { | |||||
976 | int ct; | |||||
977 | __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid); | |||||
978 | // Make sure data structures in consistent state before querying them | |||||
979 | // Seems to work without this for digital/alpha, needed for IBM/RS6000 | |||||
980 | KMP_MB(); | |||||
981 | ||||||
982 | ct = --(queue->tq_ref_count); | |||||
983 | KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", __LINE__,if (kmp_f_debug >= 1) { __kmp_debug_printf ("line %d gtid %d: Q %p dec %d\n" , 983, global_tid, queue, ct); }; | |||||
984 | global_tid, queue, ct))if (kmp_f_debug >= 1) { __kmp_debug_printf ("line %d gtid %d: Q %p dec %d\n" , 983, global_tid, queue, ct); };; | |||||
985 | KMP_DEBUG_ASSERT(ct >= 0)if (!(ct >= 0)) { __kmp_debug_assert("ct >= 0", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_taskq.cpp" , 985); }; | |||||
986 | } | |||||
987 | queue = queue->tq.tq_parent; | |||||
988 | ||||||
989 | if (queue != NULL__null) | |||||
990 | __kmp_release_lock(&queue->tq_link_lck, global_tid); | |||||
991 | } | |||||
992 | } | |||||
993 | ||||||
994 | pt = __kmp_find_task_in_descendant_queue(global_tid, tq->tq_root); | |||||
995 | ||||||
996 | return pt; | |||||
997 | } | |||||
998 | ||||||
999 | static int __kmp_taskq_tasks_finished(kmpc_task_queue_t *queue) { | |||||
1000 | int i; | |||||
1001 | ||||||
1002 | /* KMP_MB(); */ /* is this really necessary? */ | |||||
1003 | ||||||
1004 | for (i = 0; i < queue->tq_nproc; i++) { | |||||
1005 | if (queue->tq_th_thunks[i].ai_data != 0) | |||||
1006 | return FALSE0; | |||||
1007 | } | |||||
1008 | ||||||
1009 | return TRUE(!0); | |||||
1010 | } | |||||
1011 | ||||||
1012 | static int __kmp_taskq_has_any_children(kmpc_task_queue_t *queue) { | |||||
1013 | return (queue->tq_first_child != NULL__null); | |||||
1014 | } | |||||
1015 | ||||||
1016 | static void __kmp_remove_queue_from_tree(kmp_taskq_t *tq, kmp_int32 global_tid, | |||||
1017 | kmpc_task_queue_t *queue, | |||||
1018 | int in_parallel) { | |||||
1019 | #ifdef KMP_DEBUG1 | |||||
1020 | kmp_int32 i; | |||||
1021 | kmpc_thunk_t *thunk; | |||||
1022 | #endif | |||||
1023 | ||||||
1024 | KF_TRACE(50,if (kmp_f_debug >= 50) { __kmp_debug_printf ("Before Deletion of TaskQ at %p on (%d):\n" , queue, global_tid); } | |||||
1025 | ("Before Deletion of TaskQ at %p on (%d):\n", queue, global_tid))if (kmp_f_debug >= 50) { __kmp_debug_printf ("Before Deletion of TaskQ at %p on (%d):\n" , queue, global_tid); }; | |||||
1026 | KF_DUMP(50, __kmp_dump_task_queue(tq, queue, global_tid))if (kmp_f_debug >= 50) { int ks; __kmp_disable(&ks); ( __kmp_dump_task_queue(tq, queue, global_tid)); __kmp_enable(ks ); }; | |||||
1027 | ||||||
1028 | /* sub-queue in a recursion, not the root task queue */ | |||||
1029 | KMP_DEBUG_ASSERT(queue->tq.tq_parent != NULL)if (!(queue->tq.tq_parent != __null)) { __kmp_debug_assert ("queue->tq.tq_parent != __null", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_taskq.cpp" , 1029); }; | |||||
1030 | ||||||
1031 | if (in_parallel) { | |||||
1032 | __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid); | |||||
1033 | // Make sure data structures are in consistent state before querying them | |||||
1034 | // Seems to work without this call for digital/alpha, needed for IBM/RS6000 | |||||
1035 | KMP_MB(); | |||||
1036 | } | |||||
1037 | ||||||
1038 | KMP_DEBUG_ASSERT(queue->tq_first_child == NULL)if (!(queue->tq_first_child == __null)) { __kmp_debug_assert ("queue->tq_first_child == __null", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_taskq.cpp" , 1038); }; | |||||
1039 | ||||||
1040 | /* unlink queue from its siblings if any at this level */ | |||||
1041 | if (queue->tq_prev_child != NULL__null) | |||||
1042 | queue->tq_prev_child->tq_next_child = queue->tq_next_child; | |||||
1043 | if (queue->tq_next_child != NULL__null) | |||||
1044 | queue->tq_next_child->tq_prev_child = queue->tq_prev_child; | |||||
1045 | if (queue->tq.tq_parent->tq_first_child == queue) | |||||
1046 | queue->tq.tq_parent->tq_first_child = queue->tq_next_child; | |||||
1047 | ||||||
1048 | queue->tq_prev_child = NULL__null; | |||||
1049 | queue->tq_next_child = NULL__null; | |||||
1050 | ||||||
1051 | if (in_parallel) { | |||||
1052 | KMP_DEBUG_REF_CTS(if (kmp_f_debug >= 1) { __kmp_debug_printf ("line %d gtid %d: Q %p waiting for ref_count of %d to reach 1\n" , 1054, global_tid, queue, queue->tq_ref_count); }; | |||||
1053 | ("line %d gtid %d: Q %p waiting for ref_count of %d to reach 1\n",if (kmp_f_debug >= 1) { __kmp_debug_printf ("line %d gtid %d: Q %p waiting for ref_count of %d to reach 1\n" , 1054, global_tid, queue, queue->tq_ref_count); }; | |||||
1054 | __LINE__, global_tid, queue, queue->tq_ref_count))if (kmp_f_debug >= 1) { __kmp_debug_printf ("line %d gtid %d: Q %p waiting for ref_count of %d to reach 1\n" , 1054, global_tid, queue, queue->tq_ref_count); };; | |||||
1055 | ||||||
1056 | /* wait until all other threads have stopped accessing this queue */ | |||||
1057 | while (queue->tq_ref_count > 1) { | |||||
1058 | __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid); | |||||
1059 | ||||||
1060 | KMP_WAIT_YIELD__kmp_wait_yield_4((volatile kmp_uint32 *)&queue->tq_ref_count, 1, KMP_LE__kmp_le_4, | |||||
1061 | NULL__null); | |||||
1062 | ||||||
1063 | __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid); | |||||
1064 | // Make sure data structures are in consistent state before querying them | |||||
1065 | // Seems to work without this for digital/alpha, needed for IBM/RS6000 | |||||
1066 | KMP_MB(); | |||||
1067 | } | |||||
1068 | ||||||
1069 | __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid); | |||||
1070 | } | |||||
1071 | ||||||
1072 | KMP_DEBUG_REF_CTS(if (kmp_f_debug >= 1) { __kmp_debug_printf ("line %d gtid %d: Q %p freeing queue\n" , 1073, global_tid, queue); }; | |||||
1073 | ("line %d gtid %d: Q %p freeing queue\n", __LINE__, global_tid, queue))if (kmp_f_debug >= 1) { __kmp_debug_printf ("line %d gtid %d: Q %p freeing queue\n" , 1073, global_tid, queue); };; | |||||
1074 | ||||||
1075 | #ifdef KMP_DEBUG1 | |||||
1076 | KMP_DEBUG_ASSERT(queue->tq_flags & TQF_ALL_TASKS_QUEUED)if (!(queue->tq_flags & 0x0800)) { __kmp_debug_assert( "queue->tq_flags & 0x0800", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_taskq.cpp" , 1076); }; | |||||
1077 | KMP_DEBUG_ASSERT(queue->tq_nfull == 0)if (!(queue->tq_nfull == 0)) { __kmp_debug_assert("queue->tq_nfull == 0" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_taskq.cpp" , 1077); }; | |||||
1078 | ||||||
1079 | for (i = 0; i < queue->tq_nproc; i++) { | |||||
1080 | KMP_DEBUG_ASSERT(queue->tq_th_thunks[i].ai_data == 0)if (!(queue->tq_th_thunks[i].ai_data == 0)) { __kmp_debug_assert ("queue->tq_th_thunks[i].ai_data == 0", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_taskq.cpp" , 1080); }; | |||||
1081 | } | |||||
1082 | ||||||
1083 | i = 0; | |||||
1084 | for (thunk = queue->tq_free_thunks; thunk != NULL__null; | |||||
1085 | thunk = thunk->th.th_next_free) | |||||
1086 | ++i; | |||||
1087 | ||||||
1088 | KMP_ASSERT(i ==if (!(i == queue->tq_nslots + (queue->tq_nproc * 1))) { __kmp_debug_assert("i == queue->tq_nslots + (queue->tq_nproc * __KMP_TASKQ_THUNKS_PER_TH)" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_taskq.cpp" , 1089); } | |||||
1089 | queue->tq_nslots + (queue->tq_nproc * __KMP_TASKQ_THUNKS_PER_TH))if (!(i == queue->tq_nslots + (queue->tq_nproc * 1))) { __kmp_debug_assert("i == queue->tq_nslots + (queue->tq_nproc * __KMP_TASKQ_THUNKS_PER_TH)" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_taskq.cpp" , 1089); }; | |||||
1090 | #endif | |||||
1091 | ||||||
1092 | /* release storage for queue entry */ | |||||
1093 | __kmp_free_taskq(tq, queue, TRUE(!0), global_tid); | |||||
1094 | ||||||
1095 | KF_TRACE(50, ("After Deletion of TaskQ at %p on (%d):\n", queue, global_tid))if (kmp_f_debug >= 50) { __kmp_debug_printf ("After Deletion of TaskQ at %p on (%d):\n" , queue, global_tid); }; | |||||
1096 | KF_DUMP(50, __kmp_dump_task_queue_tree(tq, tq->tq_root, global_tid))if (kmp_f_debug >= 50) { int ks; __kmp_disable(&ks); ( __kmp_dump_task_queue_tree(tq, tq->tq_root, global_tid)); __kmp_enable (ks); }; | |||||
1097 | } | |||||
1098 | ||||||
1099 | /* Starting from indicated queue, proceed downward through tree and remove all | |||||
1100 | taskqs which are finished, but only go down to taskqs which have the "nowait" | |||||
1101 | clause present. Assume this is only called when in_parallel=TRUE. */ | |||||
1102 | ||||||
1103 | static void __kmp_find_and_remove_finished_child_taskq( | |||||
1104 | kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *curr_queue) { | |||||
1105 | kmpc_task_queue_t *queue = curr_queue; | |||||
1106 | ||||||
1107 | if (curr_queue->tq_first_child != NULL__null) { | |||||
1108 | __kmp_acquire_lock(&curr_queue->tq_link_lck, global_tid); | |||||
1109 | // Make sure data structures are in consistent state before querying them | |||||
1110 | // Seems to work without this call for digital/alpha, needed for IBM/RS6000 | |||||
1111 | KMP_MB(); | |||||
1112 | ||||||
1113 | queue = CCAST(kmpc_task_queue_t *, curr_queue->tq_first_child)const_cast<kmpc_task_queue_t *>(curr_queue->tq_first_child ); | |||||
1114 | if (queue != NULL__null) { | |||||
1115 | __kmp_release_lock(&curr_queue->tq_link_lck, global_tid); | |||||
1116 | return; | |||||
1117 | } | |||||
1118 | ||||||
1119 | while (queue != NULL__null) { | |||||
1120 | kmpc_task_queue_t *next; | |||||
1121 | int ct = ++(queue->tq_ref_count); | |||||
1122 | KMP_DEBUG_REF_CTS(if (kmp_f_debug >= 1) { __kmp_debug_printf ("line %d gtid %d: Q %p inc %d\n" , 1123, global_tid, queue, ct); }; | |||||
1123 | ("line %d gtid %d: Q %p inc %d\n", __LINE__, global_tid, queue, ct))if (kmp_f_debug >= 1) { __kmp_debug_printf ("line %d gtid %d: Q %p inc %d\n" , 1123, global_tid, queue, ct); };; | |||||
1124 | ||||||
1125 | /* although reference count stays active during descendant walk, */ | |||||
1126 | /* shouldn't matter since if children still exist, reference */ | |||||
1127 | /* counts aren't being monitored anyway */ | |||||
1128 | ||||||
1129 | if (queue->tq_flags & TQF_IS_NOWAIT0x0004) { | |||||
1130 | __kmp_find_and_remove_finished_child_taskq(tq, global_tid, queue); | |||||
1131 | ||||||
1132 | if ((queue->tq_flags & TQF_ALL_TASKS_QUEUED0x0800) && | |||||
1133 | (queue->tq_nfull == 0) && __kmp_taskq_tasks_finished(queue) && | |||||
1134 | !__kmp_taskq_has_any_children(queue)) { | |||||
1135 | ||||||
1136 | /* Only remove this if we have not already marked it for deallocation. | |||||
1137 | This should prevent multiple threads from trying to free this. */ | |||||
1138 | ||||||
1139 | if (__kmp_test_lock(&queue->tq_queue_lck, global_tid)) { | |||||
1140 | if (!(queue->tq_flags & TQF_DEALLOCATED0x2000)) { | |||||
1141 | queue->tq_flags |= TQF_DEALLOCATED0x2000; | |||||
1142 | __kmp_release_lock(&queue->tq_queue_lck, global_tid); | |||||
1143 | ||||||
1144 | __kmp_remove_queue_from_tree(tq, global_tid, queue, TRUE(!0)); | |||||
1145 | ||||||
1146 | /* Can't do any more here since can't be sure where sibling queue | |||||
1147 | * is so just exit this level */ | |||||
1148 | return; | |||||
1149 | } else { | |||||
1150 | __kmp_release_lock(&queue->tq_queue_lck, global_tid); | |||||
1151 | } | |||||
1152 | } | |||||
1153 | /* otherwise, just fall through and decrement reference count */ | |||||
1154 | } | |||||
1155 | } | |||||
1156 | ||||||
1157 | __kmp_acquire_lock(&curr_queue->tq_link_lck, global_tid); | |||||
1158 | // Make sure data structures are in consistent state before querying them | |||||
1159 | // Seems to work without this for digital/alpha, needed for IBM/RS6000 | |||||
1160 | KMP_MB(); | |||||
1161 | ||||||
1162 | next = queue->tq_next_child; | |||||
1163 | ||||||
1164 | ct = --(queue->tq_ref_count); | |||||
1165 | KMP_DEBUG_REF_CTS(if (kmp_f_debug >= 1) { __kmp_debug_printf ("line %d gtid %d: Q %p dec %d\n" , 1166, global_tid, queue, ct); }; | |||||
1166 | ("line %d gtid %d: Q %p dec %d\n", __LINE__, global_tid, queue, ct))if (kmp_f_debug >= 1) { __kmp_debug_printf ("line %d gtid %d: Q %p dec %d\n" , 1166, global_tid, queue, ct); };; | |||||
1167 | KMP_DEBUG_ASSERT(ct >= 0)if (!(ct >= 0)) { __kmp_debug_assert("ct >= 0", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_taskq.cpp" , 1167); }; | |||||
1168 | ||||||
1169 | queue = next; | |||||
1170 | } | |||||
1171 | ||||||
1172 | __kmp_release_lock(&curr_queue->tq_link_lck, global_tid); | |||||
1173 | } | |||||
1174 | } | |||||
1175 | ||||||
1176 | /* Starting from indicated queue, proceed downward through tree and remove all | |||||
1177 | taskq's assuming all are finished and assuming NO other threads are executing | |||||
1178 | at this point. */ | |||||
1179 | static void __kmp_remove_all_child_taskq(kmp_taskq_t *tq, kmp_int32 global_tid, | |||||
1180 | kmpc_task_queue_t *queue) { | |||||
1181 | kmpc_task_queue_t *next_child; | |||||
1182 | ||||||
1183 | queue = CCAST(kmpc_task_queue_t *, queue->tq_first_child)const_cast<kmpc_task_queue_t *>(queue->tq_first_child ); | |||||
1184 | ||||||
1185 | while (queue != NULL__null) { | |||||
1186 | __kmp_remove_all_child_taskq(tq, global_tid, queue); | |||||
1187 | ||||||
1188 | next_child = queue->tq_next_child; | |||||
1189 | queue->tq_flags |= TQF_DEALLOCATED0x2000; | |||||
1190 | __kmp_remove_queue_from_tree(tq, global_tid, queue, FALSE0); | |||||
1191 | queue = next_child; | |||||
1192 | } | |||||
1193 | } | |||||
1194 | ||||||
/* Execute one dequeued thunk on the calling thread: bind the appropriate
   copy of the shared variables, maintain the per-thread curr_thunk stack
   (parallel case only), run the task body, then free the thunk and drop
   the per-thread outstanding-thunk count and the queue reference count
   that were taken when the thunk was handed out. */
static void __kmp_execute_task_from_queue(kmp_taskq_t *tq, ident_t *loc,
                                          kmp_int32 global_tid,
                                          kmpc_thunk_t *thunk,
                                          int in_parallel) {
  kmpc_task_queue_t *queue = thunk->th.th_shareds->sv_queue;
  kmp_int32 tid = __kmp_tid_from_gtid(global_tid);

  KF_TRACE(100, ("After dequeueing this Task on (%d):\n", global_tid));
  KF_DUMP(100, __kmp_dump_thunk(tq, thunk, global_tid));
  KF_TRACE(100, ("Task Queue: %p looks like this (%d):\n", queue, global_tid));
  KF_DUMP(100, __kmp_dump_task_queue(tq, queue, global_tid));

  /* For the taskq task, the curr_thunk pushes and pop pairs are set up as
   * follows:
   *
   * happens exactly once:
   * 1) __kmpc_taskq             : push (if returning thunk only)
   * 4) __kmpc_end_taskq_task    : pop
   *
   * optionally happens *each* time taskq task is dequeued/enqueued:
   * 2) __kmpc_taskq_task        : pop
   * 3) __kmp_execute_task_from_queue  : push
   *
   * execution ordering:  1,(2,3)*,4
   */

  if (!(thunk->th_flags & TQF_TASKQ_TASK)) {
    /* Regular task: only the root queue keeps per-thread shared-var copies;
       every nested queue has a single copy at index 0. */
    kmp_int32 index = (queue == tq->tq_root) ? tid : 0;
    thunk->th.th_shareds =
        CCAST(kmpc_shared_vars_t *, queue->tq_shareds[index].ai_data);

    if (__kmp_env_consistency_check) {
      __kmp_push_workshare(global_tid,
                           (queue->tq_flags & TQF_IS_ORDERED) ? ct_task_ordered
                                                              : ct_task,
                           queue->tq_loc);
    }
  } else {
    if (__kmp_env_consistency_check)
      __kmp_push_workshare(global_tid, ct_taskq, queue->tq_loc);
  }

  if (in_parallel) {
    /* Push this thunk on the thread's enclosing-thunk stack (step 3 above). */
    thunk->th_encl_thunk = tq->tq_curr_thunk[tid];
    tq->tq_curr_thunk[tid] = thunk;

    KF_DUMP(200, __kmp_dump_thunk_stack(tq->tq_curr_thunk[tid], global_tid));
  }

  KF_TRACE(50, ("Begin Executing Thunk %p from queue %p on (%d)\n", thunk,
                queue, global_tid));
  thunk->th_task(global_tid, thunk);
  KF_TRACE(50, ("End Executing Thunk %p from queue %p on (%d)\n", thunk, queue,
                global_tid));

  if (!(thunk->th_flags & TQF_TASKQ_TASK)) {
    if (__kmp_env_consistency_check)
      __kmp_pop_workshare(global_tid,
                          (queue->tq_flags & TQF_IS_ORDERED) ? ct_task_ordered
                                                             : ct_task,
                          queue->tq_loc);

    if (in_parallel) {
      /* Pop the thunk off the thread's enclosing-thunk stack again. */
      tq->tq_curr_thunk[tid] = thunk->th_encl_thunk;
      thunk->th_encl_thunk = NULL;
      KF_DUMP(200, __kmp_dump_thunk_stack(tq->tq_curr_thunk[tid], global_tid));
    }

    if ((thunk->th_flags & TQF_IS_ORDERED) && in_parallel) {
      __kmp_taskq_check_ordered(global_tid, thunk);
    }

    __kmp_free_thunk(queue, thunk, in_parallel, global_tid);

    KF_TRACE(100, ("T#%d After freeing thunk: %p, TaskQ looks like this:\n",
                   global_tid, thunk));
    KF_DUMP(100, __kmp_dump_task_queue(tq, queue, global_tid));

    if (in_parallel) {
      KMP_MB(); /* needed so thunk put on free list before outstanding thunk
                   count is decremented */

      KMP_DEBUG_ASSERT(queue->tq_th_thunks[tid].ai_data >= 1);

      KF_TRACE(
          200,
          ("__kmp_execute_task_from_queue: T#%d has %d thunks in queue %p\n",
           global_tid, queue->tq_th_thunks[tid].ai_data - 1, queue));

      queue->tq_th_thunks[tid].ai_data--;

      /* KMP_MB(); */ /* is MB really necessary ? */
    }

    /* Drop the reference taken on a nested queue when the thunk was handed
       out; the parent's link lock guards the reference count. */
    if (queue->tq.tq_parent != NULL && in_parallel) {
      int ct;
      __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
      ct = --(queue->tq_ref_count);
      __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
      KMP_DEBUG_REF_CTS(
          ("line %d gtid %d: Q %p dec %d\n", __LINE__, global_tid, queue, ct));
      KMP_DEBUG_ASSERT(ct >= 0);
    }
  }
}
1300 | ||||||
1301 | /* starts a taskq; creates and returns a thunk for the taskq_task */ | |||||
1302 | /* also, returns pointer to shared vars for this thread in "shareds" arg */ | |||||
1303 | kmpc_thunk_t *__kmpc_taskq(ident_t *loc, kmp_int32 global_tid, | |||||
1304 | kmpc_task_t taskq_task, size_t sizeof_thunk, | |||||
1305 | size_t sizeof_shareds, kmp_int32 flags, | |||||
1306 | kmpc_shared_vars_t **shareds) { | |||||
1307 | int in_parallel; | |||||
1308 | kmp_int32 nslots, nthunks, nshareds, nproc; | |||||
1309 | kmpc_task_queue_t *new_queue, *curr_queue; | |||||
1310 | kmpc_thunk_t *new_taskq_thunk; | |||||
1311 | kmp_info_t *th; | |||||
1312 | kmp_team_t *team; | |||||
1313 | kmp_taskq_t *tq; | |||||
1314 | kmp_int32 tid; | |||||
1315 | ||||||
1316 | KE_TRACE(10, ("__kmpc_taskq called (%d)\n", global_tid))if (kmp_e_debug >= 10) { __kmp_debug_printf ("__kmpc_taskq called (%d)\n" , global_tid); }; | |||||
1317 | ||||||
1318 | th = __kmp_threads[global_tid]; | |||||
1319 | team = th->th.th_team; | |||||
1320 | tq = &team->t.t_taskq; | |||||
1321 | nproc = team->t.t_nproc; | |||||
1322 | tid = __kmp_tid_from_gtid(global_tid); | |||||
1323 | ||||||
1324 | /* find out whether this is a parallel taskq or serialized one. */ | |||||
1325 | in_parallel = in_parallel_context(team); | |||||
| ||||||
1326 | ||||||
1327 | if (!tq->tq_root) { | |||||
1328 | if (in_parallel) { | |||||
1329 | /* Vector ORDERED SECTION to taskq version */ | |||||
1330 | th->th.th_dispatch->th_deo_fcn = __kmp_taskq_eo; | |||||
1331 | ||||||
1332 | /* Vector ORDERED SECTION to taskq version */ | |||||
1333 | th->th.th_dispatch->th_dxo_fcn = __kmp_taskq_xo; | |||||
1334 | } | |||||
1335 | ||||||
1336 | if (in_parallel) { | |||||
1337 | // This shouldn't be a barrier region boundary, it will confuse the user. | |||||
1338 | /* Need the boundary to be at the end taskq instead. */ | |||||
1339 | if (__kmp_barrier(bs_plain_barrier, global_tid, TRUE(!0), 0, NULL__null, NULL__null)) { | |||||
1340 | /* Creating the active root queue, and we are not the master thread. */ | |||||
1341 | /* The master thread below created the queue and tasks have been */ | |||||
1342 | /* enqueued, and the master thread released this barrier. This */ | |||||
1343 | /* worker thread can now proceed and execute tasks. See also the */ | |||||
1344 | /* TQF_RELEASE_WORKERS which is used to handle this case. */ | |||||
1345 | *shareds = | |||||
1346 | CCAST(kmpc_shared_vars_t *, tq->tq_root->tq_shareds[tid].ai_data)const_cast<kmpc_shared_vars_t *>(tq->tq_root->tq_shareds [tid].ai_data); | |||||
| ||||||
1347 | KE_TRACE(10, ("__kmpc_taskq return (%d)\n", global_tid))if (kmp_e_debug >= 10) { __kmp_debug_printf ("__kmpc_taskq return (%d)\n" , global_tid); }; | |||||
1348 | ||||||
1349 | return NULL__null; | |||||
1350 | } | |||||
1351 | } | |||||
1352 | ||||||
1353 | /* master thread only executes this code */ | |||||
1354 | if (tq->tq_curr_thunk_capacity < nproc) { | |||||
1355 | if (tq->tq_curr_thunk) | |||||
1356 | __kmp_free(tq->tq_curr_thunk)___kmp_free((tq->tq_curr_thunk), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_taskq.cpp" , 1356); | |||||
1357 | else { | |||||
1358 | /* only need to do this once at outer level, i.e. when tq_curr_thunk is | |||||
1359 | * still NULL */ | |||||
1360 | __kmp_init_lock(&tq->tq_freelist_lck); | |||||
1361 | } | |||||
1362 | ||||||
1363 | tq->tq_curr_thunk = | |||||
1364 | (kmpc_thunk_t **)__kmp_allocate(nproc * sizeof(kmpc_thunk_t *))___kmp_allocate((nproc * sizeof(kmpc_thunk_t *)), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_taskq.cpp" , 1364); | |||||
1365 | tq->tq_curr_thunk_capacity = nproc; | |||||
1366 | } | |||||
1367 | ||||||
1368 | if (in_parallel) | |||||
1369 | tq->tq_global_flags = TQF_RELEASE_WORKERS0x0400; | |||||
1370 | } | |||||
1371 | ||||||
1372 | /* dkp: in future, if flags & TQF_HEURISTICS, will choose nslots based */ | |||||
1373 | /* on some heuristics (e.g., depth of queue nesting?). */ | |||||
1374 | nslots = (in_parallel) ? (2 * nproc) : 1; | |||||
1375 | ||||||
1376 | /* There must be nproc * __KMP_TASKQ_THUNKS_PER_TH extra slots for pending */ | |||||
1377 | /* jobs being executed by other threads, and one extra for taskq slot */ | |||||
1378 | nthunks = (in_parallel) ? (nslots + (nproc * __KMP_TASKQ_THUNKS_PER_TH1) + 1) | |||||
1379 | : nslots + 2; | |||||
1380 | ||||||
1381 | /* Only the root taskq gets a per-thread array of shareds. */ | |||||
1382 | /* The rest of the taskq's only get one copy of the shared vars. */ | |||||
1383 | nshareds = (!tq->tq_root && in_parallel) ? nproc : 1; | |||||
1384 | ||||||
1385 | /* create overall queue data structure and its components that require | |||||
1386 | * allocation */ | |||||
1387 | new_queue = __kmp_alloc_taskq(tq, in_parallel, nslots, nthunks, nshareds, | |||||
1388 | nproc, sizeof_thunk, sizeof_shareds, | |||||
1389 | &new_taskq_thunk, global_tid); | |||||
1390 | ||||||
1391 | /* rest of new_queue initializations */ | |||||
1392 | new_queue->tq_flags = flags & TQF_INTERFACE_FLAGS0x00ff; | |||||
1393 | ||||||
1394 | if (in_parallel) { | |||||
1395 | new_queue->tq_tasknum_queuing = 0; | |||||
1396 | new_queue->tq_tasknum_serving = 0; | |||||
1397 | new_queue->tq_flags |= TQF_PARALLEL_CONTEXT0x1000; | |||||
1398 | } | |||||
1399 | ||||||
1400 | new_queue->tq_taskq_slot = NULL__null; | |||||
1401 | new_queue->tq_nslots = nslots; | |||||
1402 | new_queue->tq_hiwat = HIGH_WATER_MARK(nslots)(((nslots)*3) / 4); | |||||
1403 | new_queue->tq_nfull = 0; | |||||
1404 | new_queue->tq_head = 0; | |||||
1405 | new_queue->tq_tail = 0; | |||||
1406 | new_queue->tq_loc = loc; | |||||
1407 | ||||||
1408 | if ((new_queue->tq_flags & TQF_IS_ORDERED0x0001) && in_parallel) { | |||||
1409 | /* prepare to serve the first-queued task's ORDERED directive */ | |||||
1410 | new_queue->tq_tasknum_serving = 1; | |||||
1411 | ||||||
1412 | /* Vector ORDERED SECTION to taskq version */ | |||||
1413 | th->th.th_dispatch->th_deo_fcn = __kmp_taskq_eo; | |||||
1414 | ||||||
1415 | /* Vector ORDERED SECTION to taskq version */ | |||||
1416 | th->th.th_dispatch->th_dxo_fcn = __kmp_taskq_xo; | |||||
1417 | } | |||||
1418 | ||||||
1419 | /* create a new thunk for the taskq_task in the new_queue */ | |||||
1420 | *shareds = CCAST(kmpc_shared_vars_t *, new_queue->tq_shareds[0].ai_data)const_cast<kmpc_shared_vars_t *>(new_queue->tq_shareds [0].ai_data); | |||||
1421 | ||||||
1422 | new_taskq_thunk->th.th_shareds = *shareds; | |||||
1423 | new_taskq_thunk->th_task = taskq_task; | |||||
1424 | new_taskq_thunk->th_flags = new_queue->tq_flags | TQF_TASKQ_TASK0x0200; | |||||
1425 | new_taskq_thunk->th_status = 0; | |||||
1426 | ||||||
1427 | KMP_DEBUG_ASSERT(new_taskq_thunk->th_flags & TQF_TASKQ_TASK)if (!(new_taskq_thunk->th_flags & 0x0200)) { __kmp_debug_assert ("new_taskq_thunk->th_flags & 0x0200", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_taskq.cpp" , 1427); }; | |||||
1428 | ||||||
1429 | // Make sure these inits complete before threads start using this queue | |||||
1430 | /* KMP_MB(); */ // (necessary?) | |||||
1431 | ||||||
1432 | /* insert the new task queue into the tree, but only after all fields | |||||
1433 | * initialized */ | |||||
1434 | ||||||
1435 | if (in_parallel) { | |||||
1436 | if (!tq->tq_root) { | |||||
1437 | new_queue->tq.tq_parent = NULL__null; | |||||
1438 | new_queue->tq_first_child = NULL__null; | |||||
1439 | new_queue->tq_next_child = NULL__null; | |||||
1440 | new_queue->tq_prev_child = NULL__null; | |||||
1441 | new_queue->tq_ref_count = 1; | |||||
1442 | tq->tq_root = new_queue; | |||||
1443 | } else { | |||||
1444 | curr_queue = tq->tq_curr_thunk[tid]->th.th_shareds->sv_queue; | |||||
1445 | new_queue->tq.tq_parent = curr_queue; | |||||
1446 | new_queue->tq_first_child = NULL__null; | |||||
1447 | new_queue->tq_prev_child = NULL__null; | |||||
1448 | new_queue->tq_ref_count = | |||||
1449 | 1; /* for this the thread that built the queue */ | |||||
1450 | ||||||
1451 | KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p alloc %d\n", __LINE__,if (kmp_f_debug >= 1) { __kmp_debug_printf ("line %d gtid %d: Q %p alloc %d\n" , 1451, global_tid, new_queue, new_queue->tq_ref_count); } ; | |||||
1452 | global_tid, new_queue, new_queue->tq_ref_count))if (kmp_f_debug >= 1) { __kmp_debug_printf ("line %d gtid %d: Q %p alloc %d\n" , 1451, global_tid, new_queue, new_queue->tq_ref_count); } ;; | |||||
1453 | ||||||
1454 | __kmp_acquire_lock(&curr_queue->tq_link_lck, global_tid); | |||||
1455 | ||||||
1456 | // Make sure data structures are in consistent state before querying them | |||||
1457 | // Seems to work without this for digital/alpha, needed for IBM/RS6000 | |||||
1458 | KMP_MB(); | |||||
1459 | ||||||
1460 | new_queue->tq_next_child = | |||||
1461 | CCAST(struct kmpc_task_queue_t *, curr_queue->tq_first_child)const_cast<struct kmpc_task_queue_t *>(curr_queue->tq_first_child ); | |||||
1462 | ||||||
1463 | if (curr_queue->tq_first_child != NULL__null) | |||||
1464 | curr_queue->tq_first_child->tq_prev_child = new_queue; | |||||
1465 | ||||||
1466 | curr_queue->tq_first_child = new_queue; | |||||
1467 | ||||||
1468 | __kmp_release_lock(&curr_queue->tq_link_lck, global_tid); | |||||
1469 | } | |||||
1470 | ||||||
1471 | /* set up thunk stack only after code that determines curr_queue above */ | |||||
1472 | new_taskq_thunk->th_encl_thunk = tq->tq_curr_thunk[tid]; | |||||
1473 | tq->tq_curr_thunk[tid] = new_taskq_thunk; | |||||
1474 | ||||||
1475 | KF_DUMP(200, __kmp_dump_thunk_stack(tq->tq_curr_thunk[tid], global_tid))if (kmp_f_debug >= 200) { int ks; __kmp_disable(&ks); ( __kmp_dump_thunk_stack(tq->tq_curr_thunk[tid], global_tid) ); __kmp_enable(ks); }; | |||||
1476 | } else { | |||||
1477 | new_taskq_thunk->th_encl_thunk = 0; | |||||
1478 | new_queue->tq.tq_parent = NULL__null; | |||||
1479 | new_queue->tq_first_child = NULL__null; | |||||
1480 | new_queue->tq_next_child = NULL__null; | |||||
1481 | new_queue->tq_prev_child = NULL__null; | |||||
1482 | new_queue->tq_ref_count = 1; | |||||
1483 | } | |||||
1484 | ||||||
1485 | #ifdef KMP_DEBUG1 | |||||
1486 | KF_TRACE(150, ("Creating TaskQ Task on (%d):\n", global_tid))if (kmp_f_debug >= 150) { __kmp_debug_printf ("Creating TaskQ Task on (%d):\n" , global_tid); }; | |||||
1487 | KF_DUMP(150, __kmp_dump_thunk(tq, new_taskq_thunk, global_tid))if (kmp_f_debug >= 150) { int ks; __kmp_disable(&ks); ( __kmp_dump_thunk(tq, new_taskq_thunk, global_tid)); __kmp_enable (ks); }; | |||||
1488 | ||||||
1489 | if (in_parallel) { | |||||
1490 | KF_TRACE(25,if (kmp_f_debug >= 25) { __kmp_debug_printf ("After TaskQ at %p Creation on (%d):\n" , new_queue, global_tid); } | |||||
1491 | ("After TaskQ at %p Creation on (%d):\n", new_queue, global_tid))if (kmp_f_debug >= 25) { __kmp_debug_printf ("After TaskQ at %p Creation on (%d):\n" , new_queue, global_tid); }; | |||||
1492 | } else { | |||||
1493 | KF_TRACE(25, ("After Serial TaskQ at %p Creation on (%d):\n", new_queue,if (kmp_f_debug >= 25) { __kmp_debug_printf ("After Serial TaskQ at %p Creation on (%d):\n" , new_queue, global_tid); } | |||||
1494 | global_tid))if (kmp_f_debug >= 25) { __kmp_debug_printf ("After Serial TaskQ at %p Creation on (%d):\n" , new_queue, global_tid); }; | |||||
1495 | } | |||||
1496 | ||||||
1497 | KF_DUMP(25, __kmp_dump_task_queue(tq, new_queue, global_tid))if (kmp_f_debug >= 25) { int ks; __kmp_disable(&ks); ( __kmp_dump_task_queue(tq, new_queue, global_tid)); __kmp_enable (ks); }; | |||||
1498 | ||||||
1499 | if (in_parallel) { | |||||
1500 | KF_DUMP(50, __kmp_dump_task_queue_tree(tq, tq->tq_root, global_tid))if (kmp_f_debug >= 50) { int ks; __kmp_disable(&ks); ( __kmp_dump_task_queue_tree(tq, tq->tq_root, global_tid)); __kmp_enable (ks); }; | |||||
1501 | } | |||||
1502 | #endif /* KMP_DEBUG */ | |||||
1503 | ||||||
1504 | if (__kmp_env_consistency_check) | |||||
1505 | __kmp_push_workshare(global_tid, ct_taskq, new_queue->tq_loc); | |||||
1506 | ||||||
1507 | KE_TRACE(10, ("__kmpc_taskq return (%d)\n", global_tid))if (kmp_e_debug >= 10) { __kmp_debug_printf ("__kmpc_taskq return (%d)\n" , global_tid); }; | |||||
1508 | ||||||
1509 | return new_taskq_thunk; | |||||
1510 | } | |||||
1511 | ||||||
1512 | /* ends a taskq; last thread out destroys the queue */ | |||||
1513 | ||||||
1514 | void __kmpc_end_taskq(ident_t *loc, kmp_int32 global_tid, | |||||
1515 | kmpc_thunk_t *taskq_thunk) { | |||||
1516 | #ifdef KMP_DEBUG1 | |||||
1517 | kmp_int32 i; | |||||
1518 | #endif | |||||
1519 | kmp_taskq_t *tq; | |||||
1520 | int in_parallel; | |||||
1521 | kmp_info_t *th; | |||||
1522 | kmp_int32 is_outermost; | |||||
1523 | kmpc_task_queue_t *queue; | |||||
1524 | kmpc_thunk_t *thunk; | |||||
1525 | int nproc; | |||||
1526 | ||||||
1527 | KE_TRACE(10, ("__kmpc_end_taskq called (%d)\n", global_tid))if (kmp_e_debug >= 10) { __kmp_debug_printf ("__kmpc_end_taskq called (%d)\n" , global_tid); }; | |||||
1528 | ||||||
1529 | tq = &__kmp_threads[global_tid]->th.th_team->t.t_taskq; | |||||
1530 | nproc = __kmp_threads[global_tid]->th.th_team->t.t_nproc; | |||||
1531 | ||||||
1532 | /* For the outermost taskq only, all but one thread will have taskq_thunk == | |||||
1533 | * NULL */ | |||||
1534 | queue = (taskq_thunk == NULL__null) ? tq->tq_root | |||||
1535 | : taskq_thunk->th.th_shareds->sv_queue; | |||||
1536 | ||||||
1537 | KE_TRACE(50, ("__kmpc_end_taskq queue=%p (%d) \n", queue, global_tid))if (kmp_e_debug >= 50) { __kmp_debug_printf ("__kmpc_end_taskq queue=%p (%d) \n" , queue, global_tid); }; | |||||
1538 | is_outermost = (queue == tq->tq_root); | |||||
1539 | in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT0x1000); | |||||
1540 | ||||||
1541 | if (in_parallel) { | |||||
1542 | kmp_uint32 spins; | |||||
1543 | ||||||
1544 | /* this is just a safeguard to release the waiting threads if */ | |||||
1545 | /* the outermost taskq never queues a task */ | |||||
1546 | ||||||
1547 | if (is_outermost && (KMP_MASTER_GTID(global_tid)(__kmp_tid_from_gtid((global_tid)) == 0))) { | |||||
1548 | if (tq->tq_global_flags & TQF_RELEASE_WORKERS0x0400) { | |||||
1549 | /* no lock needed, workers are still in spin mode */ | |||||
1550 | tq->tq_global_flags &= ~TQF_RELEASE_WORKERS0x0400; | |||||
1551 | ||||||
1552 | __kmp_end_split_barrier(bs_plain_barrier, global_tid); | |||||
1553 | } | |||||
1554 | } | |||||
1555 | ||||||
1556 | /* keep dequeueing work until all tasks are queued and dequeued */ | |||||
1557 | ||||||
1558 | do { | |||||
1559 | /* wait until something is available to dequeue */ | |||||
1560 | KMP_INIT_YIELD(spins){ (spins) = __kmp_yield_init; }; | |||||
1561 | ||||||
1562 | while ((queue->tq_nfull == 0) && (queue->tq_taskq_slot == NULL__null) && | |||||
1563 | (!__kmp_taskq_has_any_children(queue)) && | |||||
1564 | (!(queue->tq_flags & TQF_ALL_TASKS_QUEUED0x0800))) { | |||||
1565 | KMP_YIELD_WHEN(TRUE, spins){ __kmp_x86_pause(); (spins) -= 2; if (!(spins)) { __kmp_yield ((!0)); (spins) = __kmp_yield_next; } }; | |||||
1566 | } | |||||
1567 | ||||||
1568 | /* check to see if we can execute tasks in the queue */ | |||||
1569 | while (((queue->tq_nfull != 0) || (queue->tq_taskq_slot != NULL__null)) && | |||||
1570 | (thunk = __kmp_find_task_in_queue(global_tid, queue)) != NULL__null) { | |||||
1571 | KF_TRACE(50, ("Found thunk: %p in primary queue %p (%d)\n", thunk,if (kmp_f_debug >= 50) { __kmp_debug_printf ("Found thunk: %p in primary queue %p (%d)\n" , thunk, queue, global_tid); } | |||||
1572 | queue, global_tid))if (kmp_f_debug >= 50) { __kmp_debug_printf ("Found thunk: %p in primary queue %p (%d)\n" , thunk, queue, global_tid); }; | |||||
1573 | __kmp_execute_task_from_queue(tq, loc, global_tid, thunk, in_parallel); | |||||
1574 | } | |||||
1575 | ||||||
1576 | /* see if work found can be found in a descendant queue */ | |||||
1577 | if ((__kmp_taskq_has_any_children(queue)) && | |||||
1578 | (thunk = __kmp_find_task_in_descendant_queue(global_tid, queue)) != | |||||
1579 | NULL__null) { | |||||
1580 | ||||||
1581 | KF_TRACE(50,if (kmp_f_debug >= 50) { __kmp_debug_printf ("Stole thunk: %p in descendant queue: %p while waiting in " "queue: %p (%d)\n", thunk, thunk->th.th_shareds->sv_queue , queue, global_tid); } | |||||
1582 | ("Stole thunk: %p in descendant queue: %p while waiting in "if (kmp_f_debug >= 50) { __kmp_debug_printf ("Stole thunk: %p in descendant queue: %p while waiting in " "queue: %p (%d)\n", thunk, thunk->th.th_shareds->sv_queue , queue, global_tid); } | |||||
1583 | "queue: %p (%d)\n",if (kmp_f_debug >= 50) { __kmp_debug_printf ("Stole thunk: %p in descendant queue: %p while waiting in " "queue: %p (%d)\n", thunk, thunk->th.th_shareds->sv_queue , queue, global_tid); } | |||||
1584 | thunk, thunk->th.th_shareds->sv_queue, queue, global_tid))if (kmp_f_debug >= 50) { __kmp_debug_printf ("Stole thunk: %p in descendant queue: %p while waiting in " "queue: %p (%d)\n", thunk, thunk->th.th_shareds->sv_queue , queue, global_tid); }; | |||||
1585 | ||||||
1586 | __kmp_execute_task_from_queue(tq, loc, global_tid, thunk, in_parallel); | |||||
1587 | } | |||||
1588 | ||||||
1589 | } while ((!(queue->tq_flags & TQF_ALL_TASKS_QUEUED0x0800)) || | |||||
1590 | (queue->tq_nfull != 0)); | |||||
1591 | ||||||
1592 | KF_TRACE(50, ("All tasks queued and dequeued in queue: %p (%d)\n", queue,if (kmp_f_debug >= 50) { __kmp_debug_printf ("All tasks queued and dequeued in queue: %p (%d)\n" , queue, global_tid); } | |||||
1593 | global_tid))if (kmp_f_debug >= 50) { __kmp_debug_printf ("All tasks queued and dequeued in queue: %p (%d)\n" , queue, global_tid); }; | |||||
1594 | ||||||
1595 | /* wait while all tasks are not finished and more work found | |||||
1596 | in descendant queues */ | |||||
1597 | ||||||
1598 | while ((!__kmp_taskq_tasks_finished(queue)) && | |||||
1599 | (thunk = __kmp_find_task_in_descendant_queue(global_tid, queue)) != | |||||
1600 | NULL__null) { | |||||
1601 | ||||||
1602 | KF_TRACE(50, ("Stole thunk: %p in descendant queue: %p while waiting in "if (kmp_f_debug >= 50) { __kmp_debug_printf ("Stole thunk: %p in descendant queue: %p while waiting in " "queue: %p (%d)\n", thunk, thunk->th.th_shareds->sv_queue , queue, global_tid); } | |||||
1603 | "queue: %p (%d)\n",if (kmp_f_debug >= 50) { __kmp_debug_printf ("Stole thunk: %p in descendant queue: %p while waiting in " "queue: %p (%d)\n", thunk, thunk->th.th_shareds->sv_queue , queue, global_tid); } | |||||
1604 | thunk, thunk->th.th_shareds->sv_queue, queue, global_tid))if (kmp_f_debug >= 50) { __kmp_debug_printf ("Stole thunk: %p in descendant queue: %p while waiting in " "queue: %p (%d)\n", thunk, thunk->th.th_shareds->sv_queue , queue, global_tid); }; | |||||
1605 | ||||||
1606 | __kmp_execute_task_from_queue(tq, loc, global_tid, thunk, in_parallel); | |||||
1607 | } | |||||
1608 | ||||||
1609 | KF_TRACE(50, ("No work found in descendent queues or all work finished in "if (kmp_f_debug >= 50) { __kmp_debug_printf ("No work found in descendent queues or all work finished in " "queue: %p (%d)\n", queue, global_tid); } | |||||
1610 | "queue: %p (%d)\n",if (kmp_f_debug >= 50) { __kmp_debug_printf ("No work found in descendent queues or all work finished in " "queue: %p (%d)\n", queue, global_tid); } | |||||
1611 | queue, global_tid))if (kmp_f_debug >= 50) { __kmp_debug_printf ("No work found in descendent queues or all work finished in " "queue: %p (%d)\n", queue, global_tid); }; | |||||
1612 | ||||||
1613 | if (!is_outermost) { | |||||
1614 | /* need to return if NOWAIT present and not outermost taskq */ | |||||
1615 | ||||||
1616 | if (queue->tq_flags & TQF_IS_NOWAIT0x0004) { | |||||
1617 | __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid); | |||||
1618 | queue->tq_ref_count--; | |||||
1619 | KMP_DEBUG_ASSERT(queue->tq_ref_count >= 0)if (!(queue->tq_ref_count >= 0)) { __kmp_debug_assert("queue->tq_ref_count >= 0" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_taskq.cpp" , 1619); }; | |||||
1620 | __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid); | |||||
1621 | ||||||
1622 | KE_TRACE(if (kmp_e_debug >= 10) { __kmp_debug_printf ("__kmpc_end_taskq return for nowait case (%d)\n" , global_tid); } | |||||
1623 | 10, ("__kmpc_end_taskq return for nowait case (%d)\n", global_tid))if (kmp_e_debug >= 10) { __kmp_debug_printf ("__kmpc_end_taskq return for nowait case (%d)\n" , global_tid); }; | |||||
1624 | ||||||
1625 | return; | |||||
1626 | } | |||||
1627 | ||||||
1628 | __kmp_find_and_remove_finished_child_taskq(tq, global_tid, queue); | |||||
1629 | ||||||
1630 | /* WAIT until all tasks are finished and no child queues exist before | |||||
1631 | * proceeding */ | |||||
1632 | KMP_INIT_YIELD(spins){ (spins) = __kmp_yield_init; }; | |||||
1633 | ||||||
1634 | while (!__kmp_taskq_tasks_finished(queue) || | |||||
1635 | __kmp_taskq_has_any_children(queue)) { | |||||
1636 | thunk = __kmp_find_task_in_ancestor_queue(tq, global_tid, queue); | |||||
1637 | ||||||
1638 | if (thunk != NULL__null) { | |||||
1639 | KF_TRACE(50,if (kmp_f_debug >= 50) { __kmp_debug_printf ("Stole thunk: %p in ancestor queue: %p while waiting in " "queue: %p (%d)\n", thunk, thunk->th.th_shareds->sv_queue , queue, global_tid); } | |||||
1640 | ("Stole thunk: %p in ancestor queue: %p while waiting in "if (kmp_f_debug >= 50) { __kmp_debug_printf ("Stole thunk: %p in ancestor queue: %p while waiting in " "queue: %p (%d)\n", thunk, thunk->th.th_shareds->sv_queue , queue, global_tid); } | |||||
1641 | "queue: %p (%d)\n",if (kmp_f_debug >= 50) { __kmp_debug_printf ("Stole thunk: %p in ancestor queue: %p while waiting in " "queue: %p (%d)\n", thunk, thunk->th.th_shareds->sv_queue , queue, global_tid); } | |||||
1642 | thunk, thunk->th.th_shareds->sv_queue, queue, global_tid))if (kmp_f_debug >= 50) { __kmp_debug_printf ("Stole thunk: %p in ancestor queue: %p while waiting in " "queue: %p (%d)\n", thunk, thunk->th.th_shareds->sv_queue , queue, global_tid); }; | |||||
1643 | __kmp_execute_task_from_queue(tq, loc, global_tid, thunk, | |||||
1644 | in_parallel); | |||||
1645 | } | |||||
1646 | ||||||
1647 | KMP_YIELD_WHEN(thunk == NULL, spins){ __kmp_x86_pause(); (spins) -= 2; if (!(spins)) { __kmp_yield (thunk == __null); (spins) = __kmp_yield_next; } }; | |||||
1648 | ||||||
1649 | __kmp_find_and_remove_finished_child_taskq(tq, global_tid, queue); | |||||
1650 | } | |||||
1651 | ||||||
1652 | __kmp_acquire_lock(&queue->tq_queue_lck, global_tid); | |||||
1653 | if (!(queue->tq_flags & TQF_DEALLOCATED0x2000)) { | |||||
1654 | queue->tq_flags |= TQF_DEALLOCATED0x2000; | |||||
1655 | } | |||||
1656 | __kmp_release_lock(&queue->tq_queue_lck, global_tid); | |||||
1657 | ||||||
1658 | /* only the allocating thread can deallocate the queue */ | |||||
1659 | if (taskq_thunk != NULL__null) { | |||||
1660 | __kmp_remove_queue_from_tree(tq, global_tid, queue, TRUE(!0)); | |||||
1661 | } | |||||
1662 | ||||||
1663 | KE_TRACE(if (kmp_e_debug >= 10) { __kmp_debug_printf ("__kmpc_end_taskq return for non_outermost queue, wait case (%d)\n" , global_tid); } | |||||
1664 | 10,if (kmp_e_debug >= 10) { __kmp_debug_printf ("__kmpc_end_taskq return for non_outermost queue, wait case (%d)\n" , global_tid); } | |||||
1665 | ("__kmpc_end_taskq return for non_outermost queue, wait case (%d)\n",if (kmp_e_debug >= 10) { __kmp_debug_printf ("__kmpc_end_taskq return for non_outermost queue, wait case (%d)\n" , global_tid); } | |||||
1666 | global_tid))if (kmp_e_debug >= 10) { __kmp_debug_printf ("__kmpc_end_taskq return for non_outermost queue, wait case (%d)\n" , global_tid); }; | |||||
1667 | ||||||
1668 | return; | |||||
1669 | } | |||||
1670 | ||||||
1671 | // Outermost Queue: steal work from descendants until all tasks are finished | |||||
1672 | ||||||
1673 | KMP_INIT_YIELD(spins){ (spins) = __kmp_yield_init; }; | |||||
1674 | ||||||
1675 | while (!__kmp_taskq_tasks_finished(queue)) { | |||||
1676 | thunk = __kmp_find_task_in_descendant_queue(global_tid, queue); | |||||
1677 | ||||||
1678 | if (thunk != NULL__null) { | |||||
1679 | KF_TRACE(50,if (kmp_f_debug >= 50) { __kmp_debug_printf ("Stole thunk: %p in descendant queue: %p while waiting in " "queue: %p (%d)\n", thunk, thunk->th.th_shareds->sv_queue , queue, global_tid); } | |||||
1680 | ("Stole thunk: %p in descendant queue: %p while waiting in "if (kmp_f_debug >= 50) { __kmp_debug_printf ("Stole thunk: %p in descendant queue: %p while waiting in " "queue: %p (%d)\n", thunk, thunk->th.th_shareds->sv_queue , queue, global_tid); } | |||||
1681 | "queue: %p (%d)\n",if (kmp_f_debug >= 50) { __kmp_debug_printf ("Stole thunk: %p in descendant queue: %p while waiting in " "queue: %p (%d)\n", thunk, thunk->th.th_shareds->sv_queue , queue, global_tid); } | |||||
1682 | thunk, thunk->th.th_shareds->sv_queue, queue, global_tid))if (kmp_f_debug >= 50) { __kmp_debug_printf ("Stole thunk: %p in descendant queue: %p while waiting in " "queue: %p (%d)\n", thunk, thunk->th.th_shareds->sv_queue , queue, global_tid); }; | |||||
1683 | ||||||
1684 | __kmp_execute_task_from_queue(tq, loc, global_tid, thunk, in_parallel); | |||||
1685 | } | |||||
1686 | ||||||
1687 | KMP_YIELD_WHEN(thunk == NULL, spins){ __kmp_x86_pause(); (spins) -= 2; if (!(spins)) { __kmp_yield (thunk == __null); (spins) = __kmp_yield_next; } }; | |||||
1688 | } | |||||
1689 | ||||||
1690 | /* Need this barrier to prevent destruction of queue before threads have all | |||||
1691 | * executed above code */ | |||||
1692 | /* This may need to be done earlier when NOWAIT is implemented for the | |||||
1693 | * outermost level */ | |||||
1694 | ||||||
1695 | if (!__kmp_barrier(bs_plain_barrier, global_tid, TRUE(!0), 0, NULL__null, NULL__null)) { | |||||
1696 | /* the queue->tq_flags & TQF_IS_NOWAIT case is not yet handled here; */ | |||||
1697 | /* for right now, everybody waits, and the master thread destroys the */ | |||||
1698 | /* remaining queues. */ | |||||
1699 | ||||||
1700 | __kmp_remove_all_child_taskq(tq, global_tid, queue); | |||||
1701 | ||||||
1702 | /* Now destroy the root queue */ | |||||
1703 | KF_TRACE(100, ("T#%d Before Deletion of top-level TaskQ at %p:\n",if (kmp_f_debug >= 100) { __kmp_debug_printf ("T#%d Before Deletion of top-level TaskQ at %p:\n" , global_tid, queue); } | |||||
1704 | global_tid, queue))if (kmp_f_debug >= 100) { __kmp_debug_printf ("T#%d Before Deletion of top-level TaskQ at %p:\n" , global_tid, queue); }; | |||||
1705 | KF_DUMP(100, __kmp_dump_task_queue(tq, queue, global_tid))if (kmp_f_debug >= 100) { int ks; __kmp_disable(&ks); ( __kmp_dump_task_queue(tq, queue, global_tid)); __kmp_enable(ks ); }; | |||||
1706 | ||||||
1707 | #ifdef KMP_DEBUG1 | |||||
1708 | /* the root queue entry */ | |||||
1709 | KMP_DEBUG_ASSERT((queue->tq.tq_parent == NULL) &&if (!((queue->tq.tq_parent == __null) && (queue-> tq_next_child == __null))) { __kmp_debug_assert("(queue->tq.tq_parent == __null) && (queue->tq_next_child == __null)" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_taskq.cpp" , 1710); } | |||||
1710 | (queue->tq_next_child == NULL))if (!((queue->tq.tq_parent == __null) && (queue-> tq_next_child == __null))) { __kmp_debug_assert("(queue->tq.tq_parent == __null) && (queue->tq_next_child == __null)" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_taskq.cpp" , 1710); }; | |||||
1711 | ||||||
1712 | /* children must all be gone by now because of barrier above */ | |||||
1713 | KMP_DEBUG_ASSERT(queue->tq_first_child == NULL)if (!(queue->tq_first_child == __null)) { __kmp_debug_assert ("queue->tq_first_child == __null", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_taskq.cpp" , 1713); }; | |||||
1714 | ||||||
1715 | for (i = 0; i < nproc; i++) { | |||||
1716 | KMP_DEBUG_ASSERT(queue->tq_th_thunks[i].ai_data == 0)if (!(queue->tq_th_thunks[i].ai_data == 0)) { __kmp_debug_assert ("queue->tq_th_thunks[i].ai_data == 0", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_taskq.cpp" , 1716); }; | |||||
1717 | } | |||||
1718 | ||||||
1719 | for (i = 0, thunk = queue->tq_free_thunks; thunk != NULL__null; | |||||
1720 | i++, thunk = thunk->th.th_next_free) | |||||
1721 | ; | |||||
1722 | ||||||
1723 | KMP_DEBUG_ASSERT(i ==if (!(i == queue->tq_nslots + (nproc * 1))) { __kmp_debug_assert ("i == queue->tq_nslots + (nproc * 1)", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_taskq.cpp" , 1724); } | |||||
1724 | queue->tq_nslots + (nproc * __KMP_TASKQ_THUNKS_PER_TH))if (!(i == queue->tq_nslots + (nproc * 1))) { __kmp_debug_assert ("i == queue->tq_nslots + (nproc * 1)", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_taskq.cpp" , 1724); }; | |||||
1725 | ||||||
1726 | for (i = 0; i < nproc; i++) { | |||||
1727 | KMP_DEBUG_ASSERT(!tq->tq_curr_thunk[i])if (!(!tq->tq_curr_thunk[i])) { __kmp_debug_assert("!tq->tq_curr_thunk[i]" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_taskq.cpp" , 1727); }; | |||||
1728 | } | |||||
1729 | #endif | |||||
1730 | /* unlink the root queue entry */ | |||||
1731 | tq->tq_root = NULL__null; | |||||
1732 | ||||||
1733 | /* release storage for root queue entry */ | |||||
1734 | KF_TRACE(50, ("After Deletion of top-level TaskQ at %p on (%d):\n", queue,if (kmp_f_debug >= 50) { __kmp_debug_printf ("After Deletion of top-level TaskQ at %p on (%d):\n" , queue, global_tid); } | |||||
1735 | global_tid))if (kmp_f_debug >= 50) { __kmp_debug_printf ("After Deletion of top-level TaskQ at %p on (%d):\n" , queue, global_tid); }; | |||||
1736 | ||||||
1737 | queue->tq_flags |= TQF_DEALLOCATED0x2000; | |||||
1738 | __kmp_free_taskq(tq, queue, in_parallel, global_tid); | |||||
1739 | ||||||
1740 | KF_DUMP(50, __kmp_dump_task_queue_tree(tq, tq->tq_root, global_tid))if (kmp_f_debug >= 50) { int ks; __kmp_disable(&ks); ( __kmp_dump_task_queue_tree(tq, tq->tq_root, global_tid)); __kmp_enable (ks); }; | |||||
1741 | ||||||
1742 | /* release the workers now that the data structures are up to date */ | |||||
1743 | __kmp_end_split_barrier(bs_plain_barrier, global_tid); | |||||
1744 | } | |||||
1745 | ||||||
1746 | th = __kmp_threads[global_tid]; | |||||
1747 | ||||||
1748 | /* Reset ORDERED SECTION to parallel version */ | |||||
1749 | th->th.th_dispatch->th_deo_fcn = 0; | |||||
1750 | ||||||
1751 | /* Reset ORDERED SECTION to parallel version */ | |||||
1752 | th->th.th_dispatch->th_dxo_fcn = 0; | |||||
1753 | } else { | |||||
1754 | /* in serial execution context, dequeue the last task */ | |||||
1755 | /* and execute it, if there were any tasks encountered */ | |||||
1756 | ||||||
1757 | if (queue->tq_nfull > 0) { | |||||
1758 | KMP_DEBUG_ASSERT(queue->tq_nfull == 1)if (!(queue->tq_nfull == 1)) { __kmp_debug_assert("queue->tq_nfull == 1" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_taskq.cpp" , 1758); }; | |||||
1759 | ||||||
1760 | thunk = __kmp_dequeue_task(global_tid, queue, in_parallel); | |||||
1761 | ||||||
1762 | if (queue->tq_flags & TQF_IS_LAST_TASK0x0100) { | |||||
1763 | /* TQF_IS_LASTPRIVATE, one thing in queue, __kmpc_end_taskq_task() */ | |||||
1764 | /* has been run so this is last task, run with TQF_IS_LAST_TASK so */ | |||||
1765 | /* instrumentation does copy-out. */ | |||||
1766 | ||||||
1767 | /* no need for test_then_or call since already locked */ | |||||
1768 | thunk->th_flags |= TQF_IS_LAST_TASK0x0100; | |||||
1769 | } | |||||
1770 | ||||||
1771 | KF_TRACE(50, ("T#%d found thunk: %p in serial queue: %p\n", global_tid,if (kmp_f_debug >= 50) { __kmp_debug_printf ("T#%d found thunk: %p in serial queue: %p\n" , global_tid, thunk, queue); } | |||||
1772 | thunk, queue))if (kmp_f_debug >= 50) { __kmp_debug_printf ("T#%d found thunk: %p in serial queue: %p\n" , global_tid, thunk, queue); }; | |||||
1773 | ||||||
1774 | __kmp_execute_task_from_queue(tq, loc, global_tid, thunk, in_parallel); | |||||
1775 | } | |||||
1776 | ||||||
1777 | // destroy the unattached serial queue now that there is no more work to do | |||||
1778 | KF_TRACE(100, ("Before Deletion of Serialized TaskQ at %p on (%d):\n",if (kmp_f_debug >= 100) { __kmp_debug_printf ("Before Deletion of Serialized TaskQ at %p on (%d):\n" , queue, global_tid); } | |||||
1779 | queue, global_tid))if (kmp_f_debug >= 100) { __kmp_debug_printf ("Before Deletion of Serialized TaskQ at %p on (%d):\n" , queue, global_tid); }; | |||||
1780 | KF_DUMP(100, __kmp_dump_task_queue(tq, queue, global_tid))if (kmp_f_debug >= 100) { int ks; __kmp_disable(&ks); ( __kmp_dump_task_queue(tq, queue, global_tid)); __kmp_enable(ks ); }; | |||||
1781 | ||||||
1782 | #ifdef KMP_DEBUG1 | |||||
1783 | i = 0; | |||||
1784 | for (thunk = queue->tq_free_thunks; thunk != NULL__null; | |||||
1785 | thunk = thunk->th.th_next_free) | |||||
1786 | ++i; | |||||
1787 | KMP_DEBUG_ASSERT(i == queue->tq_nslots + 1)if (!(i == queue->tq_nslots + 1)) { __kmp_debug_assert("i == queue->tq_nslots + 1" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_taskq.cpp" , 1787); }; | |||||
1788 | #endif | |||||
1789 | /* release storage for unattached serial queue */ | |||||
1790 | KF_TRACE(50,if (kmp_f_debug >= 50) { __kmp_debug_printf ("Serialized TaskQ at %p deleted on (%d).\n" , queue, global_tid); } | |||||
1791 | ("Serialized TaskQ at %p deleted on (%d).\n", queue, global_tid))if (kmp_f_debug >= 50) { __kmp_debug_printf ("Serialized TaskQ at %p deleted on (%d).\n" , queue, global_tid); }; | |||||
1792 | ||||||
1793 | queue->tq_flags |= TQF_DEALLOCATED0x2000; | |||||
1794 | __kmp_free_taskq(tq, queue, in_parallel, global_tid); | |||||
1795 | } | |||||
1796 | ||||||
1797 | KE_TRACE(10, ("__kmpc_end_taskq return (%d)\n", global_tid))if (kmp_e_debug >= 10) { __kmp_debug_printf ("__kmpc_end_taskq return (%d)\n" , global_tid); }; | |||||
1798 | } | |||||
1799 | ||||||
1800 | /* Enqueues a task for thunk previously created by __kmpc_task_buffer. */ | |||||
1801 | /* Returns nonzero if just filled up queue */ | |||||
1802 | ||||||
1803 | kmp_int32 __kmpc_task(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk) { | |||||
1804 | kmp_int32 ret; | |||||
1805 | kmpc_task_queue_t *queue; | |||||
1806 | int in_parallel; | |||||
1807 | kmp_taskq_t *tq; | |||||
1808 | ||||||
1809 | KE_TRACE(10, ("__kmpc_task called (%d)\n", global_tid))if (kmp_e_debug >= 10) { __kmp_debug_printf ("__kmpc_task called (%d)\n" , global_tid); }; | |||||
1810 | ||||||
1811 | KMP_DEBUG_ASSERT(!(thunk->th_flags &if (!(!(thunk->th_flags & 0x0200))) { __kmp_debug_assert ("!(thunk->th_flags & 0x0200)", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_taskq.cpp" , 1812); } | |||||
1812 | TQF_TASKQ_TASK))if (!(!(thunk->th_flags & 0x0200))) { __kmp_debug_assert ("!(thunk->th_flags & 0x0200)", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_taskq.cpp" , 1812); }; /* thunk->th_task is a regular task */ | |||||
1813 | ||||||
1814 | tq = &__kmp_threads[global_tid]->th.th_team->t.t_taskq; | |||||
1815 | queue = thunk->th.th_shareds->sv_queue; | |||||
1816 | in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT0x1000); | |||||
1817 | ||||||
1818 | if (in_parallel && (thunk->th_flags & TQF_IS_ORDERED0x0001)) | |||||
1819 | thunk->th_tasknum = ++queue->tq_tasknum_queuing; | |||||
1820 | ||||||
1821 | /* For serial execution dequeue the preceding task and execute it, if one | |||||
1822 | * exists */ | |||||
1823 | /* This cannot be the last task. That one is handled in __kmpc_end_taskq */ | |||||
1824 | ||||||
1825 | if (!in_parallel && queue->tq_nfull > 0) { | |||||
1826 | kmpc_thunk_t *prev_thunk; | |||||
1827 | ||||||
1828 | KMP_DEBUG_ASSERT(queue->tq_nfull == 1)if (!(queue->tq_nfull == 1)) { __kmp_debug_assert("queue->tq_nfull == 1" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_taskq.cpp" , 1828); }; | |||||
1829 | ||||||
1830 | prev_thunk = __kmp_dequeue_task(global_tid, queue, in_parallel); | |||||
1831 | ||||||
1832 | KF_TRACE(50, ("T#%d found thunk: %p in serial queue: %p\n", global_tid,if (kmp_f_debug >= 50) { __kmp_debug_printf ("T#%d found thunk: %p in serial queue: %p\n" , global_tid, prev_thunk, queue); } | |||||
1833 | prev_thunk, queue))if (kmp_f_debug >= 50) { __kmp_debug_printf ("T#%d found thunk: %p in serial queue: %p\n" , global_tid, prev_thunk, queue); }; | |||||
1834 | ||||||
1835 | __kmp_execute_task_from_queue(tq, loc, global_tid, prev_thunk, in_parallel); | |||||
1836 | } | |||||
1837 | ||||||
1838 | /* The instrumentation sequence is: __kmpc_task_buffer(), initialize private | |||||
1839 | variables, __kmpc_task(). The __kmpc_task_buffer routine checks that the | |||||
1840 | task queue is not full and allocates a thunk (which is then passed to | |||||
1841 | __kmpc_task()). So, the enqueue below should never fail due to a full | |||||
1842 | queue. */ | |||||
1843 | ||||||
1844 | KF_TRACE(100, ("After enqueueing this Task on (%d):\n", global_tid))if (kmp_f_debug >= 100) { __kmp_debug_printf ("After enqueueing this Task on (%d):\n" , global_tid); }; | |||||
1845 | KF_DUMP(100, __kmp_dump_thunk(tq, thunk, global_tid))if (kmp_f_debug >= 100) { int ks; __kmp_disable(&ks); ( __kmp_dump_thunk(tq, thunk, global_tid)); __kmp_enable(ks); }; | |||||
1846 | ||||||
1847 | ret = __kmp_enqueue_task(tq, global_tid, queue, thunk, in_parallel); | |||||
1848 | ||||||
1849 | KF_TRACE(100, ("Task Queue looks like this on (%d):\n", global_tid))if (kmp_f_debug >= 100) { __kmp_debug_printf ("Task Queue looks like this on (%d):\n" , global_tid); }; | |||||
1850 | KF_DUMP(100, __kmp_dump_task_queue(tq, queue, global_tid))if (kmp_f_debug >= 100) { int ks; __kmp_disable(&ks); ( __kmp_dump_task_queue(tq, queue, global_tid)); __kmp_enable(ks ); }; | |||||
1851 | ||||||
1852 | KE_TRACE(10, ("__kmpc_task return (%d)\n", global_tid))if (kmp_e_debug >= 10) { __kmp_debug_printf ("__kmpc_task return (%d)\n" , global_tid); }; | |||||
1853 | ||||||
1854 | return ret; | |||||
1855 | } | |||||
1856 | ||||||
/* Enqueues a taskq_task for a thunk previously created by __kmpc_taskq. */
/* This should never be called unless in a parallel context. */
/* 'status' is the taskq_task's return value, published to consumers via */
/* thunk->th_status before the thunk becomes visible in tq_taskq_slot.   */

void __kmpc_taskq_task(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk,
                       kmp_int32 status) {
  kmpc_task_queue_t *queue;
  kmp_taskq_t *tq = &__kmp_threads[global_tid]->th.th_team->t.t_taskq;
  int tid = __kmp_tid_from_gtid(global_tid);

  KE_TRACE(10, ("__kmpc_taskq_task called (%d)\n", global_tid));
  KF_TRACE(100, ("TaskQ Task argument thunk on (%d):\n", global_tid));
  KF_DUMP(100, __kmp_dump_thunk(tq, thunk, global_tid));

  /* The queue this thunk belongs to, recovered via its shared-vars block. */
  queue = thunk->th.th_shareds->sv_queue;

  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(global_tid, ct_taskq, loc);

  /* thunk->th_task is the taskq_task */
  KMP_DEBUG_ASSERT(thunk->th_flags & TQF_TASKQ_TASK);

  /* not supposed to call __kmpc_taskq_task if it's already enqueued */
  KMP_DEBUG_ASSERT(queue->tq_taskq_slot == NULL);

  /* dequeue taskq thunk from curr_thunk stack */
  tq->tq_curr_thunk[tid] = thunk->th_encl_thunk;
  thunk->th_encl_thunk = NULL;

  KF_DUMP(200, __kmp_dump_thunk_stack(tq->tq_curr_thunk[tid], global_tid));

  thunk->th_status = status;

  // Flush thunk->th_status before taskq_task enqueued to avoid race condition:
  // once tq_taskq_slot is set below, other threads may read th_status, so the
  // store above must be globally visible first.
  KMP_MB();

  /* enqueue taskq_task in thunk into special slot in queue */
  /* GEH - probably don't need to lock taskq slot since only one */
  /* thread enqueues & already a lock set at dequeue point */

  queue->tq_taskq_slot = thunk;

  KE_TRACE(10, ("__kmpc_taskq_task return (%d)\n", global_tid));
}
1900 | ||||||
/* Ends a taskq_task; the thread is done generating tasks for this queue.  */
/* Marks the queue TQF_ALL_TASKS_QUEUED (and, for a lastprivate taskq,     */
/* TQF_IS_LAST_TASK), then pops the taskq thunk off the curr_thunk stack.  */

void __kmpc_end_taskq_task(ident_t *loc, kmp_int32 global_tid,
                           kmpc_thunk_t *thunk) {
  kmp_taskq_t *tq;
  kmpc_task_queue_t *queue;
  int in_parallel;
  int tid;

  KE_TRACE(10, ("__kmpc_end_taskq_task called (%d)\n", global_tid));

  tq = &__kmp_threads[global_tid]->th.th_team->t.t_taskq;
  queue = thunk->th.th_shareds->sv_queue;
  in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);
  tid = __kmp_tid_from_gtid(global_tid);

  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(global_tid, ct_taskq, loc);

  if (in_parallel) {
    /* Other threads may read tq_flags concurrently, so the OR must be atomic:
       either a hardware fetch-and-or (x86) or under the queue lock. */
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
    KMP_TEST_THEN_OR32(RCAST(volatile kmp_uint32 *, &queue->tq_flags),
                       TQF_ALL_TASKS_QUEUED);
#else
    {
      __kmp_acquire_lock(&queue->tq_queue_lck, global_tid);

      // Make sure data structures are in consistent state before querying them
      // Seems to work without this for digital/alpha, needed for IBM/RS6000
      KMP_MB();

      queue->tq_flags |= TQF_ALL_TASKS_QUEUED;
      __kmp_release_lock(&queue->tq_queue_lck, global_tid);
    }
#endif
  }

  if (thunk->th_flags & TQF_IS_LASTPRIVATE) {
    /* Normally, __kmp_find_task_in_queue() refuses to schedule the last task in
       the queue if TQF_IS_LASTPRIVATE so we can positively identify that last
       task and run it with its TQF_IS_LAST_TASK bit turned on in th_flags.
       When __kmpc_end_taskq_task() is called we are done generating all the
       tasks, so we know the last one in the queue is the lastprivate task.
       Mark the queue as having gotten to this state via tq_flags &
       TQF_IS_LAST_TASK; when that task actually executes mark it via th_flags &
       TQF_IS_LAST_TASK (this th_flags bit signals the instrumented code to do
       copy-outs after execution). */
    if (!in_parallel) {
      /* No synchronization needed for serial context */
      queue->tq_flags |= TQF_IS_LAST_TASK;
    } else {
      /* Same atomic-or-else-locked pattern as TQF_ALL_TASKS_QUEUED above. */
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
      KMP_TEST_THEN_OR32(RCAST(volatile kmp_uint32 *, &queue->tq_flags),
                         TQF_IS_LAST_TASK);
#else
      {
        __kmp_acquire_lock(&queue->tq_queue_lck, global_tid);

        // Make sure data structures in consistent state before querying them
        // Seems to work without this for digital/alpha, needed for IBM/RS6000
        KMP_MB();

        queue->tq_flags |= TQF_IS_LAST_TASK;
        __kmp_release_lock(&queue->tq_queue_lck, global_tid);
      }
#endif
      /* to prevent race condition where last task is dequeued but */
      /* flag isn't visible yet (not sure about this) */
      KMP_MB();
    }
  }

  /* dequeue taskq thunk from curr_thunk stack */
  if (in_parallel) {
    tq->tq_curr_thunk[tid] = thunk->th_encl_thunk;
    thunk->th_encl_thunk = NULL;

    KF_DUMP(200, __kmp_dump_thunk_stack(tq->tq_curr_thunk[tid], global_tid));
  }

  KE_TRACE(10, ("__kmpc_end_taskq_task return (%d)\n", global_tid));
}
1983 | ||||||
1984 | /* returns thunk for a regular task based on taskq_thunk */ | |||||
1985 | /* (__kmpc_taskq_task does the analogous thing for a TQF_TASKQ_TASK) */ | |||||
1986 | ||||||
1987 | kmpc_thunk_t *__kmpc_task_buffer(ident_t *loc, kmp_int32 global_tid, | |||||
1988 | kmpc_thunk_t *taskq_thunk, kmpc_task_t task) { | |||||
1989 | kmp_taskq_t *tq; | |||||
1990 | kmpc_task_queue_t *queue; | |||||
1991 | kmpc_thunk_t *new_thunk; | |||||
1992 | int in_parallel; | |||||
1993 | ||||||
1994 | KE_TRACE(10, ("__kmpc_task_buffer called (%d)\n", global_tid))if (kmp_e_debug >= 10) { __kmp_debug_printf ("__kmpc_task_buffer called (%d)\n" , global_tid); }; | |||||
1995 | ||||||
1996 | KMP_DEBUG_ASSERT(if (!(taskq_thunk->th_flags & 0x0200)) { __kmp_debug_assert ("taskq_thunk->th_flags & 0x0200", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_taskq.cpp" , 1998); } | |||||
1997 | taskq_thunk->th_flags &if (!(taskq_thunk->th_flags & 0x0200)) { __kmp_debug_assert ("taskq_thunk->th_flags & 0x0200", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_taskq.cpp" , 1998); } | |||||
1998 | TQF_TASKQ_TASK)if (!(taskq_thunk->th_flags & 0x0200)) { __kmp_debug_assert ("taskq_thunk->th_flags & 0x0200", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_taskq.cpp" , 1998); }; /* taskq_thunk->th_task is the taskq_task */ | |||||
1999 | ||||||
2000 | tq = &__kmp_threads[global_tid]->th.th_team->t.t_taskq; | |||||
2001 | queue = taskq_thunk->th.th_shareds->sv_queue; | |||||
2002 | in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT0x1000); | |||||
2003 | ||||||
2004 | /* The instrumentation sequence is: __kmpc_task_buffer(), initialize private | |||||
2005 | variables, __kmpc_task(). The __kmpc_task_buffer routine checks that the | |||||
2006 | task queue is not full and allocates a thunk (which is then passed to | |||||
2007 | __kmpc_task()). So, we can pre-allocate a thunk here assuming it will be | |||||
2008 | the next to be enqueued in __kmpc_task(). */ | |||||
2009 | ||||||
2010 | new_thunk = __kmp_alloc_thunk(queue, in_parallel, global_tid); | |||||
2011 | new_thunk->th.th_shareds = | |||||
2012 | CCAST(kmpc_shared_vars_t *, queue->tq_shareds[0].ai_data)const_cast<kmpc_shared_vars_t *>(queue->tq_shareds[0 ].ai_data); | |||||
2013 | new_thunk->th_encl_thunk = NULL__null; | |||||
2014 | new_thunk->th_task = task; | |||||
2015 | ||||||
2016 | /* GEH - shouldn't need to lock the read of tq_flags here */ | |||||
2017 | new_thunk->th_flags = queue->tq_flags & TQF_INTERFACE_FLAGS0x00ff; | |||||
2018 | ||||||
2019 | new_thunk->th_status = 0; | |||||
2020 | ||||||
2021 | KMP_DEBUG_ASSERT(!(new_thunk->th_flags & TQF_TASKQ_TASK))if (!(!(new_thunk->th_flags & 0x0200))) { __kmp_debug_assert ("!(new_thunk->th_flags & 0x0200)", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_taskq.cpp" , 2021); }; | |||||
2022 | ||||||
2023 | KF_TRACE(100, ("Creating Regular Task on (%d):\n", global_tid))if (kmp_f_debug >= 100) { __kmp_debug_printf ("Creating Regular Task on (%d):\n" , global_tid); }; | |||||
2024 | KF_DUMP(100, __kmp_dump_thunk(tq, new_thunk, global_tid))if (kmp_f_debug >= 100) { int ks; __kmp_disable(&ks); ( __kmp_dump_thunk(tq, new_thunk, global_tid)); __kmp_enable(ks ); }; | |||||
2025 | ||||||
2026 | KE_TRACE(10, ("__kmpc_task_buffer return (%d)\n", global_tid))if (kmp_e_debug >= 10) { __kmp_debug_printf ("__kmpc_task_buffer return (%d)\n" , global_tid); }; | |||||
2027 | ||||||
2028 | return new_thunk; | |||||
2029 | } |