File: | build/source/openmp/runtime/src/kmp_taskdeps.cpp |
Warning: | line 66, column 38 Division by zero |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | /* | |||
2 | * kmp_taskdeps.cpp | |||
3 | */ | |||
4 | ||||
5 | //===----------------------------------------------------------------------===// | |||
6 | // | |||
7 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
8 | // See https://llvm.org/LICENSE.txt for license information. | |||
9 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
10 | // | |||
11 | //===----------------------------------------------------------------------===// | |||
12 | ||||
13 | //#define KMP_SUPPORT_GRAPH_OUTPUT 1 | |||
14 | ||||
15 | #include "kmp.h" | |||
16 | #include "kmp_io.h" | |||
17 | #include "kmp_wait_release.h" | |||
18 | #include "kmp_taskdeps.h" | |||
19 | #if OMPT_SUPPORT1 | |||
20 | #include "ompt-specific.h" | |||
21 | #endif | |||
22 | ||||
23 | // TODO: Improve memory allocation? keep a list of pre-allocated structures? | |||
24 | // allocate in blocks? re-use list finished list entries? | |||
25 | // TODO: don't use atomic ref counters for stack-allocated nodes. | |||
26 | // TODO: find an alternate to atomic refs for heap-allocated nodes? | |||
27 | // TODO: Finish graph output support | |||
28 | // TODO: kmp_lock_t seems a tad too big (and heavy weight) for this. Check other | |||
29 | // runtime locks | |||
30 | // TODO: Any ITT support needed? | |||
31 | ||||
#ifdef KMP_SUPPORT_GRAPH_OUTPUT
// Source of ids for dependence nodes; compiled in only when graph output is
// enabled. __kmp_init_node atomically increments it to stamp each node.
static std::atomic<kmp_int32> kmp_node_id_seed = ATOMIC_VAR_INIT(0);
#endif
35 | ||||
36 | static void __kmp_init_node(kmp_depnode_t *node) { | |||
37 | node->dn.successors = NULL__null; | |||
38 | node->dn.task = NULL__null; // will point to the right task | |||
39 | // once dependences have been processed | |||
40 | for (int i = 0; i < MAX_MTX_DEPS4; ++i) | |||
41 | node->dn.mtx_locks[i] = NULL__null; | |||
42 | node->dn.mtx_num_locks = 0; | |||
43 | __kmp_init_lock(&node->dn.lock); | |||
44 | KMP_ATOMIC_ST_RLX(&node->dn.nrefs, 1)(&node->dn.nrefs)->store(1, std::memory_order_relaxed ); // init creates the first reference | |||
45 | #ifdef KMP_SUPPORT_GRAPH_OUTPUT | |||
46 | node->dn.id = KMP_ATOMIC_INC(&kmp_node_id_seed)(&kmp_node_id_seed)->fetch_add(1, std::memory_order_acq_rel ); | |||
47 | #endif | |||
48 | #if USE_ITT_BUILD1 && USE_ITT_NOTIFY1 | |||
49 | __itt_sync_create(!__kmp_itt_sync_create_ptr__3_0) ? (void)0 : __kmp_itt_sync_create_ptr__3_0(node, "OMP task dep node", NULL__null, 0); | |||
50 | #endif | |||
51 | } | |||
52 | ||||
53 | static inline kmp_depnode_t *__kmp_node_ref(kmp_depnode_t *node) { | |||
54 | KMP_ATOMIC_INC(&node->dn.nrefs)(&node->dn.nrefs)->fetch_add(1, std::memory_order_acq_rel ); | |||
55 | return node; | |||
56 | } | |||
57 | ||||
// Initial bucket counts used by __kmp_dephash_create: the larger one for
// implicit tasks, the smaller one for every other task type.
enum { KMP_DEPHASH_OTHER_SIZE = 97, KMP_DEPHASH_MASTER_SIZE = 997 };

// Bucket counts indexed by hash-table generation; __kmp_dephash_extend picks
// sizes[generation + 1] when it grows a table.
size_t sizes[] = {997, 2003, 4001, 8191, 16001, 32003, 64007, 131071, 270029};
// Growth stops once the next generation would reach MAX_GEN.
// NOTE(review): with the `gen >= MAX_GEN` check in __kmp_dephash_extend the
// last entry of sizes[] (index 8) is never selected -- confirm this is
// intended.
const size_t MAX_GEN = 8;
62 | ||||
63 | static inline size_t __kmp_dephash_hash(kmp_intptr_t addr, size_t hsize) { | |||
64 | // TODO alternate to try: set = (((Addr64)(addrUsefulBits * 9.618)) % | |||
65 | // m_num_sets ); | |||
66 | return ((addr >> 6) ^ (addr >> 2)) % hsize; | |||
| ||||
67 | } | |||
68 | ||||
69 | static kmp_dephash_t *__kmp_dephash_extend(kmp_info_t *thread, | |||
70 | kmp_dephash_t *current_dephash) { | |||
71 | kmp_dephash_t *h; | |||
72 | ||||
73 | size_t gen = current_dephash->generation + 1; | |||
74 | if (gen >= MAX_GEN) | |||
75 | return current_dephash; | |||
76 | size_t new_size = sizes[gen]; | |||
77 | ||||
78 | size_t size_to_allocate = | |||
79 | new_size * sizeof(kmp_dephash_entry_t *) + sizeof(kmp_dephash_t); | |||
80 | ||||
81 | #if USE_FAST_MEMORY3 | |||
82 | h = (kmp_dephash_t *)__kmp_fast_allocate(thread, size_to_allocate)___kmp_fast_allocate((thread), (size_to_allocate), "openmp/runtime/src/kmp_taskdeps.cpp" , 82); | |||
83 | #else | |||
84 | h = (kmp_dephash_t *)__kmp_thread_malloc(thread, size_to_allocate)___kmp_thread_malloc((thread), (size_to_allocate), "openmp/runtime/src/kmp_taskdeps.cpp" , 84); | |||
85 | #endif | |||
86 | ||||
87 | h->size = new_size; | |||
88 | h->nelements = current_dephash->nelements; | |||
89 | h->buckets = (kmp_dephash_entry **)(h + 1); | |||
90 | h->generation = gen; | |||
91 | h->nconflicts = 0; | |||
92 | h->last_all = current_dephash->last_all; | |||
93 | ||||
94 | // make sure buckets are properly initialized | |||
95 | for (size_t i = 0; i < new_size; i++) { | |||
96 | h->buckets[i] = NULL__null; | |||
97 | } | |||
98 | ||||
99 | // insert existing elements in the new table | |||
100 | for (size_t i = 0; i
| |||
101 | kmp_dephash_entry_t *next, *entry; | |||
102 | for (entry = current_dephash->buckets[i]; entry; entry = next) { | |||
103 | next = entry->next_in_bucket; | |||
104 | // Compute the new hash using the new size, and insert the entry in | |||
105 | // the new bucket. | |||
106 | size_t new_bucket = __kmp_dephash_hash(entry->addr, h->size); | |||
107 | entry->next_in_bucket = h->buckets[new_bucket]; | |||
108 | if (entry->next_in_bucket) { | |||
109 | h->nconflicts++; | |||
110 | } | |||
111 | h->buckets[new_bucket] = entry; | |||
112 | } | |||
113 | } | |||
114 | ||||
115 | // Free old hash table | |||
116 | #if USE_FAST_MEMORY3 | |||
117 | __kmp_fast_free(thread, current_dephash)___kmp_fast_free((thread), (current_dephash), "openmp/runtime/src/kmp_taskdeps.cpp" , 117); | |||
118 | #else | |||
119 | __kmp_thread_free(thread, current_dephash)___kmp_thread_free((thread), (current_dephash), "openmp/runtime/src/kmp_taskdeps.cpp" , 119); | |||
120 | #endif | |||
121 | ||||
122 | return h; | |||
123 | } | |||
124 | ||||
125 | static kmp_dephash_t *__kmp_dephash_create(kmp_info_t *thread, | |||
126 | kmp_taskdata_t *current_task) { | |||
127 | kmp_dephash_t *h; | |||
128 | ||||
129 | size_t h_size; | |||
130 | ||||
131 | if (current_task->td_flags.tasktype == TASK_IMPLICIT0) | |||
132 | h_size = KMP_DEPHASH_MASTER_SIZE; | |||
133 | else | |||
134 | h_size = KMP_DEPHASH_OTHER_SIZE; | |||
135 | ||||
136 | size_t size = h_size * sizeof(kmp_dephash_entry_t *) + sizeof(kmp_dephash_t); | |||
137 | ||||
138 | #if USE_FAST_MEMORY3 | |||
139 | h = (kmp_dephash_t *)__kmp_fast_allocate(thread, size)___kmp_fast_allocate((thread), (size), "openmp/runtime/src/kmp_taskdeps.cpp" , 139); | |||
140 | #else | |||
141 | h = (kmp_dephash_t *)__kmp_thread_malloc(thread, size)___kmp_thread_malloc((thread), (size), "openmp/runtime/src/kmp_taskdeps.cpp" , 141); | |||
142 | #endif | |||
143 | h->size = h_size; | |||
144 | ||||
145 | h->generation = 0; | |||
146 | h->nelements = 0; | |||
147 | h->nconflicts = 0; | |||
148 | h->buckets = (kmp_dephash_entry **)(h + 1); | |||
149 | h->last_all = NULL__null; | |||
150 | ||||
151 | for (size_t i = 0; i < h_size; i++) | |||
152 | h->buckets[i] = 0; | |||
153 | ||||
154 | return h; | |||
155 | } | |||
156 | ||||
157 | static kmp_dephash_entry *__kmp_dephash_find(kmp_info_t *thread, | |||
158 | kmp_dephash_t **hash, | |||
159 | kmp_intptr_t addr) { | |||
160 | kmp_dephash_t *h = *hash; | |||
161 | if (h->nelements != 0 && h->nconflicts / h->size >= 1) { | |||
162 | *hash = __kmp_dephash_extend(thread, h); | |||
163 | h = *hash; | |||
164 | } | |||
165 | size_t bucket = __kmp_dephash_hash(addr, h->size); | |||
166 | ||||
167 | kmp_dephash_entry_t *entry; | |||
168 | for (entry = h->buckets[bucket]; entry; entry = entry->next_in_bucket) | |||
169 | if (entry->addr == addr) | |||
170 | break; | |||
171 | ||||
172 | if (entry == NULL__null) { | |||
173 | // create entry. This is only done by one thread so no locking required | |||
174 | #if USE_FAST_MEMORY3 | |||
175 | entry = (kmp_dephash_entry_t *)__kmp_fast_allocate(___kmp_fast_allocate((thread), (sizeof(kmp_dephash_entry_t)), "openmp/runtime/src/kmp_taskdeps.cpp", 176) | |||
176 | thread, sizeof(kmp_dephash_entry_t))___kmp_fast_allocate((thread), (sizeof(kmp_dephash_entry_t)), "openmp/runtime/src/kmp_taskdeps.cpp", 176); | |||
177 | #else | |||
178 | entry = (kmp_dephash_entry_t *)__kmp_thread_malloc(___kmp_thread_malloc((thread), (sizeof(kmp_dephash_entry_t)), "openmp/runtime/src/kmp_taskdeps.cpp", 179) | |||
179 | thread, sizeof(kmp_dephash_entry_t))___kmp_thread_malloc((thread), (sizeof(kmp_dephash_entry_t)), "openmp/runtime/src/kmp_taskdeps.cpp", 179); | |||
180 | #endif | |||
181 | entry->addr = addr; | |||
182 | if (!h->last_all) // no predecessor task with omp_all_memory dependence | |||
183 | entry->last_out = NULL__null; | |||
184 | else // else link the omp_all_memory depnode to the new entry | |||
185 | entry->last_out = __kmp_node_ref(h->last_all); | |||
186 | entry->last_set = NULL__null; | |||
187 | entry->prev_set = NULL__null; | |||
188 | entry->last_flag = 0; | |||
189 | entry->mtx_lock = NULL__null; | |||
190 | entry->next_in_bucket = h->buckets[bucket]; | |||
191 | h->buckets[bucket] = entry; | |||
192 | h->nelements++; | |||
193 | if (entry->next_in_bucket) | |||
194 | h->nconflicts++; | |||
195 | } | |||
196 | return entry; | |||
197 | } | |||
198 | ||||
199 | static kmp_depnode_list_t *__kmp_add_node(kmp_info_t *thread, | |||
200 | kmp_depnode_list_t *list, | |||
201 | kmp_depnode_t *node) { | |||
202 | kmp_depnode_list_t *new_head; | |||
203 | ||||
204 | #if USE_FAST_MEMORY3 | |||
205 | new_head = (kmp_depnode_list_t *)__kmp_fast_allocate(___kmp_fast_allocate((thread), (sizeof(kmp_depnode_list_t)), "openmp/runtime/src/kmp_taskdeps.cpp" , 206) | |||
206 | thread, sizeof(kmp_depnode_list_t))___kmp_fast_allocate((thread), (sizeof(kmp_depnode_list_t)), "openmp/runtime/src/kmp_taskdeps.cpp" , 206); | |||
207 | #else | |||
208 | new_head = (kmp_depnode_list_t *)__kmp_thread_malloc(___kmp_thread_malloc((thread), (sizeof(kmp_depnode_list_t)), "openmp/runtime/src/kmp_taskdeps.cpp" , 209) | |||
209 | thread, sizeof(kmp_depnode_list_t))___kmp_thread_malloc((thread), (sizeof(kmp_depnode_list_t)), "openmp/runtime/src/kmp_taskdeps.cpp" , 209); | |||
210 | #endif | |||
211 | ||||
212 | new_head->node = __kmp_node_ref(node); | |||
213 | new_head->next = list; | |||
214 | ||||
215 | return new_head; | |||
216 | } | |||
217 | ||||
218 | static inline void __kmp_track_dependence(kmp_int32 gtid, kmp_depnode_t *source, | |||
219 | kmp_depnode_t *sink, | |||
220 | kmp_task_t *sink_task) { | |||
221 | #ifdef KMP_SUPPORT_GRAPH_OUTPUT | |||
222 | kmp_taskdata_t *task_source = KMP_TASK_TO_TASKDATA(source->dn.task)(((kmp_taskdata_t *)source->dn.task) - 1); | |||
223 | // do not use sink->dn.task as that is only filled after the dependences | |||
224 | // are already processed! | |||
225 | kmp_taskdata_t *task_sink = KMP_TASK_TO_TASKDATA(sink_task)(((kmp_taskdata_t *)sink_task) - 1); | |||
226 | ||||
227 | __kmp_printf("%d(%s) -> %d(%s)\n", source->dn.id, | |||
228 | task_source->td_ident->psource, sink->dn.id, | |||
229 | task_sink->td_ident->psource); | |||
230 | #endif | |||
231 | #if OMPT_SUPPORT1 && OMPT_OPTIONAL1 | |||
232 | /* OMPT tracks dependences between task (a=source, b=sink) in which | |||
233 | task a blocks the execution of b through the ompt_new_dependence_callback | |||
234 | */ | |||
235 | if (ompt_enabled.ompt_callback_task_dependence) { | |||
236 | kmp_taskdata_t *task_source = KMP_TASK_TO_TASKDATA(source->dn.task)(((kmp_taskdata_t *)source->dn.task) - 1); | |||
237 | ompt_data_t *sink_data; | |||
238 | if (sink_task) | |||
239 | sink_data = &(KMP_TASK_TO_TASKDATA(sink_task)(((kmp_taskdata_t *)sink_task) - 1)->ompt_task_info.task_data); | |||
240 | else | |||
241 | sink_data = &__kmp_threads[gtid]->th.ompt_thread_info.task_data; | |||
242 | ||||
243 | ompt_callbacks.ompt_callback(ompt_callback_task_dependence)ompt_callback_task_dependence_callback( | |||
244 | &(task_source->ompt_task_info.task_data), sink_data); | |||
245 | } | |||
246 | #endif /* OMPT_SUPPORT && OMPT_OPTIONAL */ | |||
247 | } | |||
248 | ||||
249 | static inline kmp_int32 | |||
250 | __kmp_depnode_link_successor(kmp_int32 gtid, kmp_info_t *thread, | |||
251 | kmp_task_t *task, kmp_depnode_t *node, | |||
252 | kmp_depnode_list_t *plist) { | |||
253 | if (!plist) | |||
254 | return 0; | |||
255 | kmp_int32 npredecessors = 0; | |||
256 | // link node as successor of list elements | |||
257 | for (kmp_depnode_list_t *p = plist; p; p = p->next) { | |||
258 | kmp_depnode_t *dep = p->node; | |||
259 | if (dep->dn.task) { | |||
260 | KMP_ACQUIRE_DEPNODE(gtid, dep)__kmp_acquire_lock(&(dep)->dn.lock, (gtid)); | |||
261 | if (dep->dn.task) { | |||
262 | __kmp_track_dependence(gtid, dep, node, task); | |||
263 | dep->dn.successors = __kmp_add_node(thread, dep->dn.successors, node); | |||
264 | KA_TRACE(40, ("__kmp_process_deps: T#%d adding dependence from %p to "if (kmp_a_debug >= 40) { __kmp_debug_printf ("__kmp_process_deps: T#%d adding dependence from %p to " "%p\n", gtid, (((kmp_taskdata_t *)dep->dn.task) - 1), ((( kmp_taskdata_t *)task) - 1)); } | |||
265 | "%p\n",if (kmp_a_debug >= 40) { __kmp_debug_printf ("__kmp_process_deps: T#%d adding dependence from %p to " "%p\n", gtid, (((kmp_taskdata_t *)dep->dn.task) - 1), ((( kmp_taskdata_t *)task) - 1)); } | |||
266 | gtid, KMP_TASK_TO_TASKDATA(dep->dn.task),if (kmp_a_debug >= 40) { __kmp_debug_printf ("__kmp_process_deps: T#%d adding dependence from %p to " "%p\n", gtid, (((kmp_taskdata_t *)dep->dn.task) - 1), ((( kmp_taskdata_t *)task) - 1)); } | |||
267 | KMP_TASK_TO_TASKDATA(task)))if (kmp_a_debug >= 40) { __kmp_debug_printf ("__kmp_process_deps: T#%d adding dependence from %p to " "%p\n", gtid, (((kmp_taskdata_t *)dep->dn.task) - 1), ((( kmp_taskdata_t *)task) - 1)); }; | |||
268 | npredecessors++; | |||
269 | } | |||
270 | KMP_RELEASE_DEPNODE(gtid, dep)__kmp_release_lock(&(dep)->dn.lock, (gtid)); | |||
271 | } | |||
272 | } | |||
273 | return npredecessors; | |||
274 | } | |||
275 | ||||
276 | static inline kmp_int32 __kmp_depnode_link_successor(kmp_int32 gtid, | |||
277 | kmp_info_t *thread, | |||
278 | kmp_task_t *task, | |||
279 | kmp_depnode_t *source, | |||
280 | kmp_depnode_t *sink) { | |||
281 | if (!sink) | |||
282 | return 0; | |||
283 | kmp_int32 npredecessors = 0; | |||
284 | if (sink->dn.task) { | |||
285 | // synchronously add source to sink' list of successors | |||
286 | KMP_ACQUIRE_DEPNODE(gtid, sink)__kmp_acquire_lock(&(sink)->dn.lock, (gtid)); | |||
287 | if (sink->dn.task) { | |||
288 | __kmp_track_dependence(gtid, sink, source, task); | |||
289 | sink->dn.successors = __kmp_add_node(thread, sink->dn.successors, source); | |||
290 | KA_TRACE(40, ("__kmp_process_deps: T#%d adding dependence from %p to "if (kmp_a_debug >= 40) { __kmp_debug_printf ("__kmp_process_deps: T#%d adding dependence from %p to " "%p\n", gtid, (((kmp_taskdata_t *)sink->dn.task) - 1), (( (kmp_taskdata_t *)task) - 1)); } | |||
291 | "%p\n",if (kmp_a_debug >= 40) { __kmp_debug_printf ("__kmp_process_deps: T#%d adding dependence from %p to " "%p\n", gtid, (((kmp_taskdata_t *)sink->dn.task) - 1), (( (kmp_taskdata_t *)task) - 1)); } | |||
292 | gtid, KMP_TASK_TO_TASKDATA(sink->dn.task),if (kmp_a_debug >= 40) { __kmp_debug_printf ("__kmp_process_deps: T#%d adding dependence from %p to " "%p\n", gtid, (((kmp_taskdata_t *)sink->dn.task) - 1), (( (kmp_taskdata_t *)task) - 1)); } | |||
293 | KMP_TASK_TO_TASKDATA(task)))if (kmp_a_debug >= 40) { __kmp_debug_printf ("__kmp_process_deps: T#%d adding dependence from %p to " "%p\n", gtid, (((kmp_taskdata_t *)sink->dn.task) - 1), (( (kmp_taskdata_t *)task) - 1)); }; | |||
294 | npredecessors++; | |||
295 | } | |||
296 | KMP_RELEASE_DEPNODE(gtid, sink)__kmp_release_lock(&(sink)->dn.lock, (gtid)); | |||
297 | } | |||
298 | return npredecessors; | |||
299 | } | |||
300 | ||||
301 | static inline kmp_int32 | |||
302 | __kmp_process_dep_all(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t *h, | |||
303 | bool dep_barrier, kmp_task_t *task) { | |||
304 | KA_TRACE(30, ("__kmp_process_dep_all: T#%d processing dep_all, "if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_process_dep_all: T#%d processing dep_all, " "dep_barrier = %d\n", gtid, dep_barrier); } | |||
305 | "dep_barrier = %d\n",if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_process_dep_all: T#%d processing dep_all, " "dep_barrier = %d\n", gtid, dep_barrier); } | |||
306 | gtid, dep_barrier))if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_process_dep_all: T#%d processing dep_all, " "dep_barrier = %d\n", gtid, dep_barrier); }; | |||
307 | kmp_info_t *thread = __kmp_threads[gtid]; | |||
308 | kmp_int32 npredecessors = 0; | |||
309 | ||||
310 | // process previous omp_all_memory node if any | |||
311 | npredecessors += | |||
312 | __kmp_depnode_link_successor(gtid, thread, task, node, h->last_all); | |||
313 | __kmp_node_deref(thread, h->last_all); | |||
314 | if (!dep_barrier) { | |||
315 | h->last_all = __kmp_node_ref(node); | |||
316 | } else { | |||
317 | // if this is a sync point in the serial sequence, then the previous | |||
318 | // outputs are guaranteed to be completed after the execution of this | |||
319 | // task so the previous output nodes can be cleared. | |||
320 | h->last_all = NULL__null; | |||
321 | } | |||
322 | ||||
323 | // process all regular dependences | |||
324 | for (size_t i = 0; i < h->size; i++) { | |||
325 | kmp_dephash_entry_t *info = h->buckets[i]; | |||
326 | if (!info) // skip empty slots in dephash | |||
327 | continue; | |||
328 | for (; info; info = info->next_in_bucket) { | |||
329 | // for each entry the omp_all_memory works as OUT dependence | |||
330 | kmp_depnode_t *last_out = info->last_out; | |||
331 | kmp_depnode_list_t *last_set = info->last_set; | |||
332 | kmp_depnode_list_t *prev_set = info->prev_set; | |||
333 | if (last_set) { | |||
334 | npredecessors += | |||
335 | __kmp_depnode_link_successor(gtid, thread, task, node, last_set); | |||
336 | __kmp_depnode_list_free(thread, last_set); | |||
337 | __kmp_depnode_list_free(thread, prev_set); | |||
338 | info->last_set = NULL__null; | |||
339 | info->prev_set = NULL__null; | |||
340 | info->last_flag = 0; // no sets in this dephash entry | |||
341 | } else { | |||
342 | npredecessors += | |||
343 | __kmp_depnode_link_successor(gtid, thread, task, node, last_out); | |||
344 | } | |||
345 | __kmp_node_deref(thread, last_out); | |||
346 | if (!dep_barrier) { | |||
347 | info->last_out = __kmp_node_ref(node); | |||
348 | } else { | |||
349 | info->last_out = NULL__null; | |||
350 | } | |||
351 | } | |||
352 | } | |||
353 | KA_TRACE(30, ("__kmp_process_dep_all: T#%d found %d predecessors\n", gtid,if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_process_dep_all: T#%d found %d predecessors\n" , gtid, npredecessors); } | |||
354 | npredecessors))if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_process_dep_all: T#%d found %d predecessors\n" , gtid, npredecessors); }; | |||
355 | return npredecessors; | |||
356 | } | |||
357 | ||||
358 | template <bool filter> | |||
359 | static inline kmp_int32 | |||
360 | __kmp_process_deps(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t **hash, | |||
361 | bool dep_barrier, kmp_int32 ndeps, | |||
362 | kmp_depend_info_t *dep_list, kmp_task_t *task) { | |||
363 | KA_TRACE(30, ("__kmp_process_deps<%d>: T#%d processing %d dependences : "if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_process_deps<%d>: T#%d processing %d dependences : " "dep_barrier = %d\n", filter, gtid, ndeps, dep_barrier); } | |||
364 | "dep_barrier = %d\n",if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_process_deps<%d>: T#%d processing %d dependences : " "dep_barrier = %d\n", filter, gtid, ndeps, dep_barrier); } | |||
365 | filter, gtid, ndeps, dep_barrier))if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_process_deps<%d>: T#%d processing %d dependences : " "dep_barrier = %d\n", filter, gtid, ndeps, dep_barrier); }; | |||
366 | ||||
367 | kmp_info_t *thread = __kmp_threads[gtid]; | |||
368 | kmp_int32 npredecessors = 0; | |||
369 | for (kmp_int32 i = 0; i < ndeps; i++) { | |||
370 | const kmp_depend_info_t *dep = &dep_list[i]; | |||
371 | ||||
372 | if (filter && dep->base_addr == 0) | |||
373 | continue; // skip filtered entries | |||
374 | ||||
375 | kmp_dephash_entry_t *info = | |||
376 | __kmp_dephash_find(thread, hash, dep->base_addr); | |||
377 | kmp_depnode_t *last_out = info->last_out; | |||
378 | kmp_depnode_list_t *last_set = info->last_set; | |||
379 | kmp_depnode_list_t *prev_set = info->prev_set; | |||
380 | ||||
381 | if (dep->flags.out) { // out or inout --> clean lists if any | |||
382 | if (last_set) { | |||
383 | npredecessors += | |||
384 | __kmp_depnode_link_successor(gtid, thread, task, node, last_set); | |||
385 | __kmp_depnode_list_free(thread, last_set); | |||
386 | __kmp_depnode_list_free(thread, prev_set); | |||
387 | info->last_set = NULL__null; | |||
388 | info->prev_set = NULL__null; | |||
389 | info->last_flag = 0; // no sets in this dephash entry | |||
390 | } else { | |||
391 | npredecessors += | |||
392 | __kmp_depnode_link_successor(gtid, thread, task, node, last_out); | |||
393 | } | |||
394 | __kmp_node_deref(thread, last_out); | |||
395 | if (!dep_barrier) { | |||
396 | info->last_out = __kmp_node_ref(node); | |||
397 | } else { | |||
398 | // if this is a sync point in the serial sequence, then the previous | |||
399 | // outputs are guaranteed to be completed after the execution of this | |||
400 | // task so the previous output nodes can be cleared. | |||
401 | info->last_out = NULL__null; | |||
402 | } | |||
403 | } else { // either IN or MTX or SET | |||
404 | if (info->last_flag == 0 || info->last_flag == dep->flag) { | |||
405 | // last_set either didn't exist or of same dep kind | |||
406 | // link node as successor of the last_out if any | |||
407 | npredecessors += | |||
408 | __kmp_depnode_link_successor(gtid, thread, task, node, last_out); | |||
409 | // link node as successor of all nodes in the prev_set if any | |||
410 | npredecessors += | |||
411 | __kmp_depnode_link_successor(gtid, thread, task, node, prev_set); | |||
412 | if (dep_barrier) { | |||
413 | // clean last_out and prev_set if any; don't touch last_set | |||
414 | __kmp_node_deref(thread, last_out); | |||
415 | info->last_out = NULL__null; | |||
416 | __kmp_depnode_list_free(thread, prev_set); | |||
417 | info->prev_set = NULL__null; | |||
418 | } | |||
419 | } else { // last_set is of different dep kind, make it prev_set | |||
420 | // link node as successor of all nodes in the last_set | |||
421 | npredecessors += | |||
422 | __kmp_depnode_link_successor(gtid, thread, task, node, last_set); | |||
423 | // clean last_out if any | |||
424 | __kmp_node_deref(thread, last_out); | |||
425 | info->last_out = NULL__null; | |||
426 | // clean prev_set if any | |||
427 | __kmp_depnode_list_free(thread, prev_set); | |||
428 | if (!dep_barrier) { | |||
429 | // move last_set to prev_set, new last_set will be allocated | |||
430 | info->prev_set = last_set; | |||
431 | } else { | |||
432 | info->prev_set = NULL__null; | |||
433 | info->last_flag = 0; | |||
434 | } | |||
435 | info->last_set = NULL__null; | |||
436 | } | |||
437 | // for dep_barrier last_flag value should remain: | |||
438 | // 0 if last_set is empty, unchanged otherwise | |||
439 | if (!dep_barrier) { | |||
440 | info->last_flag = dep->flag; // store dep kind of the last_set | |||
441 | info->last_set = __kmp_add_node(thread, info->last_set, node); | |||
442 | } | |||
443 | // check if we are processing MTX dependency | |||
444 | if (dep->flag == KMP_DEP_MTX0x4) { | |||
445 | if (info->mtx_lock == NULL__null) { | |||
446 | info->mtx_lock = (kmp_lock_t *)__kmp_allocate(sizeof(kmp_lock_t))___kmp_allocate((sizeof(kmp_lock_t)), "openmp/runtime/src/kmp_taskdeps.cpp" , 446); | |||
447 | __kmp_init_lock(info->mtx_lock); | |||
448 | } | |||
449 | KMP_DEBUG_ASSERT(node->dn.mtx_num_locks < MAX_MTX_DEPS)if (!(node->dn.mtx_num_locks < 4)) { __kmp_debug_assert ("node->dn.mtx_num_locks < 4", "openmp/runtime/src/kmp_taskdeps.cpp" , 449); }; | |||
450 | kmp_int32 m; | |||
451 | // Save lock in node's array | |||
452 | for (m = 0; m < MAX_MTX_DEPS4; ++m) { | |||
453 | // sort pointers in decreasing order to avoid potential livelock | |||
454 | if (node->dn.mtx_locks[m] < info->mtx_lock) { | |||
455 | KMP_DEBUG_ASSERT(!node->dn.mtx_locks[node->dn.mtx_num_locks])if (!(!node->dn.mtx_locks[node->dn.mtx_num_locks])) { __kmp_debug_assert ("!node->dn.mtx_locks[node->dn.mtx_num_locks]", "openmp/runtime/src/kmp_taskdeps.cpp" , 455); }; | |||
456 | for (int n = node->dn.mtx_num_locks; n > m; --n) { | |||
457 | // shift right all lesser non-NULL pointers | |||
458 | KMP_DEBUG_ASSERT(node->dn.mtx_locks[n - 1] != NULL)if (!(node->dn.mtx_locks[n - 1] != __null)) { __kmp_debug_assert ("node->dn.mtx_locks[n - 1] != __null", "openmp/runtime/src/kmp_taskdeps.cpp" , 458); }; | |||
459 | node->dn.mtx_locks[n] = node->dn.mtx_locks[n - 1]; | |||
460 | } | |||
461 | node->dn.mtx_locks[m] = info->mtx_lock; | |||
462 | break; | |||
463 | } | |||
464 | } | |||
465 | KMP_DEBUG_ASSERT(m < MAX_MTX_DEPS)if (!(m < 4)) { __kmp_debug_assert("m < 4", "openmp/runtime/src/kmp_taskdeps.cpp" , 465); }; // must break from loop | |||
466 | node->dn.mtx_num_locks++; | |||
467 | } | |||
468 | } | |||
469 | } | |||
470 | KA_TRACE(30, ("__kmp_process_deps<%d>: T#%d found %d predecessors\n", filter,if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_process_deps<%d>: T#%d found %d predecessors\n" , filter, gtid, npredecessors); } | |||
471 | gtid, npredecessors))if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_process_deps<%d>: T#%d found %d predecessors\n" , filter, gtid, npredecessors); }; | |||
472 | return npredecessors; | |||
473 | } | |||
474 | ||||
// Readable names for the dep_barrier argument of the dependence-processing
// functions in this file.
#define NO_DEP_BARRIER (false)
#define DEP_BARRIER (true)
477 | ||||
478 | // returns true if the task has any outstanding dependence | |||
479 | static bool __kmp_check_deps(kmp_int32 gtid, kmp_depnode_t *node, | |||
480 | kmp_task_t *task, kmp_dephash_t **hash, | |||
481 | bool dep_barrier, kmp_int32 ndeps, | |||
482 | kmp_depend_info_t *dep_list, | |||
483 | kmp_int32 ndeps_noalias, | |||
484 | kmp_depend_info_t *noalias_dep_list) { | |||
485 | int i, n_mtxs = 0, dep_all = 0; | |||
486 | #if KMP_DEBUG1 | |||
487 | kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task)(((kmp_taskdata_t *)task) - 1); | |||
488 | #endif | |||
489 | KA_TRACE(20, ("__kmp_check_deps: T#%d checking dependences for task %p : %d "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_check_deps: T#%d checking dependences for task %p : %d " "possibly aliased dependences, %d non-aliased dependences : " "dep_barrier=%d .\n", gtid, taskdata, ndeps, ndeps_noalias, dep_barrier ); } | |||
490 | "possibly aliased dependences, %d non-aliased dependences : "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_check_deps: T#%d checking dependences for task %p : %d " "possibly aliased dependences, %d non-aliased dependences : " "dep_barrier=%d .\n", gtid, taskdata, ndeps, ndeps_noalias, dep_barrier ); } | |||
491 | "dep_barrier=%d .\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_check_deps: T#%d checking dependences for task %p : %d " "possibly aliased dependences, %d non-aliased dependences : " "dep_barrier=%d .\n", gtid, taskdata, ndeps, ndeps_noalias, dep_barrier ); } | |||
492 | gtid, taskdata, ndeps, ndeps_noalias, dep_barrier))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_check_deps: T#%d checking dependences for task %p : %d " "possibly aliased dependences, %d non-aliased dependences : " "dep_barrier=%d .\n", gtid, taskdata, ndeps, ndeps_noalias, dep_barrier ); }; | |||
493 | ||||
494 | // Filter deps in dep_list | |||
495 | // TODO: Different algorithm for large dep_list ( > 10 ? ) | |||
496 | for (i = 0; i < ndeps; i++) { | |||
497 | if (dep_list[i].base_addr != 0 && | |||
498 | dep_list[i].base_addr != (kmp_intptr_t)KMP_SIZE_T_MAX(0xFFFFFFFFFFFFFFFF)) { | |||
499 | KMP_DEBUG_ASSERT(if (!(dep_list[i].flag == 0x1 || dep_list[i].flag == 0x2 || dep_list [i].flag == 0x3 || dep_list[i].flag == 0x4 || dep_list[i].flag == 0x8)) { __kmp_debug_assert("dep_list[i].flag == 0x1 || dep_list[i].flag == 0x2 || dep_list[i].flag == 0x3 || dep_list[i].flag == 0x4 || dep_list[i].flag == 0x8" , "openmp/runtime/src/kmp_taskdeps.cpp", 502); } | |||
500 | dep_list[i].flag == KMP_DEP_IN || dep_list[i].flag == KMP_DEP_OUT ||if (!(dep_list[i].flag == 0x1 || dep_list[i].flag == 0x2 || dep_list [i].flag == 0x3 || dep_list[i].flag == 0x4 || dep_list[i].flag == 0x8)) { __kmp_debug_assert("dep_list[i].flag == 0x1 || dep_list[i].flag == 0x2 || dep_list[i].flag == 0x3 || dep_list[i].flag == 0x4 || dep_list[i].flag == 0x8" , "openmp/runtime/src/kmp_taskdeps.cpp", 502); } | |||
501 | dep_list[i].flag == KMP_DEP_INOUT ||if (!(dep_list[i].flag == 0x1 || dep_list[i].flag == 0x2 || dep_list [i].flag == 0x3 || dep_list[i].flag == 0x4 || dep_list[i].flag == 0x8)) { __kmp_debug_assert("dep_list[i].flag == 0x1 || dep_list[i].flag == 0x2 || dep_list[i].flag == 0x3 || dep_list[i].flag == 0x4 || dep_list[i].flag == 0x8" , "openmp/runtime/src/kmp_taskdeps.cpp", 502); } | |||
502 | dep_list[i].flag == KMP_DEP_MTX || dep_list[i].flag == KMP_DEP_SET)if (!(dep_list[i].flag == 0x1 || dep_list[i].flag == 0x2 || dep_list [i].flag == 0x3 || dep_list[i].flag == 0x4 || dep_list[i].flag == 0x8)) { __kmp_debug_assert("dep_list[i].flag == 0x1 || dep_list[i].flag == 0x2 || dep_list[i].flag == 0x3 || dep_list[i].flag == 0x4 || dep_list[i].flag == 0x8" , "openmp/runtime/src/kmp_taskdeps.cpp", 502); }; | |||
503 | for (int j = i + 1; j < ndeps; j++) { | |||
504 | if (dep_list[i].base_addr == dep_list[j].base_addr) { | |||
505 | if (dep_list[i].flag != dep_list[j].flag) { | |||
506 | // two different dependences on same address work identical to OUT | |||
507 | dep_list[i].flag = KMP_DEP_OUT0x2; | |||
508 | } | |||
509 | dep_list[j].base_addr = 0; // Mark j element as void | |||
510 | } | |||
511 | } | |||
512 | if (dep_list[i].flag == KMP_DEP_MTX0x4) { | |||
513 | // limit number of mtx deps to MAX_MTX_DEPS per node | |||
514 | if (n_mtxs < MAX_MTX_DEPS4 && task != NULL__null) { | |||
515 | ++n_mtxs; | |||
516 | } else { | |||
517 | dep_list[i].flag = KMP_DEP_OUT0x2; // downgrade mutexinoutset to inout | |||
518 | } | |||
519 | } | |||
520 | } else if (dep_list[i].flag == KMP_DEP_ALL0x80 || | |||
521 | dep_list[i].base_addr == (kmp_intptr_t)KMP_SIZE_T_MAX(0xFFFFFFFFFFFFFFFF)) { | |||
522 | // omp_all_memory dependence can be marked by compiler by either | |||
523 | // (addr=0 && flag=0x80) (flag KMP_DEP_ALL), or (addr=-1). | |||
524 | // omp_all_memory overrides all other dependences if any | |||
525 | dep_all = 1; | |||
526 | break; | |||
527 | } | |||
528 | } | |||
529 | ||||
530 | // doesn't need to be atomic as no other thread is going to be accessing this | |||
531 | // node just yet. | |||
532 | // npredecessors is set -1 to ensure that none of the releasing tasks queues | |||
533 | // this task before we have finished processing all the dependences | |||
534 | node->dn.npredecessors = -1; | |||
535 | ||||
536 | // used to pack all npredecessors additions into a single atomic operation at | |||
537 | // the end | |||
538 | int npredecessors; | |||
539 | ||||
540 | if (!dep_all
| |||
541 | npredecessors = __kmp_process_deps<true>(gtid, node, hash, dep_barrier, | |||
542 | ndeps, dep_list, task); | |||
543 | npredecessors += __kmp_process_deps<false>( | |||
544 | gtid, node, hash, dep_barrier, ndeps_noalias, noalias_dep_list, task); | |||
545 | } else { // omp_all_memory dependence | |||
546 | npredecessors = __kmp_process_dep_all(gtid, node, *hash, dep_barrier, task); | |||
547 | } | |||
548 | ||||
549 | node->dn.task = task; | |||
550 | KMP_MB(); | |||
551 | ||||
552 | // Account for our initial fake value | |||
553 | npredecessors++; | |||
554 | ||||
555 | // Update predecessors and obtain current value to check if there are still | |||
556 | // any outstanding dependences (some tasks may have finished while we | |||
557 | // processed the dependences) | |||
558 | npredecessors = | |||
559 | node->dn.npredecessors.fetch_add(npredecessors) + npredecessors; | |||
560 | ||||
561 | KA_TRACE(20, ("__kmp_check_deps: T#%d found %d predecessors for task %p \n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_check_deps: T#%d found %d predecessors for task %p \n" , gtid, npredecessors, taskdata); } | |||
562 | gtid, npredecessors, taskdata))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_check_deps: T#%d found %d predecessors for task %p \n" , gtid, npredecessors, taskdata); }; | |||
563 | ||||
564 | // beyond this point the task could be queued (and executed) by a releasing | |||
565 | // task... | |||
566 | return npredecessors > 0 ? true : false; | |||
567 | } | |||
568 | ||||
569 | /*! | |||
570 | @ingroup TASKING | |||
571 | @param loc_ref location of the original task directive | |||
572 | @param gtid Global Thread ID of encountering thread | |||
573 | @param new_task task thunk allocated by __kmp_omp_task_alloc() for the ''new | |||
574 | task'' | |||
575 | @param ndeps Number of depend items with possible aliasing | |||
576 | @param dep_list List of depend items with possible aliasing | |||
577 | @param ndeps_noalias Number of depend items with no aliasing | |||
578 | @param noalias_dep_list List of depend items with no aliasing | |||
579 | ||||
580 | @return Returns either TASK_CURRENT_NOT_QUEUED if the current task was not | |||
581 | suspended and queued, or TASK_CURRENT_QUEUED if it was suspended and queued | |||
582 | ||||
583 | Schedule a non-thread-switchable task with dependences for execution | |||
584 | */ | |||
585 | kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid, | |||
586 | kmp_task_t *new_task, kmp_int32 ndeps, | |||
587 | kmp_depend_info_t *dep_list, | |||
588 | kmp_int32 ndeps_noalias, | |||
589 | kmp_depend_info_t *noalias_dep_list) { | |||
590 | ||||
591 | kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task)(((kmp_taskdata_t *)new_task) - 1); | |||
592 | KA_TRACE(10, ("__kmpc_omp_task_with_deps(enter): T#%d loc=%p task=%p\n", gtid,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_task_with_deps(enter): T#%d loc=%p task=%p\n" , gtid, loc_ref, new_taskdata); } | |||
593 | loc_ref, new_taskdata))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_task_with_deps(enter): T#%d loc=%p task=%p\n" , gtid, loc_ref, new_taskdata); }; | |||
594 | __kmp_assert_valid_gtid(gtid); | |||
595 | kmp_info_t *thread = __kmp_threads[gtid]; | |||
596 | kmp_taskdata_t *current_task = thread->th.th_current_task; | |||
597 | ||||
598 | #if OMPT_SUPPORT1 | |||
599 | if (ompt_enabled.enabled) { | |||
600 | if (!current_task->ompt_task_info.frame.enter_frame.ptr) | |||
601 | current_task->ompt_task_info.frame.enter_frame.ptr = | |||
602 | OMPT_GET_FRAME_ADDRESS(0)__builtin_frame_address(0); | |||
603 | if (ompt_enabled.ompt_callback_task_create) { | |||
604 | ompt_callbacks.ompt_callback(ompt_callback_task_create)ompt_callback_task_create_callback( | |||
605 | &(current_task->ompt_task_info.task_data), | |||
606 | &(current_task->ompt_task_info.frame), | |||
607 | &(new_taskdata->ompt_task_info.task_data), | |||
608 | ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(new_taskdata)((new_taskdata->td_flags.task_serial || new_taskdata->td_flags .tasking_ser) ? ompt_task_undeferred : 0x0) | ((!(new_taskdata ->td_flags.tiedness)) ? ompt_task_untied : 0x0) | (new_taskdata ->td_flags.final ? ompt_task_final : 0x0) | (new_taskdata-> td_flags.merged_if0 ? ompt_task_mergeable : 0x0), 1, | |||
609 | OMPT_LOAD_OR_GET_RETURN_ADDRESS(gtid)((ompt_enabled.enabled && gtid >= 0 && __kmp_threads [gtid] && __kmp_threads[gtid]->th.ompt_thread_info .return_address) ? __ompt_load_return_address(gtid) : __builtin_return_address (0))); | |||
610 | } | |||
611 | ||||
612 | new_taskdata->ompt_task_info.frame.enter_frame.ptr = | |||
613 | OMPT_GET_FRAME_ADDRESS(0)__builtin_frame_address(0); | |||
614 | } | |||
615 | ||||
616 | #if OMPT_OPTIONAL1 | |||
617 | /* OMPT grab all dependences if requested by the tool */ | |||
618 | if (ndeps + ndeps_noalias > 0 && ompt_enabled.ompt_callback_dependences) { | |||
619 | kmp_int32 i; | |||
620 | ||||
621 | int ompt_ndeps = ndeps + ndeps_noalias; | |||
622 | ompt_dependence_t *ompt_deps = (ompt_dependence_t *)KMP_OMPT_DEPS_ALLOC(___kmp_thread_malloc((thread), ((ndeps + ndeps_noalias) * sizeof (ompt_dependence_t)), "openmp/runtime/src/kmp_taskdeps.cpp", 623 ) | |||
623 | thread, (ndeps + ndeps_noalias) * sizeof(ompt_dependence_t))___kmp_thread_malloc((thread), ((ndeps + ndeps_noalias) * sizeof (ompt_dependence_t)), "openmp/runtime/src/kmp_taskdeps.cpp", 623 ); | |||
624 | ||||
625 | KMP_ASSERT(ompt_deps != NULL)if (!(ompt_deps != __null)) { __kmp_debug_assert("ompt_deps != NULL" , "openmp/runtime/src/kmp_taskdeps.cpp", 625); }; | |||
626 | ||||
627 | for (i = 0; i < ndeps; i++) { | |||
628 | ompt_deps[i].variable.ptr = (void *)dep_list[i].base_addr; | |||
629 | if (dep_list[i].flags.in && dep_list[i].flags.out) | |||
630 | ompt_deps[i].dependence_type = ompt_dependence_type_inout; | |||
631 | else if (dep_list[i].flags.out) | |||
632 | ompt_deps[i].dependence_type = ompt_dependence_type_out; | |||
633 | else if (dep_list[i].flags.in) | |||
634 | ompt_deps[i].dependence_type = ompt_dependence_type_in; | |||
635 | else if (dep_list[i].flags.mtx) | |||
636 | ompt_deps[i].dependence_type = ompt_dependence_type_mutexinoutset; | |||
637 | else if (dep_list[i].flags.set) | |||
638 | ompt_deps[i].dependence_type = ompt_dependence_type_inoutset; | |||
639 | } | |||
640 | for (i = 0; i < ndeps_noalias; i++) { | |||
641 | ompt_deps[ndeps + i].variable.ptr = (void *)noalias_dep_list[i].base_addr; | |||
642 | if (noalias_dep_list[i].flags.in && noalias_dep_list[i].flags.out) | |||
643 | ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_inout; | |||
644 | else if (noalias_dep_list[i].flags.out) | |||
645 | ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_out; | |||
646 | else if (noalias_dep_list[i].flags.in) | |||
647 | ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_in; | |||
648 | else if (noalias_dep_list[i].flags.mtx) | |||
649 | ompt_deps[ndeps + i].dependence_type = | |||
650 | ompt_dependence_type_mutexinoutset; | |||
651 | else if (noalias_dep_list[i].flags.set) | |||
652 | ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_inoutset; | |||
653 | } | |||
654 | ompt_callbacks.ompt_callback(ompt_callback_dependences)ompt_callback_dependences_callback( | |||
655 | &(new_taskdata->ompt_task_info.task_data), ompt_deps, ompt_ndeps); | |||
656 | /* We can now free the allocated memory for the dependences */ | |||
657 | /* For OMPD we might want to delay the free until end of this function */ | |||
658 | KMP_OMPT_DEPS_FREE(thread, ompt_deps)___kmp_thread_free((thread), (ompt_deps), "openmp/runtime/src/kmp_taskdeps.cpp" , 658); | |||
659 | } | |||
660 | #endif /* OMPT_OPTIONAL */ | |||
661 | #endif /* OMPT_SUPPORT */ | |||
662 | ||||
663 | bool serial = current_task->td_flags.team_serial || | |||
664 | current_task->td_flags.tasking_ser || | |||
665 | current_task->td_flags.final; | |||
666 | kmp_task_team_t *task_team = thread->th.th_task_team; | |||
667 | serial = serial && | |||
668 | !(task_team && (task_team->tt.tt_found_proxy_tasks || | |||
669 | task_team->tt.tt_hidden_helper_task_encountered)); | |||
670 | ||||
671 | if (!serial && (ndeps > 0 || ndeps_noalias > 0)) { | |||
672 | /* if no dependences have been tracked yet, create the dependence hash */ | |||
673 | if (current_task->td_dephash == NULL__null) | |||
674 | current_task->td_dephash = __kmp_dephash_create(thread, current_task); | |||
675 | ||||
676 | #if USE_FAST_MEMORY3 | |||
677 | kmp_depnode_t *node = | |||
678 | (kmp_depnode_t *)__kmp_fast_allocate(thread, sizeof(kmp_depnode_t))___kmp_fast_allocate((thread), (sizeof(kmp_depnode_t)), "openmp/runtime/src/kmp_taskdeps.cpp" , 678); | |||
679 | #else | |||
680 | kmp_depnode_t *node = | |||
681 | (kmp_depnode_t *)__kmp_thread_malloc(thread, sizeof(kmp_depnode_t))___kmp_thread_malloc((thread), (sizeof(kmp_depnode_t)), "openmp/runtime/src/kmp_taskdeps.cpp" , 681); | |||
682 | #endif | |||
683 | ||||
684 | __kmp_init_node(node); | |||
685 | new_taskdata->td_depnode = node; | |||
686 | ||||
687 | if (__kmp_check_deps(gtid, node, new_task, ¤t_task->td_dephash, | |||
688 | NO_DEP_BARRIER(false), ndeps, dep_list, ndeps_noalias, | |||
689 | noalias_dep_list)) { | |||
690 | KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d task had blocking "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_task_with_deps(exit): T#%d task had blocking " "dependences: " "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n" , gtid, loc_ref, new_taskdata); } | |||
691 | "dependences: "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_task_with_deps(exit): T#%d task had blocking " "dependences: " "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n" , gtid, loc_ref, new_taskdata); } | |||
692 | "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_task_with_deps(exit): T#%d task had blocking " "dependences: " "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n" , gtid, loc_ref, new_taskdata); } | |||
693 | gtid, loc_ref, new_taskdata))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_task_with_deps(exit): T#%d task had blocking " "dependences: " "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n" , gtid, loc_ref, new_taskdata); }; | |||
694 | #if OMPT_SUPPORT1 | |||
695 | if (ompt_enabled.enabled) { | |||
696 | current_task->ompt_task_info.frame.enter_frame = ompt_data_none{0}; | |||
697 | } | |||
698 | #endif | |||
699 | return TASK_CURRENT_NOT_QUEUED0; | |||
700 | } | |||
701 | } else { | |||
702 | KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d ignored dependences "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_task_with_deps(exit): T#%d ignored dependences " "for task (serialized) loc=%p task=%p\n", gtid, loc_ref, new_taskdata ); } | |||
703 | "for task (serialized) loc=%p task=%p\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_task_with_deps(exit): T#%d ignored dependences " "for task (serialized) loc=%p task=%p\n", gtid, loc_ref, new_taskdata ); } | |||
704 | gtid, loc_ref, new_taskdata))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_task_with_deps(exit): T#%d ignored dependences " "for task (serialized) loc=%p task=%p\n", gtid, loc_ref, new_taskdata ); }; | |||
705 | } | |||
706 | ||||
707 | KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d task had no blocking "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_task_with_deps(exit): T#%d task had no blocking " "dependences : " "loc=%p task=%p, transferring to __kmp_omp_task\n" , gtid, loc_ref, new_taskdata); } | |||
708 | "dependences : "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_task_with_deps(exit): T#%d task had no blocking " "dependences : " "loc=%p task=%p, transferring to __kmp_omp_task\n" , gtid, loc_ref, new_taskdata); } | |||
709 | "loc=%p task=%p, transferring to __kmp_omp_task\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_task_with_deps(exit): T#%d task had no blocking " "dependences : " "loc=%p task=%p, transferring to __kmp_omp_task\n" , gtid, loc_ref, new_taskdata); } | |||
710 | gtid, loc_ref, new_taskdata))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_task_with_deps(exit): T#%d task had no blocking " "dependences : " "loc=%p task=%p, transferring to __kmp_omp_task\n" , gtid, loc_ref, new_taskdata); }; | |||
711 | ||||
712 | kmp_int32 ret = __kmp_omp_task(gtid, new_task, true); | |||
713 | #if OMPT_SUPPORT1 | |||
714 | if (ompt_enabled.enabled) { | |||
715 | current_task->ompt_task_info.frame.enter_frame = ompt_data_none{0}; | |||
716 | } | |||
717 | #endif | |||
718 | return ret; | |||
719 | } | |||
720 | ||||
721 | #if OMPT_SUPPORT1 | |||
722 | void __ompt_taskwait_dep_finish(kmp_taskdata_t *current_task, | |||
723 | ompt_data_t *taskwait_task_data) { | |||
724 | if (ompt_enabled.ompt_callback_task_schedule) { | |||
725 | ompt_callbacks.ompt_callback(ompt_callback_task_schedule)ompt_callback_task_schedule_callback( | |||
726 | taskwait_task_data, ompt_taskwait_complete, NULL__null); | |||
727 | } | |||
728 | current_task->ompt_task_info.frame.enter_frame.ptr = NULL__null; | |||
729 | *taskwait_task_data = ompt_data_none{0}; | |||
730 | } | |||
731 | #endif /* OMPT_SUPPORT */ | |||
732 | ||||
733 | /*! | |||
734 | @ingroup TASKING | |||
735 | @param loc_ref location of the original task directive | |||
736 | @param gtid Global Thread ID of encountering thread | |||
737 | @param ndeps Number of depend items with possible aliasing | |||
738 | @param dep_list List of depend items with possible aliasing | |||
739 | @param ndeps_noalias Number of depend items with no aliasing | |||
740 | @param noalias_dep_list List of depend items with no aliasing | |||
741 | ||||
742 | Blocks the current task until all specifies dependences have been fulfilled. | |||
743 | */ | |||
744 | void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, | |||
745 | kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, | |||
746 | kmp_depend_info_t *noalias_dep_list) { | |||
747 | __kmpc_omp_taskwait_deps_51(loc_ref, gtid, ndeps, dep_list, ndeps_noalias, | |||
| ||||
748 | noalias_dep_list, false); | |||
749 | } | |||
750 | ||||
751 | /* __kmpc_omp_taskwait_deps_51 : Function for OpenMP 5.1 nowait clause. | |||
752 | Placeholder for taskwait with nowait clause. | |||
753 | Earlier code of __kmpc_omp_wait_deps() is now | |||
754 | in this function. | |||
755 | */ | |||
756 | void __kmpc_omp_taskwait_deps_51(ident_t *loc_ref, kmp_int32 gtid, | |||
757 | kmp_int32 ndeps, kmp_depend_info_t *dep_list, | |||
758 | kmp_int32 ndeps_noalias, | |||
759 | kmp_depend_info_t *noalias_dep_list, | |||
760 | kmp_int32 has_no_wait) { | |||
761 | KA_TRACE(10, ("__kmpc_omp_taskwait_deps(enter): T#%d loc=%p nowait#%d\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_taskwait_deps(enter): T#%d loc=%p nowait#%d\n" , gtid, loc_ref, has_no_wait); } | |||
762 | gtid, loc_ref, has_no_wait))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_taskwait_deps(enter): T#%d loc=%p nowait#%d\n" , gtid, loc_ref, has_no_wait); }; | |||
763 | if (ndeps == 0 && ndeps_noalias == 0) { | |||
764 | KA_TRACE(10, ("__kmpc_omp_taskwait_deps(exit): T#%d has no dependences to "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_taskwait_deps(exit): T#%d has no dependences to " "wait upon : loc=%p\n", gtid, loc_ref); } | |||
765 | "wait upon : loc=%p\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_taskwait_deps(exit): T#%d has no dependences to " "wait upon : loc=%p\n", gtid, loc_ref); } | |||
766 | gtid, loc_ref))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_taskwait_deps(exit): T#%d has no dependences to " "wait upon : loc=%p\n", gtid, loc_ref); }; | |||
767 | return; | |||
768 | } | |||
769 | __kmp_assert_valid_gtid(gtid); | |||
770 | kmp_info_t *thread = __kmp_threads[gtid]; | |||
771 | kmp_taskdata_t *current_task = thread->th.th_current_task; | |||
772 | ||||
773 | #if OMPT_SUPPORT1 | |||
774 | // this function represents a taskwait construct with depend clause | |||
775 | // We signal 4 events: | |||
776 | // - creation of the taskwait task | |||
777 | // - dependences of the taskwait task | |||
778 | // - schedule and finish of the taskwait task | |||
779 | ompt_data_t *taskwait_task_data = &thread->th.ompt_thread_info.task_data; | |||
780 | KMP_ASSERT(taskwait_task_data->ptr == NULL)if (!(taskwait_task_data->ptr == __null)) { __kmp_debug_assert ("taskwait_task_data->ptr == NULL", "openmp/runtime/src/kmp_taskdeps.cpp" , 780); }; | |||
781 | if (ompt_enabled.enabled) { | |||
782 | if (!current_task->ompt_task_info.frame.enter_frame.ptr) | |||
783 | current_task->ompt_task_info.frame.enter_frame.ptr = | |||
784 | OMPT_GET_FRAME_ADDRESS(0)__builtin_frame_address(0); | |||
785 | if (ompt_enabled.ompt_callback_task_create) { | |||
786 | ompt_callbacks.ompt_callback(ompt_callback_task_create)ompt_callback_task_create_callback( | |||
787 | &(current_task->ompt_task_info.task_data), | |||
788 | &(current_task->ompt_task_info.frame), taskwait_task_data, | |||
789 | ompt_task_taskwait | ompt_task_undeferred | ompt_task_mergeable, 1, | |||
790 | OMPT_LOAD_OR_GET_RETURN_ADDRESS(gtid)((ompt_enabled.enabled && gtid >= 0 && __kmp_threads [gtid] && __kmp_threads[gtid]->th.ompt_thread_info .return_address) ? __ompt_load_return_address(gtid) : __builtin_return_address (0))); | |||
791 | } | |||
792 | } | |||
793 | ||||
794 | #if OMPT_OPTIONAL1 | |||
795 | /* OMPT grab all dependences if requested by the tool */ | |||
796 | if (ndeps + ndeps_noalias > 0 && ompt_enabled.ompt_callback_dependences) { | |||
797 | kmp_int32 i; | |||
798 | ||||
799 | int ompt_ndeps = ndeps + ndeps_noalias; | |||
800 | ompt_dependence_t *ompt_deps = (ompt_dependence_t *)KMP_OMPT_DEPS_ALLOC(___kmp_thread_malloc((thread), ((ndeps + ndeps_noalias) * sizeof (ompt_dependence_t)), "openmp/runtime/src/kmp_taskdeps.cpp", 801 ) | |||
801 | thread, (ndeps + ndeps_noalias) * sizeof(ompt_dependence_t))___kmp_thread_malloc((thread), ((ndeps + ndeps_noalias) * sizeof (ompt_dependence_t)), "openmp/runtime/src/kmp_taskdeps.cpp", 801 ); | |||
802 | ||||
803 | KMP_ASSERT(ompt_deps != NULL)if (!(ompt_deps != __null)) { __kmp_debug_assert("ompt_deps != NULL" , "openmp/runtime/src/kmp_taskdeps.cpp", 803); }; | |||
804 | ||||
805 | for (i = 0; i < ndeps; i++) { | |||
806 | ompt_deps[i].variable.ptr = (void *)dep_list[i].base_addr; | |||
807 | if (dep_list[i].flags.in && dep_list[i].flags.out) | |||
808 | ompt_deps[i].dependence_type = ompt_dependence_type_inout; | |||
809 | else if (dep_list[i].flags.out) | |||
810 | ompt_deps[i].dependence_type = ompt_dependence_type_out; | |||
811 | else if (dep_list[i].flags.in) | |||
812 | ompt_deps[i].dependence_type = ompt_dependence_type_in; | |||
813 | else if (dep_list[i].flags.mtx) | |||
814 | ompt_deps[ndeps + i].dependence_type = | |||
815 | ompt_dependence_type_mutexinoutset; | |||
816 | else if (dep_list[i].flags.set) | |||
817 | ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_inoutset; | |||
818 | } | |||
819 | for (i = 0; i < ndeps_noalias; i++) { | |||
820 | ompt_deps[ndeps + i].variable.ptr = (void *)noalias_dep_list[i].base_addr; | |||
821 | if (noalias_dep_list[i].flags.in && noalias_dep_list[i].flags.out) | |||
822 | ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_inout; | |||
823 | else if (noalias_dep_list[i].flags.out) | |||
824 | ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_out; | |||
825 | else if (noalias_dep_list[i].flags.in) | |||
826 | ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_in; | |||
827 | else if (noalias_dep_list[i].flags.mtx) | |||
828 | ompt_deps[ndeps + i].dependence_type = | |||
829 | ompt_dependence_type_mutexinoutset; | |||
830 | else if (noalias_dep_list[i].flags.set) | |||
831 | ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_inoutset; | |||
832 | } | |||
833 | ompt_callbacks.ompt_callback(ompt_callback_dependences)ompt_callback_dependences_callback( | |||
834 | taskwait_task_data, ompt_deps, ompt_ndeps); | |||
835 | /* We can now free the allocated memory for the dependences */ | |||
836 | /* For OMPD we might want to delay the free until end of this function */ | |||
837 | KMP_OMPT_DEPS_FREE(thread, ompt_deps)___kmp_thread_free((thread), (ompt_deps), "openmp/runtime/src/kmp_taskdeps.cpp" , 837); | |||
838 | ompt_deps = NULL__null; | |||
839 | } | |||
840 | #endif /* OMPT_OPTIONAL */ | |||
841 | #endif /* OMPT_SUPPORT */ | |||
842 | ||||
843 | // We can return immediately as: | |||
844 | // - dependences are not computed in serial teams (except with proxy tasks) | |||
845 | // - if the dephash is not yet created it means we have nothing to wait for | |||
846 | bool ignore = current_task->td_flags.team_serial || | |||
847 | current_task->td_flags.tasking_ser || | |||
848 | current_task->td_flags.final; | |||
849 | ignore = | |||
850 | ignore && thread->th.th_task_team != NULL__null && | |||
851 | thread->th.th_task_team->tt.tt_found_proxy_tasks == FALSE0 && | |||
852 | thread->th.th_task_team->tt.tt_hidden_helper_task_encountered == FALSE0; | |||
853 | ignore = ignore
| |||
854 | ||||
855 | if (ignore
| |||
856 | KA_TRACE(10, ("__kmpc_omp_taskwait_deps(exit): T#%d has no blocking "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_taskwait_deps(exit): T#%d has no blocking " "dependences : loc=%p\n", gtid, loc_ref); } | |||
857 | "dependences : loc=%p\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_taskwait_deps(exit): T#%d has no blocking " "dependences : loc=%p\n", gtid, loc_ref); } | |||
858 | gtid, loc_ref))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_taskwait_deps(exit): T#%d has no blocking " "dependences : loc=%p\n", gtid, loc_ref); }; | |||
859 | #if OMPT_SUPPORT1 | |||
860 | __ompt_taskwait_dep_finish(current_task, taskwait_task_data); | |||
861 | #endif /* OMPT_SUPPORT */ | |||
862 | return; | |||
863 | } | |||
864 | ||||
865 | kmp_depnode_t node = {0}; | |||
866 | __kmp_init_node(&node); | |||
867 | ||||
868 | if (!__kmp_check_deps(gtid, &node, NULL__null, ¤t_task->td_dephash, | |||
869 | DEP_BARRIER(true), ndeps, dep_list, ndeps_noalias, | |||
870 | noalias_dep_list)) { | |||
871 | KA_TRACE(10, ("__kmpc_omp_taskwait_deps(exit): T#%d has no blocking "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_taskwait_deps(exit): T#%d has no blocking " "dependences : loc=%p\n", gtid, loc_ref); } | |||
872 | "dependences : loc=%p\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_taskwait_deps(exit): T#%d has no blocking " "dependences : loc=%p\n", gtid, loc_ref); } | |||
873 | gtid, loc_ref))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_taskwait_deps(exit): T#%d has no blocking " "dependences : loc=%p\n", gtid, loc_ref); }; | |||
874 | #if OMPT_SUPPORT1 | |||
875 | __ompt_taskwait_dep_finish(current_task, taskwait_task_data); | |||
876 | #endif /* OMPT_SUPPORT */ | |||
877 | return; | |||
878 | } | |||
879 | ||||
880 | int thread_finished = FALSE0; | |||
881 | kmp_flag_32<false, false> flag( | |||
882 | (std::atomic<kmp_uint32> *)&node.dn.npredecessors, 0U); | |||
883 | while (node.dn.npredecessors > 0) { | |||
884 | flag.execute_tasks(thread, gtid, FALSE0, | |||
885 | &thread_finished USE_ITT_BUILD_ARG(NULL), __null, | |||
886 | __kmp_task_stealing_constraint); | |||
887 | } | |||
888 | ||||
889 | #if OMPT_SUPPORT1 | |||
890 | __ompt_taskwait_dep_finish(current_task, taskwait_task_data); | |||
891 | #endif /* OMPT_SUPPORT */ | |||
892 | KA_TRACE(10, ("__kmpc_omp_taskwait_deps(exit): T#%d finished waiting : loc=%p\if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_taskwait_deps(exit): T#%d finished waiting : loc=%p \n" , gtid, loc_ref); } | |||
893 | \n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_taskwait_deps(exit): T#%d finished waiting : loc=%p \n" , gtid, loc_ref); } | |||
894 | gtid, loc_ref))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_taskwait_deps(exit): T#%d finished waiting : loc=%p \n" , gtid, loc_ref); }; | |||
895 | } |