| File: | build/source/openmp/runtime/src/kmp_barrier.cpp |
| Warning: | line 2515, column 7 Called function pointer is null (null dereference) |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
| 1 | /* | ||||
| 2 | * kmp_barrier.cpp | ||||
| 3 | */ | ||||
| 4 | |||||
| 5 | //===----------------------------------------------------------------------===// | ||||
| 6 | // | ||||
| 7 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||||
| 8 | // See https://llvm.org/LICENSE.txt for license information. | ||||
| 9 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||||
| 10 | // | ||||
| 11 | //===----------------------------------------------------------------------===// | ||||
| 12 | |||||
| 13 | #include "kmp_wait_release.h" | ||||
| 14 | #include "kmp_barrier.h" | ||||
| 15 | #include "kmp_itt.h" | ||||
| 16 | #include "kmp_os.h" | ||||
| 17 | #include "kmp_stats.h" | ||||
| 18 | #include "ompt-specific.h" | ||||
| 19 | // for distributed barrier | ||||
| 20 | #include "kmp_affinity.h" | ||||
| 21 | |||||
| 22 | #if KMP_MIC0 | ||||
| 23 | #include <immintrin.h> | ||||
| 24 | #define USE_NGO_STORES 1 | ||||
| 25 | #endif // KMP_MIC | ||||
| 26 | |||||
// Helper macros for copying ICVs / go data between threads.
// When KMP_MIC and USE_NGO_STORES are both set, ngo_load() reads one 512-bit
// vector from src into a local named Vt, and both store macros write that Vt
// to dst with _mm512_storenrngo_pd; note that in this variant the store macros
// ignore their src argument and depend on a preceding ngo_load() in the same
// scope. ngo_sync() then issues a locked add on the stack as a memory barrier.
// Otherwise the macros fall back to copy_icvs(), a KMP_MEMCPY of CACHE_LINE
// bytes, and no-ops for ngo_load()/ngo_sync().
| 27 | #if KMP_MIC0 && USE_NGO_STORES | ||||
| 28 | // ICV copying | ||||
| 29 | #define ngo_load(src)((void)0) __m512d Vt = _mm512_load_pd((void *)(src)) | ||||
| 30 | #define ngo_store_icvs(dst, src)copy_icvs((dst), (src)) _mm512_storenrngo_pd((void *)(dst), Vt) | ||||
| 31 | #define ngo_store_go(dst, src)memcpy((dst), (src), 64) _mm512_storenrngo_pd((void *)(dst), Vt) | ||||
| 32 | #define ngo_sync()((void)0) __asm__ volatile("lock; addl $0,0(%%rsp)" ::: "memory") | ||||
| 33 | #else | ||||
| 34 | #define ngo_load(src)((void)0) ((void)0) | ||||
| 35 | #define ngo_store_icvs(dst, src)copy_icvs((dst), (src)) copy_icvs((dst), (src)) | ||||
| 36 | #define ngo_store_go(dst, src)memcpy((dst), (src), 64) KMP_MEMCPYmemcpy((dst), (src), CACHE_LINE64) | ||||
| 37 | #define ngo_sync()((void)0) ((void)0) | ||||
| 38 | #endif /* KMP_MIC && USE_NGO_STORES */ | ||||
| 39 | |||||
| 40 | void __kmp_print_structure(void); // Forward declaration | ||||
| 41 | |||||
| 42 | // ---------------------------- Barrier Algorithms ---------------------------- | ||||
| 43 | // Distributed barrier | ||||
| 44 | |||||
| 45 | // Compute how many threads to have polling each cache-line. | ||||
| 46 | // We want to limit the number of writes to IDEAL_GO_RESOLUTION. | ||||
| 47 | void distributedBarrier::computeVarsForN(size_t n) { | ||||
| 48 | int nsockets = 1; | ||||
| 49 | if (__kmp_topology) { | ||||
| 50 | int socket_level = __kmp_topology->get_level(KMP_HW_SOCKET); | ||||
| 51 | int core_level = __kmp_topology->get_level(KMP_HW_CORE); | ||||
| 52 | int ncores_per_socket = | ||||
| 53 | __kmp_topology->calculate_ratio(core_level, socket_level); | ||||
| 54 | nsockets = __kmp_topology->get_count(socket_level); | ||||
| 55 | |||||
| 56 | if (nsockets <= 0) | ||||
| 57 | nsockets = 1; | ||||
| 58 | if (ncores_per_socket <= 0) | ||||
| 59 | ncores_per_socket = 1; | ||||
| 60 | |||||
| 61 | threads_per_go = ncores_per_socket >> 1; | ||||
| 62 | if (!fix_threads_per_go) { | ||||
| 63 | // Minimize num_gos | ||||
| 64 | if (threads_per_go > 4) { | ||||
| 65 | if (KMP_OPTIMIZE_FOR_REDUCTIONS0) { | ||||
| 66 | threads_per_go = threads_per_go >> 1; | ||||
| 67 | } | ||||
| 68 | if (threads_per_go > 4 && nsockets == 1) | ||||
| 69 | threads_per_go = threads_per_go >> 1; | ||||
| 70 | } | ||||
| 71 | } | ||||
| 72 | if (threads_per_go == 0) | ||||
| 73 | threads_per_go = 1; | ||||
| 74 | fix_threads_per_go = true; | ||||
| 75 | num_gos = n / threads_per_go; | ||||
| 76 | if (n % threads_per_go) | ||||
| 77 | num_gos++; | ||||
| 78 | if (nsockets == 1 || num_gos == 1) | ||||
| 79 | num_groups = 1; | ||||
| 80 | else { | ||||
| 81 | num_groups = num_gos / nsockets; | ||||
| 82 | if (num_gos % nsockets) | ||||
| 83 | num_groups++; | ||||
| 84 | } | ||||
| 85 | if (num_groups <= 0) | ||||
| 86 | num_groups = 1; | ||||
| 87 | gos_per_group = num_gos / num_groups; | ||||
| 88 | if (num_gos % num_groups) | ||||
| 89 | gos_per_group++; | ||||
| 90 | threads_per_group = threads_per_go * gos_per_group; | ||||
| 91 | } else { | ||||
| 92 | num_gos = n / threads_per_go; | ||||
| 93 | if (n % threads_per_go) | ||||
| 94 | num_gos++; | ||||
| 95 | if (num_gos == 1) | ||||
| 96 | num_groups = 1; | ||||
| 97 | else { | ||||
| 98 | num_groups = num_gos / 2; | ||||
| 99 | if (num_gos % 2) | ||||
| 100 | num_groups++; | ||||
| 101 | } | ||||
| 102 | gos_per_group = num_gos / num_groups; | ||||
| 103 | if (num_gos % num_groups) | ||||
| 104 | gos_per_group++; | ||||
| 105 | threads_per_group = threads_per_go * gos_per_group; | ||||
| 106 | } | ||||
| 107 | } | ||||
| 108 | |||||
| 109 | void distributedBarrier::computeGo(size_t n) { | ||||
| 110 | // Minimize num_gos | ||||
| 111 | for (num_gos = 1;; num_gos++) | ||||
| 112 | if (IDEAL_CONTENTION * num_gos >= n) | ||||
| 113 | break; | ||||
| 114 | threads_per_go = n / num_gos; | ||||
| 115 | if (n % num_gos) | ||||
| 116 | threads_per_go++; | ||||
| 117 | while (num_gos > MAX_GOS) { | ||||
| 118 | threads_per_go++; | ||||
| 119 | num_gos = n / threads_per_go; | ||||
| 120 | if (n % threads_per_go) | ||||
| 121 | num_gos++; | ||||
| 122 | } | ||||
| 123 | computeVarsForN(n); | ||||
| 124 | } | ||||
| 125 | |||||
| 126 | // This function is to resize the barrier arrays when the new number of threads | ||||
| 127 | // exceeds max_threads, which is the current size of all the arrays | ||||
| 128 | void distributedBarrier::resize(size_t nthr) { | ||||
| 129 | KMP_DEBUG_ASSERT(nthr > max_threads)if (!(nthr > max_threads)) { __kmp_debug_assert("nthr > max_threads" , "openmp/runtime/src/kmp_barrier.cpp", 129); }; | ||||
| 130 | |||||
| 131 | // expand to requested size * 2 | ||||
| 132 | max_threads = nthr * 2; | ||||
| 133 | |||||
| 134 | // allocate arrays to new max threads | ||||
| 135 | for (int i = 0; i < MAX_ITERS; ++i) { | ||||
| 136 | if (flags[i]) | ||||
| 137 | flags[i] = (flags_s *)KMP_INTERNAL_REALLOC(flags[i],realloc((flags[i]), (max_threads * sizeof(flags_s))) | ||||
| 138 | max_threads * sizeof(flags_s))realloc((flags[i]), (max_threads * sizeof(flags_s))); | ||||
| 139 | else | ||||
| 140 | flags[i] = (flags_s *)KMP_INTERNAL_MALLOC(max_threads * sizeof(flags_s))malloc(max_threads * sizeof(flags_s)); | ||||
| 141 | } | ||||
| 142 | |||||
| 143 | if (go) | ||||
| 144 | go = (go_s *)KMP_INTERNAL_REALLOC(go, max_threads * sizeof(go_s))realloc((go), (max_threads * sizeof(go_s))); | ||||
| 145 | else | ||||
| 146 | go = (go_s *)KMP_INTERNAL_MALLOC(max_threads * sizeof(go_s))malloc(max_threads * sizeof(go_s)); | ||||
| 147 | |||||
| 148 | if (iter) | ||||
| 149 | iter = (iter_s *)KMP_INTERNAL_REALLOC(iter, max_threads * sizeof(iter_s))realloc((iter), (max_threads * sizeof(iter_s))); | ||||
| 150 | else | ||||
| 151 | iter = (iter_s *)KMP_INTERNAL_MALLOC(max_threads * sizeof(iter_s))malloc(max_threads * sizeof(iter_s)); | ||||
| 152 | |||||
| 153 | if (sleep) | ||||
| 154 | sleep = | ||||
| 155 | (sleep_s *)KMP_INTERNAL_REALLOC(sleep, max_threads * sizeof(sleep_s))realloc((sleep), (max_threads * sizeof(sleep_s))); | ||||
| 156 | else | ||||
| 157 | sleep = (sleep_s *)KMP_INTERNAL_MALLOC(max_threads * sizeof(sleep_s))malloc(max_threads * sizeof(sleep_s)); | ||||
| 158 | } | ||||
| 159 | |||||
| 160 | // This function is to set all the go flags that threads might be waiting | ||||
| 161 | // on, and when blocktime is not infinite, it should be followed by a wake-up | ||||
| 162 | // call to each thread | ||||
| 163 | kmp_uint64 distributedBarrier::go_release() { | ||||
| 164 | kmp_uint64 next_go = iter[0].iter + distributedBarrier::MAX_ITERS; | ||||
| 165 | for (size_t j = 0; j < num_gos; j++) { | ||||
| 166 | go[j].go.store(next_go); | ||||
| 167 | } | ||||
| 168 | return next_go; | ||||
| 169 | } | ||||
| 170 | |||||
| 171 | void distributedBarrier::go_reset() { | ||||
| 172 | for (size_t j = 0; j < max_threads; ++j) { | ||||
| 173 | for (size_t i = 0; i < distributedBarrier::MAX_ITERS; ++i) { | ||||
| 174 | flags[i][j].stillNeed = 1; | ||||
| 175 | } | ||||
| 176 | go[j].go.store(0); | ||||
| 177 | iter[j].iter = 0; | ||||
| 178 | } | ||||
| 179 | } | ||||
| 180 | |||||
| 181 | // This function inits/re-inits the distributed barrier for a particular number | ||||
| 182 | // of threads. If a resize of arrays is needed, it calls the resize function. | ||||
| 183 | void distributedBarrier::init(size_t nthr) { | ||||
| 184 | size_t old_max = max_threads; | ||||
| 185 | if (nthr > max_threads) { // need more space in arrays | ||||
| 186 | resize(nthr); | ||||
| 187 | } | ||||
| 188 | |||||
| 189 | for (size_t i = 0; i < max_threads; i++) { | ||||
| 190 | for (size_t j = 0; j < distributedBarrier::MAX_ITERS; j++) { | ||||
| 191 | flags[j][i].stillNeed = 1; | ||||
| 192 | } | ||||
| 193 | go[i].go.store(0); | ||||
| 194 | iter[i].iter = 0; | ||||
| 195 | if (i >= old_max) | ||||
| 196 | sleep[i].sleep = false; | ||||
| 197 | } | ||||
| 198 | |||||
| 199 | // Recalculate num_gos, etc. based on new nthr | ||||
| 200 | computeVarsForN(nthr); | ||||
| 201 | |||||
| 202 | num_threads = nthr; | ||||
| 203 | |||||
| 204 | if (team_icvs == NULL__null) | ||||
| 205 | team_icvs = __kmp_allocate(sizeof(kmp_internal_control_t))___kmp_allocate((sizeof(kmp_internal_control_t)), "openmp/runtime/src/kmp_barrier.cpp" , 205); | ||||
| 206 | } | ||||
| 207 | |||||
| 208 | // This function is used only when KMP_BLOCKTIME is not infinite. | ||||
| 209 | // static | ||||
| 210 | void __kmp_dist_barrier_wakeup(enum barrier_type bt, kmp_team_t *team, | ||||
| 211 | size_t start, size_t stop, size_t inc, | ||||
| 212 | size_t tid) { | ||||
| 213 | KMP_DEBUG_ASSERT(__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME)if (!(__kmp_dflt_blocktime != (2147483647))) { __kmp_debug_assert ("__kmp_dflt_blocktime != (2147483647)", "openmp/runtime/src/kmp_barrier.cpp" , 213); }; | ||||
| 214 | if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done)) | ||||
| 215 | return; | ||||
| 216 | |||||
| 217 | kmp_info_t **other_threads = team->t.t_threads; | ||||
| 218 | for (size_t thr = start; thr < stop; thr += inc) { | ||||
| 219 | KMP_DEBUG_ASSERT(other_threads[thr])if (!(other_threads[thr])) { __kmp_debug_assert("other_threads[thr]" , "openmp/runtime/src/kmp_barrier.cpp", 219); }; | ||||
| 220 | int gtid = other_threads[thr]->th.th_info.ds.ds_gtid; | ||||
| 221 | // Wake up worker regardless of if it appears to be sleeping or not | ||||
| 222 | __kmp_atomic_resume_64(gtid, (kmp_atomic_flag_64<> *)NULL__null); | ||||
| 223 | } | ||||
| 224 | } | ||||
| 225 | |||||
| 226 | static void __kmp_dist_barrier_gather( | ||||
| 227 | enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, | ||||
| 228 | void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj), void *itt_sync_obj) { | ||||
| 229 | KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_dist_gather)((void)0); | ||||
| 230 | kmp_team_t *team; | ||||
| 231 | distributedBarrier *b; | ||||
| 232 | kmp_info_t **other_threads; | ||||
| 233 | kmp_uint64 my_current_iter, my_next_iter; | ||||
| 234 | kmp_uint32 nproc; | ||||
| 235 | bool group_leader; | ||||
| 236 | |||||
| 237 | team = this_thr->th.th_team; | ||||
| 238 | nproc = this_thr->th.th_team_nproc; | ||||
| 239 | other_threads = team->t.t_threads; | ||||
| 240 | b = team->t.b; | ||||
| 241 | my_current_iter = b->iter[tid].iter; | ||||
| 242 | my_next_iter = (my_current_iter + 1) % distributedBarrier::MAX_ITERS; | ||||
| 243 | group_leader = ((tid % b->threads_per_group) == 0); | ||||
| 244 | |||||
| 245 | KA_TRACE(20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_dist_barrier_gather: T#%d(%d:%d) enter; barrier type %d\n" , gtid, team->t.t_id, tid, bt); } | ||||
| 246 | ("__kmp_dist_barrier_gather: T#%d(%d:%d) enter; barrier type %d\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_dist_barrier_gather: T#%d(%d:%d) enter; barrier type %d\n" , gtid, team->t.t_id, tid, bt); } | ||||
| 247 | gtid, team->t.t_id, tid, bt))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_dist_barrier_gather: T#%d(%d:%d) enter; barrier type %d\n" , gtid, team->t.t_id, tid, bt); }; | ||||
| 248 | |||||
| 249 | #if USE_ITT_BUILD1 && USE_ITT_NOTIFY1 | ||||
| 250 | // Barrier imbalance - save arrive time to the thread | ||||
| 251 | if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) { | ||||
| 252 | this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time = | ||||
| 253 | __itt_get_timestamp(!__kmp_itt_get_timestamp_ptr__3_0) ? 0 : __kmp_itt_get_timestamp_ptr__3_0(); | ||||
| 254 | } | ||||
| 255 | #endif | ||||
| 256 | |||||
| 257 | if (group_leader) { | ||||
| 258 | // Start from the thread after the group leader | ||||
| 259 | size_t group_start = tid + 1; | ||||
| 260 | size_t group_end = tid + b->threads_per_group; | ||||
| 261 | size_t threads_pending = 0; | ||||
| 262 | |||||
| 263 | if (group_end > nproc) | ||||
| 264 | group_end = nproc; | ||||
| 265 | do { // wait for threads in my group | ||||
| 266 | threads_pending = 0; | ||||
| 267 | // Check all the flags every time to avoid branch misspredict | ||||
| 268 | for (size_t thr = group_start; thr < group_end; thr++) { | ||||
| 269 | // Each thread uses a different cache line | ||||
| 270 | threads_pending += b->flags[my_current_iter][thr].stillNeed; | ||||
| 271 | } | ||||
| 272 | // Execute tasks here | ||||
| 273 | if (__kmp_tasking_mode != tskm_immediate_exec) { | ||||
| 274 | kmp_task_team_t *task_team = this_thr->th.th_task_team; | ||||
| 275 | if (task_team != NULL__null) { | ||||
| 276 | if (TCR_SYNC_4(task_team->tt.tt_active)(task_team->tt.tt_active)) { | ||||
| 277 | if (KMP_TASKING_ENABLED(task_team)((!0) == ((task_team)->tt.tt_found_tasks))) { | ||||
| 278 | int tasks_completed = FALSE0; | ||||
| 279 | __kmp_atomic_execute_tasks_64( | ||||
| 280 | this_thr, gtid, (kmp_atomic_flag_64<> *)NULL__null, FALSE0, | ||||
| 281 | &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj, 0); | ||||
| 282 | } else | ||||
| 283 | this_thr->th.th_reap_state = KMP_SAFE_TO_REAP1; | ||||
| 284 | } | ||||
| 285 | } else { | ||||
| 286 | this_thr->th.th_reap_state = KMP_SAFE_TO_REAP1; | ||||
| 287 | } // if | ||||
| 288 | } | ||||
| 289 | if (TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done)) { | ||||
| 290 | if (__kmp_global.g.g_abort) | ||||
| 291 | __kmp_abort_thread(); | ||||
| 292 | break; | ||||
| 293 | } else if (__kmp_tasking_mode != tskm_immediate_exec && | ||||
| 294 | this_thr->th.th_reap_state == KMP_SAFE_TO_REAP1) { | ||||
| 295 | this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP0; | ||||
| 296 | } | ||||
| 297 | } while (threads_pending > 0); | ||||
| 298 | |||||
| 299 | if (reduce) { // Perform reduction if needed | ||||
| 300 | OMPT_REDUCTION_DECL(this_thr, gtid)ompt_data_t *my_task_data = (&(this_thr->th.th_current_task ->ompt_task_info.task_data)); ompt_data_t *my_parallel_data = (&(this_thr->th.th_team->t.ompt_team_info.parallel_data )); void *return_address = __ompt_load_return_address(gtid);; | ||||
| 301 | OMPT_REDUCTION_BEGINif (ompt_enabled.enabled && ompt_enabled.ompt_callback_reduction ) { ompt_callbacks.ompt_callback_reduction_callback( ompt_sync_region_reduction , ompt_scope_begin, my_parallel_data, my_task_data, return_address ); }; | ||||
| 302 | // Group leader reduces all threads in group | ||||
| 303 | for (size_t thr = group_start; thr < group_end; thr++) { | ||||
| 304 | (*reduce)(this_thr->th.th_local.reduce_data, | ||||
| 305 | other_threads[thr]->th.th_local.reduce_data); | ||||
| 306 | } | ||||
| 307 | OMPT_REDUCTION_ENDif (ompt_enabled.enabled && ompt_enabled.ompt_callback_reduction ) { ompt_callbacks.ompt_callback_reduction_callback( ompt_sync_region_reduction , ompt_scope_end, my_parallel_data, my_task_data, return_address ); }; | ||||
| 308 | } | ||||
| 309 | |||||
| 310 | // Set flag for next iteration | ||||
| 311 | b->flags[my_next_iter][tid].stillNeed = 1; | ||||
| 312 | // Each thread uses a different cache line; resets stillNeed to 0 to | ||||
| 313 | // indicate it has reached the barrier | ||||
| 314 | b->flags[my_current_iter][tid].stillNeed = 0; | ||||
| 315 | |||||
| 316 | do { // wait for all group leaders | ||||
| 317 | threads_pending = 0; | ||||
| 318 | for (size_t thr = 0; thr < nproc; thr += b->threads_per_group) { | ||||
| 319 | threads_pending += b->flags[my_current_iter][thr].stillNeed; | ||||
| 320 | } | ||||
| 321 | // Execute tasks here | ||||
| 322 | if (__kmp_tasking_mode != tskm_immediate_exec) { | ||||
| 323 | kmp_task_team_t *task_team = this_thr->th.th_task_team; | ||||
| 324 | if (task_team != NULL__null) { | ||||
| 325 | if (TCR_SYNC_4(task_team->tt.tt_active)(task_team->tt.tt_active)) { | ||||
| 326 | if (KMP_TASKING_ENABLED(task_team)((!0) == ((task_team)->tt.tt_found_tasks))) { | ||||
| 327 | int tasks_completed = FALSE0; | ||||
| 328 | __kmp_atomic_execute_tasks_64( | ||||
| 329 | this_thr, gtid, (kmp_atomic_flag_64<> *)NULL__null, FALSE0, | ||||
| 330 | &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj, 0); | ||||
| 331 | } else | ||||
| 332 | this_thr->th.th_reap_state = KMP_SAFE_TO_REAP1; | ||||
| 333 | } | ||||
| 334 | } else { | ||||
| 335 | this_thr->th.th_reap_state = KMP_SAFE_TO_REAP1; | ||||
| 336 | } // if | ||||
| 337 | } | ||||
| 338 | if (TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done)) { | ||||
| 339 | if (__kmp_global.g.g_abort) | ||||
| 340 | __kmp_abort_thread(); | ||||
| 341 | break; | ||||
| 342 | } else if (__kmp_tasking_mode != tskm_immediate_exec && | ||||
| 343 | this_thr->th.th_reap_state == KMP_SAFE_TO_REAP1) { | ||||
| 344 | this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP0; | ||||
| 345 | } | ||||
| 346 | } while (threads_pending > 0); | ||||
| 347 | |||||
| 348 | if (reduce) { // Perform reduction if needed | ||||
| 349 | if (KMP_MASTER_TID(tid)(0 == (tid))) { // Master reduces over group leaders | ||||
| 350 | OMPT_REDUCTION_DECL(this_thr, gtid)ompt_data_t *my_task_data = (&(this_thr->th.th_current_task ->ompt_task_info.task_data)); ompt_data_t *my_parallel_data = (&(this_thr->th.th_team->t.ompt_team_info.parallel_data )); void *return_address = __ompt_load_return_address(gtid);; | ||||
| 351 | OMPT_REDUCTION_BEGINif (ompt_enabled.enabled && ompt_enabled.ompt_callback_reduction ) { ompt_callbacks.ompt_callback_reduction_callback( ompt_sync_region_reduction , ompt_scope_begin, my_parallel_data, my_task_data, return_address ); }; | ||||
| 352 | for (size_t thr = b->threads_per_group; thr < nproc; | ||||
| 353 | thr += b->threads_per_group) { | ||||
| 354 | (*reduce)(this_thr->th.th_local.reduce_data, | ||||
| 355 | other_threads[thr]->th.th_local.reduce_data); | ||||
| 356 | } | ||||
| 357 | OMPT_REDUCTION_ENDif (ompt_enabled.enabled && ompt_enabled.ompt_callback_reduction ) { ompt_callbacks.ompt_callback_reduction_callback( ompt_sync_region_reduction , ompt_scope_end, my_parallel_data, my_task_data, return_address ); }; | ||||
| 358 | } | ||||
| 359 | } | ||||
| 360 | } else { | ||||
| 361 | // Set flag for next iteration | ||||
| 362 | b->flags[my_next_iter][tid].stillNeed = 1; | ||||
| 363 | // Each thread uses a different cache line; resets stillNeed to 0 to | ||||
| 364 | // indicate it has reached the barrier | ||||
| 365 | b->flags[my_current_iter][tid].stillNeed = 0; | ||||
| 366 | } | ||||
| 367 | |||||
| 368 | KMP_MFENCE()if (__builtin_expect(!!(!__kmp_cpuinfo.initialized), 0)) { __kmp_query_cpuid (&__kmp_cpuinfo); } if (__kmp_cpuinfo.flags.sse2) { __sync_synchronize (); }; | ||||
| 369 | |||||
| 370 | KA_TRACE(20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_dist_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n" , gtid, team->t.t_id, tid, bt); } | ||||
| 371 | ("__kmp_dist_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_dist_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n" , gtid, team->t.t_id, tid, bt); } | ||||
| 372 | gtid, team->t.t_id, tid, bt))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_dist_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n" , gtid, team->t.t_id, tid, bt); }; | ||||
| 373 | } | ||||
| 374 | |||||
| 375 | static void __kmp_dist_barrier_release( | ||||
| 376 | enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, | ||||
| 377 | int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj), void *itt_sync_obj) { | ||||
| 378 | KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_dist_release)((void)0); | ||||
| 379 | kmp_team_t *team; | ||||
| 380 | distributedBarrier *b; | ||||
| 381 | kmp_bstate_t *thr_bar; | ||||
| 382 | kmp_uint64 my_current_iter, next_go; | ||||
| 383 | size_t my_go_index; | ||||
| 384 | bool group_leader; | ||||
| 385 | |||||
| 386 | KA_TRACE(20, ("__kmp_dist_barrier_release: T#%d(%d) enter; barrier type %d\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_dist_barrier_release: T#%d(%d) enter; barrier type %d\n" , gtid, tid, bt); } | ||||
| 387 | gtid, tid, bt))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_dist_barrier_release: T#%d(%d) enter; barrier type %d\n" , gtid, tid, bt); }; | ||||
| 388 | |||||
| 389 | thr_bar = &this_thr->th.th_bar[bt].bb; | ||||
| 390 | |||||
| 391 | if (!KMP_MASTER_TID(tid)(0 == (tid))) { | ||||
| 392 | // workers and non-master group leaders need to check their presence in team | ||||
| 393 | do { | ||||
| 394 | if (this_thr->th.th_used_in_team.load() != 1 && | ||||
| 395 | this_thr->th.th_used_in_team.load() != 3) { | ||||
| 396 | // Thread is not in use in a team. Wait on location in tid's thread | ||||
| 397 | // struct. The 0 value tells anyone looking that this thread is spinning | ||||
| 398 | // or sleeping until this location becomes 3 again; 3 is the transition | ||||
| 399 | // state to get to 1 which is waiting on go and being in the team | ||||
| 400 | kmp_flag_32<false, false> my_flag(&(this_thr->th.th_used_in_team), 3); | ||||
| 401 | if (KMP_COMPARE_AND_STORE_ACQ32(&(this_thr->th.th_used_in_team), 2,__sync_bool_compare_and_swap((volatile kmp_uint32 *)(&(this_thr ->th.th_used_in_team)), (kmp_uint32)(2), (kmp_uint32)(0)) | ||||
| 402 | 0)__sync_bool_compare_and_swap((volatile kmp_uint32 *)(&(this_thr ->th.th_used_in_team)), (kmp_uint32)(2), (kmp_uint32)(0)) || | ||||
| 403 | this_thr->th.th_used_in_team.load() == 0) { | ||||
| 404 | my_flag.wait(this_thr, true USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | ||||
| 405 | } | ||||
| 406 | #if USE_ITT_BUILD1 && USE_ITT_NOTIFY1 | ||||
| 407 | if ((__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 && itt_sync_obj == NULL__null) || KMP_ITT_DEBUG0) { | ||||
| 408 | // In fork barrier where we could not get the object reliably | ||||
| 409 | itt_sync_obj = | ||||
| 410 | __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1); | ||||
| 411 | // Cancel wait on previous parallel region... | ||||
| 412 | __kmp_itt_task_starting(itt_sync_obj); | ||||
| 413 | |||||
| 414 | if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done)) | ||||
| 415 | return; | ||||
| 416 | |||||
| 417 | itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier); | ||||
| 418 | if (itt_sync_obj != NULL__null) | ||||
| 419 | // Call prepare as early as possible for "new" barrier | ||||
| 420 | __kmp_itt_task_finished(itt_sync_obj); | ||||
| 421 | } else | ||||
| 422 | #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */ | ||||
| 423 | if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done)) | ||||
| 424 | return; | ||||
| 425 | } | ||||
| 426 | if (this_thr->th.th_used_in_team.load() != 1 && | ||||
| 427 | this_thr->th.th_used_in_team.load() != 3) // spurious wake-up? | ||||
| 428 | continue; | ||||
| 429 | if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done)) | ||||
| 430 | return; | ||||
| 431 | |||||
| 432 | // At this point, the thread thinks it is in use in a team, or in | ||||
| 433 | // transition to be used in a team, but it might have reached this barrier | ||||
| 434 | // before it was marked unused by the team. Unused threads are awoken and | ||||
| 435 | // shifted to wait on local thread struct elsewhere. It also might reach | ||||
| 436 | // this point by being picked up for use by a different team. Either way, | ||||
| 437 | // we need to update the tid. | ||||
| 438 | tid = __kmp_tid_from_gtid(gtid); | ||||
| 439 | team = this_thr->th.th_team; | ||||
| 440 | KMP_DEBUG_ASSERT(tid >= 0)if (!(tid >= 0)) { __kmp_debug_assert("tid >= 0", "openmp/runtime/src/kmp_barrier.cpp" , 440); }; | ||||
| 441 | KMP_DEBUG_ASSERT(team)if (!(team)) { __kmp_debug_assert("team", "openmp/runtime/src/kmp_barrier.cpp" , 441); }; | ||||
| 442 | b = team->t.b; | ||||
| 443 | my_current_iter = b->iter[tid].iter; | ||||
| 444 | next_go = my_current_iter + distributedBarrier::MAX_ITERS; | ||||
| 445 | my_go_index = tid / b->threads_per_go; | ||||
| 446 | if (this_thr->th.th_used_in_team.load() == 3) { | ||||
| 447 | KMP_COMPARE_AND_STORE_ACQ32(&(this_thr->th.th_used_in_team), 3, 1)__sync_bool_compare_and_swap((volatile kmp_uint32 *)(&(this_thr ->th.th_used_in_team)), (kmp_uint32)(3), (kmp_uint32)(1)); | ||||
| 448 | } | ||||
| 449 | // Check if go flag is set | ||||
| 450 | if (b->go[my_go_index].go.load() != next_go) { | ||||
| 451 | // Wait on go flag on team | ||||
| 452 | kmp_atomic_flag_64<false, true> my_flag( | ||||
| 453 | &(b->go[my_go_index].go), next_go, &(b->sleep[tid].sleep)); | ||||
| 454 | my_flag.wait(this_thr, true USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | ||||
| 455 | KMP_DEBUG_ASSERT(my_current_iter == b->iter[tid].iter ||if (!(my_current_iter == b->iter[tid].iter || b->iter[tid ].iter == 0)) { __kmp_debug_assert("my_current_iter == b->iter[tid].iter || b->iter[tid].iter == 0" , "openmp/runtime/src/kmp_barrier.cpp", 456); } | ||||
| 456 | b->iter[tid].iter == 0)if (!(my_current_iter == b->iter[tid].iter || b->iter[tid ].iter == 0)) { __kmp_debug_assert("my_current_iter == b->iter[tid].iter || b->iter[tid].iter == 0" , "openmp/runtime/src/kmp_barrier.cpp", 456); }; | ||||
| 457 | KMP_DEBUG_ASSERT(b->sleep[tid].sleep == false)if (!(b->sleep[tid].sleep == false)) { __kmp_debug_assert( "b->sleep[tid].sleep == false", "openmp/runtime/src/kmp_barrier.cpp" , 457); }; | ||||
| 458 | } | ||||
| 459 | |||||
| 460 | if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done)) | ||||
| 461 | return; | ||||
| 462 | // At this point, the thread's go location was set. This means the primary | ||||
| 463 | // thread is safely in the barrier, and so this thread's data is | ||||
| 464 | // up-to-date, but we should check again that this thread is really in | ||||
| 465 | // use in the team, as it could have been woken up for the purpose of | ||||
| 466 | // changing team size, or reaping threads at shutdown. | ||||
| 467 | if (this_thr->th.th_used_in_team.load() == 1) | ||||
| 468 | break; | ||||
| 469 | } while (1); | ||||
| 470 | |||||
| 471 | if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done)) | ||||
| 472 | return; | ||||
| 473 | |||||
| 474 | group_leader = ((tid % b->threads_per_group) == 0); | ||||
| 475 | if (group_leader) { | ||||
| 476 | // Tell all the threads in my group they can go! | ||||
| 477 | for (size_t go_idx = my_go_index + 1; | ||||
| 478 | go_idx < my_go_index + b->gos_per_group; go_idx++) { | ||||
| 479 | b->go[go_idx].go.store(next_go); | ||||
| 480 | } | ||||
| 481 | // Fence added so that workers can see changes to go. sfence inadequate. | ||||
| 482 | KMP_MFENCE()if (__builtin_expect(!!(!__kmp_cpuinfo.initialized), 0)) { __kmp_query_cpuid (&__kmp_cpuinfo); } if (__kmp_cpuinfo.flags.sse2) { __sync_synchronize (); }; | ||||
| 483 | } | ||||
| 484 | |||||
| 485 | #if KMP_BARRIER_ICV_PUSH1 | ||||
| 486 | if (propagate_icvs) { // copy ICVs to final dest | ||||
| 487 | __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, | ||||
| 488 | tid, FALSE0); | ||||
| 489 | copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, | ||||
| 490 | (kmp_internal_control_t *)team->t.b->team_icvs); | ||||
| 491 | copy_icvs(&thr_bar->th_fixed_icvs, | ||||
| 492 | &team->t.t_implicit_task_taskdata[tid].td_icvs); | ||||
| 493 | } | ||||
| 494 | #endif | ||||
| 495 | if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME(2147483647) && group_leader) { | ||||
| 496 | // This thread is now awake and participating in the barrier; | ||||
| 497 | // wake up the other threads in the group | ||||
| 498 | size_t nproc = this_thr->th.th_team_nproc; | ||||
| 499 | size_t group_end = tid + b->threads_per_group; | ||||
| 500 | if (nproc < group_end) | ||||
| 501 | group_end = nproc; | ||||
| 502 | __kmp_dist_barrier_wakeup(bt, team, tid + 1, group_end, 1, tid); | ||||
| 503 | } | ||||
| 504 | } else { // Primary thread | ||||
| 505 | team = this_thr->th.th_team; | ||||
| 506 | b = team->t.b; | ||||
| 507 | my_current_iter = b->iter[tid].iter; | ||||
| 508 | next_go = my_current_iter + distributedBarrier::MAX_ITERS; | ||||
| 509 | #if KMP_BARRIER_ICV_PUSH1 | ||||
| 510 | if (propagate_icvs) { | ||||
| 511 | // primary thread has ICVs in final destination; copy | ||||
| 512 | copy_icvs(&thr_bar->th_fixed_icvs, | ||||
| 513 | &team->t.t_implicit_task_taskdata[tid].td_icvs); | ||||
| 514 | } | ||||
| 515 | #endif | ||||
| 516 | // Tell all the group leaders they can go! | ||||
| 517 | for (size_t go_idx = 0; go_idx < b->num_gos; go_idx += b->gos_per_group) { | ||||
| 518 | b->go[go_idx].go.store(next_go); | ||||
| 519 | } | ||||
| 520 | |||||
| 521 | if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME(2147483647)) { | ||||
| 522 | // Wake-up the group leaders | ||||
| 523 | size_t nproc = this_thr->th.th_team_nproc; | ||||
| 524 | __kmp_dist_barrier_wakeup(bt, team, tid + b->threads_per_group, nproc, | ||||
| 525 | b->threads_per_group, tid); | ||||
| 526 | } | ||||
| 527 | |||||
| 528 | // Tell all the threads in my group they can go! | ||||
| 529 | for (size_t go_idx = 1; go_idx < b->gos_per_group; go_idx++) { | ||||
| 530 | b->go[go_idx].go.store(next_go); | ||||
| 531 | } | ||||
| 532 | |||||
| 533 | // Fence added so that workers can see changes to go. sfence inadequate. | ||||
| 534 | KMP_MFENCE()if (__builtin_expect(!!(!__kmp_cpuinfo.initialized), 0)) { __kmp_query_cpuid (&__kmp_cpuinfo); } if (__kmp_cpuinfo.flags.sse2) { __sync_synchronize (); }; | ||||
| 535 | |||||
| 536 | if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME(2147483647)) { | ||||
| 537 | // Wake-up the other threads in my group | ||||
| 538 | size_t nproc = this_thr->th.th_team_nproc; | ||||
| 539 | size_t group_end = tid + b->threads_per_group; | ||||
| 540 | if (nproc < group_end) | ||||
| 541 | group_end = nproc; | ||||
| 542 | __kmp_dist_barrier_wakeup(bt, team, tid + 1, group_end, 1, tid); | ||||
| 543 | } | ||||
| 544 | } | ||||
| 545 | // Update to next iteration | ||||
| 546 | KMP_ASSERT(my_current_iter == b->iter[tid].iter)if (!(my_current_iter == b->iter[tid].iter)) { __kmp_debug_assert ("my_current_iter == b->iter[tid].iter", "openmp/runtime/src/kmp_barrier.cpp" , 546); }; | ||||
| 547 | b->iter[tid].iter = (b->iter[tid].iter + 1) % distributedBarrier::MAX_ITERS; | ||||
| 548 | |||||
| 549 | KA_TRACE(if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_dist_barrier_release: T#%d(%d:%d) exit for barrier type %d\n" , gtid, team->t.t_id, tid, bt); } | ||||
| 550 | 20, ("__kmp_dist_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_dist_barrier_release: T#%d(%d:%d) exit for barrier type %d\n" , gtid, team->t.t_id, tid, bt); } | ||||
| 551 | gtid, team->t.t_id, tid, bt))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_dist_barrier_release: T#%d(%d:%d) exit for barrier type %d\n" , gtid, team->t.t_id, tid, bt); }; | ||||
| 552 | } | ||||
| 553 | |||||
| 554 | // Linear Barrier | ||||
| 555 | template <bool cancellable = false> | ||||
| 556 | static bool __kmp_linear_barrier_gather_template( | ||||
| 557 | enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, | ||||
| 558 | void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj), void *itt_sync_obj) { | ||||
| 559 | KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_linear_gather)((void)0); | ||||
| 560 | kmp_team_t *team = this_thr->th.th_team; | ||||
| 561 | kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb; | ||||
| 562 | kmp_info_t **other_threads = team->t.t_threads; | ||||
| 563 | |||||
| 564 | KA_TRACE(if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n" , gtid, team->t.t_id, tid, bt); } | ||||
| 565 | 20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n" , gtid, team->t.t_id, tid, bt); } | ||||
| 566 | ("__kmp_linear_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n" , gtid, team->t.t_id, tid, bt); } | ||||
| 567 | gtid, team->t.t_id, tid, bt))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n" , gtid, team->t.t_id, tid, bt); }; | ||||
| 568 | KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid])if (!(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid ])) { __kmp_debug_assert("this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]" , "openmp/runtime/src/kmp_barrier.cpp", 568); }; | ||||
| 569 | |||||
| 570 | #if USE_ITT_BUILD1 && USE_ITT_NOTIFY1 | ||||
| 571 | // Barrier imbalance - save arrive time to the thread | ||||
| 572 | if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) { | ||||
| 573 | this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time = | ||||
| 574 | __itt_get_timestamp(!__kmp_itt_get_timestamp_ptr__3_0) ? 0 : __kmp_itt_get_timestamp_ptr__3_0(); | ||||
| 575 | } | ||||
| 576 | #endif | ||||
| 577 | // We now perform a linear reduction to signal that all of the threads have | ||||
| 578 | // arrived. | ||||
| 579 | if (!KMP_MASTER_TID(tid)(0 == (tid))) { | ||||
| 580 | KA_TRACE(20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d)" "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid , __kmp_gtid_from_tid(0, team), team->t.t_id, 0, &thr_bar ->b_arrived, thr_bar->b_arrived, thr_bar->b_arrived + (1 << 2)); } | ||||
| 581 | ("__kmp_linear_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d)"if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d)" "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid , __kmp_gtid_from_tid(0, team), team->t.t_id, 0, &thr_bar ->b_arrived, thr_bar->b_arrived, thr_bar->b_arrived + (1 << 2)); } | ||||
| 582 | "arrived(%p): %llu => %llu\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d)" "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid , __kmp_gtid_from_tid(0, team), team->t.t_id, 0, &thr_bar ->b_arrived, thr_bar->b_arrived, thr_bar->b_arrived + (1 << 2)); } | ||||
| 583 | gtid, team->t.t_id, tid, __kmp_gtid_from_tid(0, team),if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d)" "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid , __kmp_gtid_from_tid(0, team), team->t.t_id, 0, &thr_bar ->b_arrived, thr_bar->b_arrived, thr_bar->b_arrived + (1 << 2)); } | ||||
| 584 | team->t.t_id, 0, &thr_bar->b_arrived, thr_bar->b_arrived,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d)" "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid , __kmp_gtid_from_tid(0, team), team->t.t_id, 0, &thr_bar ->b_arrived, thr_bar->b_arrived, thr_bar->b_arrived + (1 << 2)); } | ||||
| 585 | thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d)" "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid , __kmp_gtid_from_tid(0, team), team->t.t_id, 0, &thr_bar ->b_arrived, thr_bar->b_arrived, thr_bar->b_arrived + (1 << 2)); }; | ||||
| 586 | // Mark arrival to primary thread | ||||
| 587 | /* After performing this write, a worker thread may not assume that the team | ||||
| 588 | is valid any more - it could be deallocated by the primary thread at any | ||||
| 589 | time. */ | ||||
| 590 | kmp_flag_64<> flag(&thr_bar->b_arrived, other_threads[0]); | ||||
| 591 | flag.release(); | ||||
| 592 | } else { | ||||
| 593 | kmp_balign_team_t *team_bar = &team->t.t_bar[bt]; | ||||
| 594 | int nproc = this_thr->th.th_team_nproc; | ||||
| 595 | int i; | ||||
| 596 | // Don't have to worry about sleep bit here or atomic since team setting | ||||
| 597 | kmp_uint64 new_state = team_bar->b_arrived + KMP_BARRIER_STATE_BUMP(1 << 2); | ||||
| 598 | |||||
| 599 | // Collect all the worker team member threads. | ||||
| 600 | for (i = 1; i < nproc; ++i) { | ||||
| 601 | #if KMP_CACHE_MANAGE | ||||
| 602 | // Prefetch next thread's arrived count | ||||
| 603 | if (i + 1 < nproc) | ||||
| 604 | KMP_CACHE_PREFETCH(&other_threads[i + 1]->th.th_bar[bt].bb.b_arrived); | ||||
| 605 | #endif /* KMP_CACHE_MANAGE */ | ||||
| 606 | KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) " "arrived(%p) == %llu\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (i, team), team->t.t_id, i, &other_threads[i]->th.th_bar [bt].bb.b_arrived, new_state); } | ||||
| 607 | "arrived(%p) == %llu\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) " "arrived(%p) == %llu\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (i, team), team->t.t_id, i, &other_threads[i]->th.th_bar [bt].bb.b_arrived, new_state); } | ||||
| 608 | gtid, team->t.t_id, tid, __kmp_gtid_from_tid(i, team),if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) " "arrived(%p) == %llu\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (i, team), team->t.t_id, i, &other_threads[i]->th.th_bar [bt].bb.b_arrived, new_state); } | ||||
| 609 | team->t.t_id, i,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) " "arrived(%p) == %llu\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (i, team), team->t.t_id, i, &other_threads[i]->th.th_bar [bt].bb.b_arrived, new_state); } | ||||
| 610 | &other_threads[i]->th.th_bar[bt].bb.b_arrived, new_state))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) " "arrived(%p) == %llu\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (i, team), team->t.t_id, i, &other_threads[i]->th.th_bar [bt].bb.b_arrived, new_state); }; | ||||
| 611 | |||||
| 612 | // Wait for worker thread to arrive | ||||
| 613 | if (cancellable) { | ||||
| 614 | kmp_flag_64<true, false> flag( | ||||
| 615 | &other_threads[i]->th.th_bar[bt].bb.b_arrived, new_state); | ||||
| 616 | if (flag.wait(this_thr, FALSE0 USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj)) | ||||
| 617 | return true; | ||||
| 618 | } else { | ||||
| 619 | kmp_flag_64<> flag(&other_threads[i]->th.th_bar[bt].bb.b_arrived, | ||||
| 620 | new_state); | ||||
| 621 | flag.wait(this_thr, FALSE0 USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | ||||
| 622 | } | ||||
| 623 | #if USE_ITT_BUILD1 && USE_ITT_NOTIFY1 | ||||
| 624 | // Barrier imbalance - write min of the thread time and the other thread | ||||
| 625 | // time to the thread. | ||||
| 626 | if (__kmp_forkjoin_frames_mode == 2) { | ||||
| 627 | this_thr->th.th_bar_min_time = KMP_MIN(((this_thr->th.th_bar_min_time) < (other_threads[i]-> th.th_bar_min_time) ? (this_thr->th.th_bar_min_time) : (other_threads [i]->th.th_bar_min_time)) | ||||
| 628 | this_thr->th.th_bar_min_time, other_threads[i]->th.th_bar_min_time)((this_thr->th.th_bar_min_time) < (other_threads[i]-> th.th_bar_min_time) ? (this_thr->th.th_bar_min_time) : (other_threads [i]->th.th_bar_min_time)); | ||||
| 629 | } | ||||
| 630 | #endif | ||||
| 631 | if (reduce) { | ||||
| 632 | KA_TRACE(100,if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_linear_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n" , gtid, team->t.t_id, tid, __kmp_gtid_from_tid(i, team), team ->t.t_id, i); } | ||||
| 633 | ("__kmp_linear_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n",if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_linear_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n" , gtid, team->t.t_id, tid, __kmp_gtid_from_tid(i, team), team ->t.t_id, i); } | ||||
| 634 | gtid, team->t.t_id, tid, __kmp_gtid_from_tid(i, team),if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_linear_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n" , gtid, team->t.t_id, tid, __kmp_gtid_from_tid(i, team), team ->t.t_id, i); } | ||||
| 635 | team->t.t_id, i))if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_linear_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n" , gtid, team->t.t_id, tid, __kmp_gtid_from_tid(i, team), team ->t.t_id, i); }; | ||||
| 636 | OMPT_REDUCTION_DECL(this_thr, gtid)ompt_data_t *my_task_data = (&(this_thr->th.th_current_task ->ompt_task_info.task_data)); ompt_data_t *my_parallel_data = (&(this_thr->th.th_team->t.ompt_team_info.parallel_data )); void *return_address = __ompt_load_return_address(gtid);; | ||||
| 637 | OMPT_REDUCTION_BEGINif (ompt_enabled.enabled && ompt_enabled.ompt_callback_reduction ) { ompt_callbacks.ompt_callback_reduction_callback( ompt_sync_region_reduction , ompt_scope_begin, my_parallel_data, my_task_data, return_address ); }; | ||||
| 638 | (*reduce)(this_thr->th.th_local.reduce_data, | ||||
| 639 | other_threads[i]->th.th_local.reduce_data); | ||||
| 640 | OMPT_REDUCTION_ENDif (ompt_enabled.enabled && ompt_enabled.ompt_callback_reduction ) { ompt_callbacks.ompt_callback_reduction_callback( ompt_sync_region_reduction , ompt_scope_end, my_parallel_data, my_task_data, return_address ); }; | ||||
| 641 | } | ||||
| 642 | } | ||||
| 643 | // Don't have to worry about sleep bit here or atomic since team setting | ||||
| 644 | team_bar->b_arrived = new_state; | ||||
| 645 | KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) set team %d "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_gather: T#%d(%d:%d) set team %d " "arrived(%p) = %llu\n", gtid, team->t.t_id, tid, team-> t.t_id, &team_bar->b_arrived, new_state); } | ||||
| 646 | "arrived(%p) = %llu\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_gather: T#%d(%d:%d) set team %d " "arrived(%p) = %llu\n", gtid, team->t.t_id, tid, team-> t.t_id, &team_bar->b_arrived, new_state); } | ||||
| 647 | gtid, team->t.t_id, tid, team->t.t_id, &team_bar->b_arrived,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_gather: T#%d(%d:%d) set team %d " "arrived(%p) = %llu\n", gtid, team->t.t_id, tid, team-> t.t_id, &team_bar->b_arrived, new_state); } | ||||
| 648 | new_state))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_gather: T#%d(%d:%d) set team %d " "arrived(%p) = %llu\n", gtid, team->t.t_id, tid, team-> t.t_id, &team_bar->b_arrived, new_state); }; | ||||
| 649 | } | ||||
| 650 | KA_TRACE(if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n" , gtid, team->t.t_id, tid, bt); } | ||||
| 651 | 20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n" , gtid, team->t.t_id, tid, bt); } | ||||
| 652 | ("__kmp_linear_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n" , gtid, team->t.t_id, tid, bt); } | ||||
| 653 | gtid, team->t.t_id, tid, bt))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n" , gtid, team->t.t_id, tid, bt); }; | ||||
| 654 | return false; | ||||
| 655 | } | ||||
| 656 | |||||
| 657 | template <bool cancellable = false> | ||||
| 658 | static bool __kmp_linear_barrier_release_template( | ||||
| 659 | enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, | ||||
| 660 | int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj), void *itt_sync_obj) { | ||||
| 661 | KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_linear_release)((void)0); | ||||
| 662 | kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb; | ||||
| 663 | kmp_team_t *team; | ||||
| 664 | |||||
| 665 | if (KMP_MASTER_TID(tid)(0 == (tid))) { | ||||
| 666 | unsigned int i; | ||||
| 667 | kmp_uint32 nproc = this_thr->th.th_team_nproc; | ||||
| 668 | kmp_info_t **other_threads; | ||||
| 669 | |||||
| 670 | team = __kmp_threads[gtid]->th.th_team; | ||||
| 671 | KMP_DEBUG_ASSERT(team != NULL)if (!(team != __null)) { __kmp_debug_assert("team != __null", "openmp/runtime/src/kmp_barrier.cpp", 671); }; | ||||
| 672 | other_threads = team->t.t_threads; | ||||
| 673 | |||||
| 674 | KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d(%d:%d) primary enter for "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_release: T#%d(%d:%d) primary enter for " "barrier type %d\n", gtid, team->t.t_id, tid, bt); } | ||||
| 675 | "barrier type %d\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_release: T#%d(%d:%d) primary enter for " "barrier type %d\n", gtid, team->t.t_id, tid, bt); } | ||||
| 676 | gtid, team->t.t_id, tid, bt))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_release: T#%d(%d:%d) primary enter for " "barrier type %d\n", gtid, team->t.t_id, tid, bt); }; | ||||
| 677 | |||||
| 678 | if (nproc > 1) { | ||||
| 679 | #if KMP_BARRIER_ICV_PUSH1 | ||||
| 680 | { | ||||
| 681 | KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy)((void)0); | ||||
| 682 | if (propagate_icvs) { | ||||
| 683 | ngo_load(&team->t.t_implicit_task_taskdata[0].td_icvs)((void)0); | ||||
| 684 | for (i = 1; i < nproc; ++i) { | ||||
| 685 | __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[i], | ||||
| 686 | team, i, FALSE0); | ||||
| 687 | ngo_store_icvs(&team->t.t_implicit_task_taskdata[i].td_icvs,copy_icvs((&team->t.t_implicit_task_taskdata[i].td_icvs ), (&team->t.t_implicit_task_taskdata[0].td_icvs)) | ||||
| 688 | &team->t.t_implicit_task_taskdata[0].td_icvs)copy_icvs((&team->t.t_implicit_task_taskdata[i].td_icvs ), (&team->t.t_implicit_task_taskdata[0].td_icvs)); | ||||
| 689 | } | ||||
| 690 | ngo_sync()((void)0); | ||||
| 691 | } | ||||
| 692 | } | ||||
| 693 | #endif // KMP_BARRIER_ICV_PUSH | ||||
| 694 | |||||
| 695 | // Now, release all of the worker threads | ||||
| 696 | for (i = 1; i < nproc; ++i) { | ||||
| 697 | #if KMP_CACHE_MANAGE | ||||
| 698 | // Prefetch next thread's go flag | ||||
| 699 | if (i + 1 < nproc) | ||||
| 700 | KMP_CACHE_PREFETCH(&other_threads[i + 1]->th.th_bar[bt].bb.b_go); | ||||
| 701 | #endif /* KMP_CACHE_MANAGE */ | ||||
| 702 | KA_TRACE(if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d) " "go(%p): %u => %u\n", gtid, team->t.t_id, tid, other_threads [i]->th.th_info.ds.ds_gtid, team->t.t_id, i, &other_threads [i]->th.th_bar[bt].bb.b_go, other_threads[i]->th.th_bar [bt].bb.b_go, other_threads[i]->th.th_bar[bt].bb.b_go + (1 << 2)); } | ||||
| 703 | 20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d) " "go(%p): %u => %u\n", gtid, team->t.t_id, tid, other_threads [i]->th.th_info.ds.ds_gtid, team->t.t_id, i, &other_threads [i]->th.th_bar[bt].bb.b_go, other_threads[i]->th.th_bar [bt].bb.b_go, other_threads[i]->th.th_bar[bt].bb.b_go + (1 << 2)); } | ||||
| 704 | ("__kmp_linear_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d) "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d) " "go(%p): %u => %u\n", gtid, team->t.t_id, tid, other_threads [i]->th.th_info.ds.ds_gtid, team->t.t_id, i, &other_threads [i]->th.th_bar[bt].bb.b_go, other_threads[i]->th.th_bar [bt].bb.b_go, other_threads[i]->th.th_bar[bt].bb.b_go + (1 << 2)); } | ||||
| 705 | "go(%p): %u => %u\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d) " "go(%p): %u => %u\n", gtid, team->t.t_id, tid, other_threads [i]->th.th_info.ds.ds_gtid, team->t.t_id, i, &other_threads [i]->th.th_bar[bt].bb.b_go, other_threads[i]->th.th_bar [bt].bb.b_go, other_threads[i]->th.th_bar[bt].bb.b_go + (1 << 2)); } | ||||
| 706 | gtid, team->t.t_id, tid, other_threads[i]->th.th_info.ds.ds_gtid,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d) " "go(%p): %u => %u\n", gtid, team->t.t_id, tid, other_threads [i]->th.th_info.ds.ds_gtid, team->t.t_id, i, &other_threads [i]->th.th_bar[bt].bb.b_go, other_threads[i]->th.th_bar [bt].bb.b_go, other_threads[i]->th.th_bar[bt].bb.b_go + (1 << 2)); } | ||||
| 707 | team->t.t_id, i, &other_threads[i]->th.th_bar[bt].bb.b_go,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d) " "go(%p): %u => %u\n", gtid, team->t.t_id, tid, other_threads [i]->th.th_info.ds.ds_gtid, team->t.t_id, i, &other_threads [i]->th.th_bar[bt].bb.b_go, other_threads[i]->th.th_bar [bt].bb.b_go, other_threads[i]->th.th_bar[bt].bb.b_go + (1 << 2)); } | ||||
| 708 | other_threads[i]->th.th_bar[bt].bb.b_go,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d) " "go(%p): %u => %u\n", gtid, team->t.t_id, tid, other_threads [i]->th.th_info.ds.ds_gtid, team->t.t_id, i, &other_threads [i]->th.th_bar[bt].bb.b_go, other_threads[i]->th.th_bar [bt].bb.b_go, other_threads[i]->th.th_bar[bt].bb.b_go + (1 << 2)); } | ||||
| 709 | other_threads[i]->th.th_bar[bt].bb.b_go + KMP_BARRIER_STATE_BUMP))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d) " "go(%p): %u => %u\n", gtid, team->t.t_id, tid, other_threads [i]->th.th_info.ds.ds_gtid, team->t.t_id, i, &other_threads [i]->th.th_bar[bt].bb.b_go, other_threads[i]->th.th_bar [bt].bb.b_go, other_threads[i]->th.th_bar[bt].bb.b_go + (1 << 2)); }; | ||||
| 710 | kmp_flag_64<> flag(&other_threads[i]->th.th_bar[bt].bb.b_go, | ||||
| 711 | other_threads[i]); | ||||
| 712 | flag.release(); | ||||
| 713 | } | ||||
| 714 | } | ||||
| 715 | } else { // Wait for the PRIMARY thread to release us | ||||
| 716 | KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d wait go(%p) == %u\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_release: T#%d wait go(%p) == %u\n" , gtid, &thr_bar->b_go, (1 << 2)); } | ||||
| 717 | gtid, &thr_bar->b_go, KMP_BARRIER_STATE_BUMP))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_release: T#%d wait go(%p) == %u\n" , gtid, &thr_bar->b_go, (1 << 2)); }; | ||||
| 718 | if (cancellable) { | ||||
| 719 | kmp_flag_64<true, false> flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP(1 << 2)); | ||||
| 720 | if (flag.wait(this_thr, TRUE(!0) USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj)) | ||||
| 721 | return true; | ||||
| 722 | } else { | ||||
| 723 | kmp_flag_64<> flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP(1 << 2)); | ||||
| 724 | flag.wait(this_thr, TRUE(!0) USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | ||||
| 725 | } | ||||
| 726 | #if USE_ITT_BUILD1 && USE_ITT_NOTIFY1 | ||||
| 727 | if ((__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 && itt_sync_obj == NULL__null) || KMP_ITT_DEBUG0) { | ||||
| 728 | // In a fork barrier; cannot get the object reliably (or ITTNOTIFY is | ||||
| 729 | // disabled) | ||||
| 730 | itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1); | ||||
| 731 | // Cancel wait on previous parallel region... | ||||
| 732 | __kmp_itt_task_starting(itt_sync_obj); | ||||
| 733 | |||||
| 734 | if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done)) | ||||
| 735 | return false; | ||||
| 736 | |||||
| 737 | itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier); | ||||
| 738 | if (itt_sync_obj != NULL__null) | ||||
| 739 | // Call prepare as early as possible for "new" barrier | ||||
| 740 | __kmp_itt_task_finished(itt_sync_obj); | ||||
| 741 | } else | ||||
| 742 | #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */ | ||||
| 743 | // Early exit for reaping threads releasing forkjoin barrier | ||||
| 744 | if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done)) | ||||
| 745 | return false; | ||||
| 746 | // The worker thread may now assume that the team is valid. | ||||
| 747 | #ifdef KMP_DEBUG1 | ||||
| 748 | tid = __kmp_tid_from_gtid(gtid); | ||||
| 749 | team = __kmp_threads[gtid]->th.th_team; | ||||
| 750 | #endif | ||||
| 751 | KMP_DEBUG_ASSERT(team != NULL)if (!(team != __null)) { __kmp_debug_assert("team != __null", "openmp/runtime/src/kmp_barrier.cpp", 751); }; | ||||
| 752 | TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE)(thr_bar->b_go) = (0); | ||||
| 753 | KA_TRACE(20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_release: T#%d(%d:%d) set go(%p) = %u\n" , gtid, team->t.t_id, tid, &thr_bar->b_go, 0); } | ||||
| 754 | ("__kmp_linear_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_release: T#%d(%d:%d) set go(%p) = %u\n" , gtid, team->t.t_id, tid, &thr_bar->b_go, 0); } | ||||
| 755 | gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_release: T#%d(%d:%d) set go(%p) = %u\n" , gtid, team->t.t_id, tid, &thr_bar->b_go, 0); }; | ||||
| 756 | KMP_MB(); // Flush all pending memory write invalidates. | ||||
| 757 | } | ||||
| 758 | KA_TRACE(if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_release: T#%d(%d:%d) exit for barrier type %d\n" , gtid, team->t.t_id, tid, bt); } | ||||
| 759 | 20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_release: T#%d(%d:%d) exit for barrier type %d\n" , gtid, team->t.t_id, tid, bt); } | ||||
| 760 | ("__kmp_linear_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_release: T#%d(%d:%d) exit for barrier type %d\n" , gtid, team->t.t_id, tid, bt); } | ||||
| 761 | gtid, team->t.t_id, tid, bt))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_release: T#%d(%d:%d) exit for barrier type %d\n" , gtid, team->t.t_id, tid, bt); }; | ||||
| 762 | return false; | ||||
| 763 | } | ||||
| 764 | |||||
| 765 | static void __kmp_linear_barrier_gather( | ||||
| 766 | enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, | ||||
| 767 | void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj), void *itt_sync_obj) { | ||||
| 768 | __kmp_linear_barrier_gather_template<false>( | ||||
| 769 | bt, this_thr, gtid, tid, reduce USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | ||||
| 770 | } | ||||
| 771 | |||||
| 772 | static bool __kmp_linear_barrier_gather_cancellable( | ||||
| 773 | enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, | ||||
| 774 | void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj), void *itt_sync_obj) { | ||||
| 775 | return __kmp_linear_barrier_gather_template<true>( | ||||
| 776 | bt, this_thr, gtid, tid, reduce USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | ||||
| 777 | } | ||||
| 778 | |||||
| 779 | static void __kmp_linear_barrier_release( | ||||
| 780 | enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, | ||||
| 781 | int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj), void *itt_sync_obj) { | ||||
| 782 | __kmp_linear_barrier_release_template<false>( | ||||
| 783 | bt, this_thr, gtid, tid, propagate_icvs USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | ||||
| 784 | } | ||||
| 785 | |||||
| 786 | static bool __kmp_linear_barrier_release_cancellable( | ||||
| 787 | enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, | ||||
| 788 | int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj), void *itt_sync_obj) { | ||||
| 789 | return __kmp_linear_barrier_release_template<true>( | ||||
| 790 | bt, this_thr, gtid, tid, propagate_icvs USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | ||||
| 791 | } | ||||
| 792 | |||||
| 793 | // Tree barrier | ||||
| 794 | static void __kmp_tree_barrier_gather( | ||||
| 795 | enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, | ||||
| 796 | void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj), void *itt_sync_obj) { | ||||
| 797 | KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_tree_gather)((void)0); | ||||
| 798 | kmp_team_t *team = this_thr->th.th_team; | ||||
| 799 | kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb; | ||||
| 800 | kmp_info_t **other_threads = team->t.t_threads; | ||||
| 801 | kmp_uint32 nproc = this_thr->th.th_team_nproc; | ||||
| 802 | kmp_uint32 branch_bits = __kmp_barrier_gather_branch_bits[bt]; | ||||
| 803 | kmp_uint32 branch_factor = 1 << branch_bits; | ||||
| 804 | kmp_uint32 child; | ||||
| 805 | kmp_uint32 child_tid; | ||||
| 806 | kmp_uint64 new_state = 0; | ||||
| 807 | |||||
| 808 | KA_TRACE(if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n" , gtid, team->t.t_id, tid, bt); } | ||||
| 809 | 20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n" , gtid, team->t.t_id, tid, bt); } | ||||
| 810 | gtid, team->t.t_id, tid, bt))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n" , gtid, team->t.t_id, tid, bt); }; | ||||
| 811 | KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid])if (!(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid ])) { __kmp_debug_assert("this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]" , "openmp/runtime/src/kmp_barrier.cpp", 811); }; | ||||
| 812 | |||||
| 813 | #if USE_ITT_BUILD1 && USE_ITT_NOTIFY1 | ||||
| 814 | // Barrier imbalance - save arrive time to the thread | ||||
| 815 | if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) { | ||||
| 816 | this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time = | ||||
| 817 | __itt_get_timestamp(!__kmp_itt_get_timestamp_ptr__3_0) ? 0 : __kmp_itt_get_timestamp_ptr__3_0(); | ||||
| 818 | } | ||||
| 819 | #endif | ||||
| 820 | // Perform tree gather to wait until all threads have arrived; reduce any | ||||
| 821 | // required data as we go | ||||
| 822 | child_tid = (tid << branch_bits) + 1; | ||||
| 823 | if (child_tid < nproc) { | ||||
| 824 | // Parent threads wait for all their children to arrive | ||||
| 825 | new_state = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP(1 << 2); | ||||
| 826 | child = 1; | ||||
| 827 | do { | ||||
| 828 | kmp_info_t *child_thr = other_threads[child_tid]; | ||||
| 829 | kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb; | ||||
| 830 | #if KMP_CACHE_MANAGE | ||||
| 831 | // Prefetch next thread's arrived count | ||||
| 832 | if (child + 1 <= branch_factor && child_tid + 1 < nproc) | ||||
| 833 | KMP_CACHE_PREFETCH( | ||||
| 834 | &other_threads[child_tid + 1]->th.th_bar[bt].bb.b_arrived); | ||||
| 835 | #endif /* KMP_CACHE_MANAGE */ | ||||
| 836 | KA_TRACE(20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) " "arrived(%p) == %llu\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid, &child_bar ->b_arrived, new_state); } | ||||
| 837 | ("__kmp_tree_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) " "arrived(%p) == %llu\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid, &child_bar ->b_arrived, new_state); } | ||||
| 838 | "arrived(%p) == %llu\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) " "arrived(%p) == %llu\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid, &child_bar ->b_arrived, new_state); } | ||||
| 839 | gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) " "arrived(%p) == %llu\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid, &child_bar ->b_arrived, new_state); } | ||||
| 840 | team->t.t_id, child_tid, &child_bar->b_arrived, new_state))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) " "arrived(%p) == %llu\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid, &child_bar ->b_arrived, new_state); }; | ||||
| 841 | // Wait for child to arrive | ||||
| 842 | kmp_flag_64<> flag(&child_bar->b_arrived, new_state); | ||||
| 843 | flag.wait(this_thr, FALSE0 USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | ||||
| 844 | #if USE_ITT_BUILD1 && USE_ITT_NOTIFY1 | ||||
| 845 | // Barrier imbalance - write min of the thread time and a child time to | ||||
| 846 | // the thread. | ||||
| 847 | if (__kmp_forkjoin_frames_mode == 2) { | ||||
| 848 | this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time,((this_thr->th.th_bar_min_time) < (child_thr->th.th_bar_min_time ) ? (this_thr->th.th_bar_min_time) : (child_thr->th.th_bar_min_time )) | ||||
| 849 | child_thr->th.th_bar_min_time)((this_thr->th.th_bar_min_time) < (child_thr->th.th_bar_min_time ) ? (this_thr->th.th_bar_min_time) : (child_thr->th.th_bar_min_time )); | ||||
| 850 | } | ||||
| 851 | #endif | ||||
| 852 | if (reduce) { | ||||
| 853 | KA_TRACE(100,if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_tree_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n" , gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team ), team->t.t_id, child_tid); } | ||||
| 854 | ("__kmp_tree_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n",if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_tree_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n" , gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team ), team->t.t_id, child_tid); } | ||||
| 855 | gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_tree_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n" , gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team ), team->t.t_id, child_tid); } | ||||
| 856 | team->t.t_id, child_tid))if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_tree_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n" , gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team ), team->t.t_id, child_tid); }; | ||||
| 857 | OMPT_REDUCTION_DECL(this_thr, gtid)ompt_data_t *my_task_data = (&(this_thr->th.th_current_task ->ompt_task_info.task_data)); ompt_data_t *my_parallel_data = (&(this_thr->th.th_team->t.ompt_team_info.parallel_data )); void *return_address = __ompt_load_return_address(gtid);; | ||||
| 858 | OMPT_REDUCTION_BEGINif (ompt_enabled.enabled && ompt_enabled.ompt_callback_reduction ) { ompt_callbacks.ompt_callback_reduction_callback( ompt_sync_region_reduction , ompt_scope_begin, my_parallel_data, my_task_data, return_address ); }; | ||||
| 859 | (*reduce)(this_thr->th.th_local.reduce_data, | ||||
| 860 | child_thr->th.th_local.reduce_data); | ||||
| 861 | OMPT_REDUCTION_ENDif (ompt_enabled.enabled && ompt_enabled.ompt_callback_reduction ) { ompt_callbacks.ompt_callback_reduction_callback( ompt_sync_region_reduction , ompt_scope_end, my_parallel_data, my_task_data, return_address ); }; | ||||
| 862 | } | ||||
| 863 | child++; | ||||
| 864 | child_tid++; | ||||
| 865 | } while (child <= branch_factor && child_tid < nproc); | ||||
| 866 | } | ||||
| 867 | |||||
| 868 | if (!KMP_MASTER_TID(tid)(0 == (tid))) { // Worker threads | ||||
| 869 | kmp_int32 parent_tid = (tid - 1) >> branch_bits; | ||||
| 870 | |||||
| 871 | KA_TRACE(20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) " "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid , __kmp_gtid_from_tid(parent_tid, team), team->t.t_id, parent_tid , &thr_bar->b_arrived, thr_bar->b_arrived, thr_bar-> b_arrived + (1 << 2)); } | ||||
| 872 | ("__kmp_tree_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) " "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid , __kmp_gtid_from_tid(parent_tid, team), team->t.t_id, parent_tid , &thr_bar->b_arrived, thr_bar->b_arrived, thr_bar-> b_arrived + (1 << 2)); } | ||||
| 873 | "arrived(%p): %llu => %llu\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) " "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid , __kmp_gtid_from_tid(parent_tid, team), team->t.t_id, parent_tid , &thr_bar->b_arrived, thr_bar->b_arrived, thr_bar-> b_arrived + (1 << 2)); } | ||||
| 874 | gtid, team->t.t_id, tid, __kmp_gtid_from_tid(parent_tid, team),if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) " "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid , __kmp_gtid_from_tid(parent_tid, team), team->t.t_id, parent_tid , &thr_bar->b_arrived, thr_bar->b_arrived, thr_bar-> b_arrived + (1 << 2)); } | ||||
| 875 | team->t.t_id, parent_tid, &thr_bar->b_arrived, thr_bar->b_arrived,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) " "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid , __kmp_gtid_from_tid(parent_tid, team), team->t.t_id, parent_tid , &thr_bar->b_arrived, thr_bar->b_arrived, thr_bar-> b_arrived + (1 << 2)); } | ||||
| 876 | thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) " "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid , __kmp_gtid_from_tid(parent_tid, team), team->t.t_id, parent_tid , &thr_bar->b_arrived, thr_bar->b_arrived, thr_bar-> b_arrived + (1 << 2)); }; | ||||
| 877 | |||||
| 878 | // Mark arrival to parent thread | ||||
| 879 | /* After performing this write, a worker thread may not assume that the team | ||||
| 880 | is valid any more - it could be deallocated by the primary thread at any | ||||
| 881 | time. */ | ||||
| 882 | kmp_flag_64<> flag(&thr_bar->b_arrived, other_threads[parent_tid]); | ||||
| 883 | flag.release(); | ||||
| 884 | } else { | ||||
| 885 | // Need to update the team arrived pointer if we are the primary thread | ||||
| 886 | if (nproc > 1) // New value was already computed above | ||||
| 887 | team->t.t_bar[bt].b_arrived = new_state; | ||||
| 888 | else | ||||
| 889 | team->t.t_bar[bt].b_arrived += KMP_BARRIER_STATE_BUMP(1 << 2); | ||||
| 890 | KA_TRACE(20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) set team %d "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_gather: T#%d(%d:%d) set team %d " "arrived(%p) = %llu\n", gtid, team->t.t_id, tid, team-> t.t_id, &team->t.t_bar[bt].b_arrived, team->t.t_bar [bt].b_arrived); } | ||||
| 891 | "arrived(%p) = %llu\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_gather: T#%d(%d:%d) set team %d " "arrived(%p) = %llu\n", gtid, team->t.t_id, tid, team-> t.t_id, &team->t.t_bar[bt].b_arrived, team->t.t_bar [bt].b_arrived); } | ||||
| 892 | gtid, team->t.t_id, tid, team->t.t_id,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_gather: T#%d(%d:%d) set team %d " "arrived(%p) = %llu\n", gtid, team->t.t_id, tid, team-> t.t_id, &team->t.t_bar[bt].b_arrived, team->t.t_bar [bt].b_arrived); } | ||||
| 893 | &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_gather: T#%d(%d:%d) set team %d " "arrived(%p) = %llu\n", gtid, team->t.t_id, tid, team-> t.t_id, &team->t.t_bar[bt].b_arrived, team->t.t_bar [bt].b_arrived); }; | ||||
| 894 | } | ||||
| 895 | KA_TRACE(20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n" , gtid, team->t.t_id, tid, bt); } | ||||
| 896 | ("__kmp_tree_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n" , gtid, team->t.t_id, tid, bt); } | ||||
| 897 | gtid, team->t.t_id, tid, bt))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n" , gtid, team->t.t_id, tid, bt); }; | ||||
| 898 | } | ||||
| 899 | |||||
| | // Tree barrier: release phase. | ||||
| | // A worker (tid != 0) first waits on its own b_go flag (constructed with | ||||
| | // KMP_BARRIER_STATE_BUMP), re-reads its team and tid, and resets b_go to | ||||
| | // KMP_INIT_BARRIER_STATE; the primary thread (tid == 0) skips the wait. | ||||
| | // Every thread then releases the b_go flag of its children, whose tids start | ||||
| | // at (tid << branch_bits) + 1, for at most branch_factor children below nproc. | ||||
| | // bt selects which th_bar[] / t_bar[] slot is used. If propagate_icvs is | ||||
| | // nonzero, the parent initializes each child's implicit task and copies the | ||||
| | // ICVs of implicit task 0 into the child's slot before releasing it. | ||||
| | // itt_sync_obj is handed to the flag wait; under USE_ITT_NOTIFY it may be | ||||
| | // re-fetched via __kmp_itt_barrier_object when it is NULL. | ||||
| 900 | static void __kmp_tree_barrier_release( | ||||
| 901 | enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, | ||||
| 902 | int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj), void *itt_sync_obj) { | ||||
| 903 | KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_tree_release)((void)0); | ||||
| 904 | kmp_team_t *team; | ||||
| 905 | kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb; | ||||
| 906 | kmp_uint32 nproc; | ||||
| 907 | kmp_uint32 branch_bits = __kmp_barrier_release_branch_bits[bt]; | ||||
| 908 | kmp_uint32 branch_factor = 1 << branch_bits; | ||||
| 909 | kmp_uint32 child; | ||||
| 910 | kmp_uint32 child_tid; | ||||
| 911 | |||||
| 912 | // Perform a tree release for all of the threads that have been gathered | ||||
| 913 | if (!KMP_MASTER_TID((0 == (tid)) | ||||
| 914 | tid)(0 == (tid))) { // Handle fork barrier workers who aren't part of a team yet | ||||
| 915 | KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d wait go(%p) == %u\n", gtid,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_release: T#%d wait go(%p) == %u\n" , gtid, &thr_bar->b_go, (1 << 2)); } | ||||
| 916 | &thr_bar->b_go, KMP_BARRIER_STATE_BUMP))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_release: T#%d wait go(%p) == %u\n" , gtid, &thr_bar->b_go, (1 << 2)); }; | ||||
| 917 | // Wait for parent thread to release us | ||||
| 918 | kmp_flag_64<> flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP(1 << 2)); | ||||
| 919 | flag.wait(this_thr, TRUE(!0) USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | ||||
| 920 | #if USE_ITT_BUILD1 && USE_ITT_NOTIFY1 | ||||
| 921 | if ((__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 && itt_sync_obj == NULL__null) || KMP_ITT_DEBUG0) { | ||||
| 922 | // In fork barrier where we could not get the object reliably (or | ||||
| 923 | // ITTNOTIFY is disabled) | ||||
| 924 | itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1); | ||||
| 925 | // Cancel wait on previous parallel region... | ||||
| 926 | __kmp_itt_task_starting(itt_sync_obj); | ||||
| 927 | |||||
| 928 | if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done)) | ||||
| 929 | return; | ||||
| 930 | |||||
| 931 | itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier); | ||||
| 932 | if (itt_sync_obj != NULL__null) | ||||
| 933 | // Call prepare as early as possible for "new" barrier | ||||
| 934 | __kmp_itt_task_finished(itt_sync_obj); | ||||
| 935 | } else | ||||
| 936 | #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */ | ||||
| 937 | // Early exit for reaping threads releasing forkjoin barrier | ||||
| | // (when the ITT block above is compiled in, this check is its else-branch; | ||||
| | // the ITT branch performs its own g_done check before continuing) | ||||
| 938 | if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done)) | ||||
| 939 | return; | ||||
| 940 | |||||
| 941 | // The worker thread may now assume that the team is valid. | ||||
| 942 | team = __kmp_threads[gtid]->th.th_team; | ||||
| 943 | KMP_DEBUG_ASSERT(team != NULL)if (!(team != __null)) { __kmp_debug_assert("team != __null", "openmp/runtime/src/kmp_barrier.cpp", 943); }; | ||||
| 944 | tid = __kmp_tid_from_gtid(gtid); | ||||
| 945 | |||||
| | // Reset our go flag so it can be waited on again at the next barrier | ||||
| 946 | TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE)(thr_bar->b_go) = (0); | ||||
| 947 | KA_TRACE(20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_release: T#%d(%d:%d) set go(%p) = %u\n" , gtid, team->t.t_id, tid, &thr_bar->b_go, 0); } | ||||
| 948 | ("__kmp_tree_barrier_release: T#%d(%d:%d) set go(%p) = %u\n", gtid,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_release: T#%d(%d:%d) set go(%p) = %u\n" , gtid, team->t.t_id, tid, &thr_bar->b_go, 0); } | ||||
| 949 | team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_release: T#%d(%d:%d) set go(%p) = %u\n" , gtid, team->t.t_id, tid, &thr_bar->b_go, 0); }; | ||||
| 950 | KMP_MB(); // Flush all pending memory write invalidates. | ||||
| 951 | } else { | ||||
| 952 | team = __kmp_threads[gtid]->th.th_team; | ||||
| 953 | KMP_DEBUG_ASSERT(team != NULL)if (!(team != __null)) { __kmp_debug_assert("team != __null", "openmp/runtime/src/kmp_barrier.cpp", 953); }; | ||||
| 954 | KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d(%d:%d) primary enter for "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_release: T#%d(%d:%d) primary enter for " "barrier type %d\n", gtid, team->t.t_id, tid, bt); } | ||||
| 955 | "barrier type %d\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_release: T#%d(%d:%d) primary enter for " "barrier type %d\n", gtid, team->t.t_id, tid, bt); } | ||||
| 956 | gtid, team->t.t_id, tid, bt))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_release: T#%d(%d:%d) primary enter for " "barrier type %d\n", gtid, team->t.t_id, tid, bt); }; | ||||
| 957 | } | ||||
| 958 | nproc = this_thr->th.th_team_nproc; | ||||
| | // First child of this thread in the release tree | ||||
| 959 | child_tid = (tid << branch_bits) + 1; | ||||
| 960 | |||||
| 961 | if (child_tid < nproc) { | ||||
| 962 | kmp_info_t **other_threads = team->t.t_threads; | ||||
| 963 | child = 1; | ||||
| 964 | // Parent threads release all their children | ||||
| 965 | do { | ||||
| 966 | kmp_info_t *child_thr = other_threads[child_tid]; | ||||
| 967 | kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb; | ||||
| 968 | #if KMP_CACHE_MANAGE | ||||
| 969 | // Prefetch next thread's go count | ||||
| 970 | if (child + 1 <= branch_factor && child_tid + 1 < nproc) | ||||
| 971 | KMP_CACHE_PREFETCH( | ||||
| 972 | &other_threads[child_tid + 1]->th.th_bar[bt].bb.b_go); | ||||
| 973 | #endif /* KMP_CACHE_MANAGE */ | ||||
| 974 | |||||
| 975 | #if KMP_BARRIER_ICV_PUSH1 | ||||
| 976 | { | ||||
| 977 | KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy)((void)0); | ||||
| 978 | if (propagate_icvs) { | ||||
| 979 | __kmp_init_implicit_task(team->t.t_ident, | ||||
| 980 | team->t.t_threads[child_tid], team, | ||||
| 981 | child_tid, FALSE0); | ||||
| 982 | copy_icvs(&team->t.t_implicit_task_taskdata[child_tid].td_icvs, | ||||
| 983 | &team->t.t_implicit_task_taskdata[0].td_icvs); | ||||
| 984 | } | ||||
| 985 | } | ||||
| 986 | #endif // KMP_BARRIER_ICV_PUSH | ||||
| 987 | KA_TRACE(20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)" "go(%p): %u => %u\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid, &child_bar ->b_go, child_bar->b_go, child_bar->b_go + (1 << 2)); } | ||||
| 988 | ("__kmp_tree_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)"if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)" "go(%p): %u => %u\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid, &child_bar ->b_go, child_bar->b_go, child_bar->b_go + (1 << 2)); } | ||||
| 989 | "go(%p): %u => %u\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)" "go(%p): %u => %u\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid, &child_bar ->b_go, child_bar->b_go, child_bar->b_go + (1 << 2)); } | ||||
| 990 | gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)" "go(%p): %u => %u\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid, &child_bar ->b_go, child_bar->b_go, child_bar->b_go + (1 << 2)); } | ||||
| 991 | team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)" "go(%p): %u => %u\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid, &child_bar ->b_go, child_bar->b_go, child_bar->b_go + (1 << 2)); } | ||||
| 992 | child_bar->b_go + KMP_BARRIER_STATE_BUMP))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)" "go(%p): %u => %u\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid, &child_bar ->b_go, child_bar->b_go, child_bar->b_go + (1 << 2)); }; | ||||
| 993 | // Release child from barrier | ||||
| 994 | kmp_flag_64<> flag(&child_bar->b_go, child_thr); | ||||
| 995 | flag.release(); | ||||
| 996 | child++; | ||||
| 997 | child_tid++; | ||||
| 998 | } while (child <= branch_factor && child_tid < nproc); | ||||
| 999 | } | ||||
| 1000 | KA_TRACE(if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_release: T#%d(%d:%d) exit for barrier type %d\n" , gtid, team->t.t_id, tid, bt); } | ||||
| 1001 | 20, ("__kmp_tree_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_release: T#%d(%d:%d) exit for barrier type %d\n" , gtid, team->t.t_id, tid, bt); } | ||||
| 1002 | gtid, team->t.t_id, tid, bt))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_release: T#%d(%d:%d) exit for barrier type %d\n" , gtid, team->t.t_id, tid, bt); }; | ||||
| 1003 | } | ||||
| 1004 | |||||
| 1005 | // Hyper Barrier: gather phase. Each thread walks the levels of a hypercube-embedded tree (level advances by branch_bits and offset is shifted left by branch_bits while offset < num_threads). A thread whose tid has a nonzero digit at the current level releases its own b_arrived flag toward parent_tid and leaves the loop; otherwise it waits on each child's b_arrived flag with new_state as the expected value and, when reduce is non-null, calls (*reduce)(own reduce_data, child reduce_data). The primary thread (tid 0) finally updates team->t.t_bar[bt].b_arrived. | ||||
| 1006 | static void __kmp_hyper_barrier_gather( | ||||
| 1007 | enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, | ||||
| 1008 | void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj), void *itt_sync_obj) { | ||||
| 1009 | KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hyper_gather)((void)0); | ||||
| 1010 | kmp_team_t *team = this_thr->th.th_team; | ||||
| 1011 | kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb; | ||||
| 1012 | kmp_info_t **other_threads = team->t.t_threads; | ||||
| 1013 | kmp_uint64 new_state = KMP_BARRIER_UNUSED_STATE(1 << 1); | ||||
| 1014 | kmp_uint32 num_threads = this_thr->th.th_team_nproc; | ||||
| 1015 | kmp_uint32 branch_bits = __kmp_barrier_gather_branch_bits[bt]; | ||||
| 1016 | kmp_uint32 branch_factor = 1 << branch_bits; | ||||
| 1017 | kmp_uint32 offset; | ||||
| 1018 | kmp_uint32 level; | ||||
| 1019 | |||||
| 1020 | KA_TRACE(if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n" , gtid, team->t.t_id, tid, bt); } | ||||
| 1021 | 20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n" , gtid, team->t.t_id, tid, bt); } | ||||
| 1022 | ("__kmp_hyper_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n" , gtid, team->t.t_id, tid, bt); } | ||||
| 1023 | gtid, team->t.t_id, tid, bt))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n" , gtid, team->t.t_id, tid, bt); }; | ||||
| 1024 | KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid])if (!(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid ])) { __kmp_debug_assert("this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]" , "openmp/runtime/src/kmp_barrier.cpp", 1024); }; | ||||
| 1025 | |||||
| 1026 | #if USE_ITT_BUILD1 && USE_ITT_NOTIFY1 | ||||
| 1027 | // Barrier imbalance - save arrive time to the thread (only in forkjoin frames modes 2 and 3) | ||||
| 1028 | if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) { | ||||
| 1029 | this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time = | ||||
| 1030 | __itt_get_timestamp(!__kmp_itt_get_timestamp_ptr__3_0) ? 0 : __kmp_itt_get_timestamp_ptr__3_0(); | ||||
| 1031 | } | ||||
| 1032 | #endif | ||||
| 1033 | /* Perform a hypercube-embedded tree gather to wait until all of the threads | ||||
| 1034 | have arrived, and reduce any required data as we go. | ||||
| | At each level a thread is either a child (the digit of tid at this level is | ||||
| | nonzero: it signals parent_tid, which is tid with its low | ||||
| | level + branch_bits bits cleared, and breaks out of the loop) or a parent | ||||
| | (it waits for up to branch_factor - 1 children at tid + k * (1 << level), | ||||
| | as long as those tids are below num_threads). */ | ||||
| 1035 | kmp_flag_64<> p_flag(&thr_bar->b_arrived); | ||||
| 1036 | for (level = 0, offset = 1; offset < num_threads; | ||||
| 1037 | level += branch_bits, offset <<= branch_bits) { | ||||
| 1038 | kmp_uint32 child; | ||||
| 1039 | kmp_uint32 child_tid; | ||||
| 1040 | |||||
| 1041 | if (((tid >> level) & (branch_factor - 1)) != 0) { | ||||
| 1042 | kmp_int32 parent_tid = tid & ~((1 << (level + branch_bits)) - 1); | ||||
| 1043 | |||||
| 1044 | KMP_MB(); // Synchronize parent and child threads. | ||||
| 1045 | KA_TRACE(20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) " "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid , __kmp_gtid_from_tid(parent_tid, team), team->t.t_id, parent_tid , &thr_bar->b_arrived, thr_bar->b_arrived, thr_bar-> b_arrived + (1 << 2)); } | ||||
| 1046 | ("__kmp_hyper_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) " "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid , __kmp_gtid_from_tid(parent_tid, team), team->t.t_id, parent_tid , &thr_bar->b_arrived, thr_bar->b_arrived, thr_bar-> b_arrived + (1 << 2)); } | ||||
| 1047 | "arrived(%p): %llu => %llu\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) " "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid , __kmp_gtid_from_tid(parent_tid, team), team->t.t_id, parent_tid , &thr_bar->b_arrived, thr_bar->b_arrived, thr_bar-> b_arrived + (1 << 2)); } | ||||
| 1048 | gtid, team->t.t_id, tid, __kmp_gtid_from_tid(parent_tid, team),if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) " "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid , __kmp_gtid_from_tid(parent_tid, team), team->t.t_id, parent_tid , &thr_bar->b_arrived, thr_bar->b_arrived, thr_bar-> b_arrived + (1 << 2)); } | ||||
| 1049 | team->t.t_id, parent_tid, &thr_bar->b_arrived,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) " "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid , __kmp_gtid_from_tid(parent_tid, team), team->t.t_id, parent_tid , &thr_bar->b_arrived, thr_bar->b_arrived, thr_bar-> b_arrived + (1 << 2)); } | ||||
| 1050 | thr_bar->b_arrived,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) " "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid , __kmp_gtid_from_tid(parent_tid, team), team->t.t_id, parent_tid , &thr_bar->b_arrived, thr_bar->b_arrived, thr_bar-> b_arrived + (1 << 2)); } | ||||
| 1051 | thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) " "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid , __kmp_gtid_from_tid(parent_tid, team), team->t.t_id, parent_tid , &thr_bar->b_arrived, thr_bar->b_arrived, thr_bar-> b_arrived + (1 << 2)); }; | ||||
| 1052 | // Mark arrival to parent thread | ||||
| 1053 | /* After performing this write (in the last iteration of the enclosing for | ||||
| 1054 | loop), a worker thread may not assume that the team is valid any more | ||||
| 1055 | - it could be deallocated by the primary thread at any time. */ | ||||
| 1056 | p_flag.set_waiter(other_threads[parent_tid]); | ||||
| 1057 | p_flag.release(); | ||||
| 1058 | break; | ||||
| 1059 | } | ||||
| 1060 | |||||
| 1061 | // Parent threads wait for children to arrive; new_state is computed only once, the first time this thread acts as a parent, as the team's b_arrived plus KMP_BARRIER_STATE_BUMP | ||||
| 1062 | if (new_state == KMP_BARRIER_UNUSED_STATE(1 << 1)) | ||||
| 1063 | new_state = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP(1 << 2); | ||||
| 1064 | for (child = 1, child_tid = tid + (1 << level); | ||||
| 1065 | child < branch_factor && child_tid < num_threads; | ||||
| 1066 | child++, child_tid += (1 << level)) { | ||||
| 1067 | kmp_info_t *child_thr = other_threads[child_tid]; | ||||
| 1068 | kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb; | ||||
| 1069 | #if KMP_CACHE_MANAGE | ||||
| 1070 | kmp_uint32 next_child_tid = child_tid + (1 << level); | ||||
| 1071 | // Prefetch next thread's arrived count | ||||
| 1072 | if (child + 1 < branch_factor && next_child_tid < num_threads) | ||||
| 1073 | KMP_CACHE_PREFETCH( | ||||
| 1074 | &other_threads[next_child_tid]->th.th_bar[bt].bb.b_arrived); | ||||
| 1075 | #endif /* KMP_CACHE_MANAGE */ | ||||
| 1076 | KA_TRACE(20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) " "arrived(%p) == %llu\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid, &child_bar ->b_arrived, new_state); } | ||||
| 1077 | ("__kmp_hyper_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) " "arrived(%p) == %llu\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid, &child_bar ->b_arrived, new_state); } | ||||
| 1078 | "arrived(%p) == %llu\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) " "arrived(%p) == %llu\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid, &child_bar ->b_arrived, new_state); } | ||||
| 1079 | gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) " "arrived(%p) == %llu\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid, &child_bar ->b_arrived, new_state); } | ||||
| 1080 | team->t.t_id, child_tid, &child_bar->b_arrived, new_state))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) " "arrived(%p) == %llu\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid, &child_bar ->b_arrived, new_state); }; | ||||
| 1081 | // Wait for child to arrive | ||||
| 1082 | kmp_flag_64<> c_flag(&child_bar->b_arrived, new_state); | ||||
| 1083 | c_flag.wait(this_thr, FALSE0 USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | ||||
| 1084 | KMP_MB(); // Synchronize parent and child threads. | ||||
| 1085 | #if USE_ITT_BUILD1 && USE_ITT_NOTIFY1 | ||||
| 1086 | // Barrier imbalance - write min of the thread time and a child time to | ||||
| 1087 | // the thread. | ||||
| 1088 | if (__kmp_forkjoin_frames_mode == 2) { | ||||
| 1089 | this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time,((this_thr->th.th_bar_min_time) < (child_thr->th.th_bar_min_time ) ? (this_thr->th.th_bar_min_time) : (child_thr->th.th_bar_min_time )) | ||||
| 1090 | child_thr->th.th_bar_min_time)((this_thr->th.th_bar_min_time) < (child_thr->th.th_bar_min_time ) ? (this_thr->th.th_bar_min_time) : (child_thr->th.th_bar_min_time )); | ||||
| 1091 | } | ||||
| 1092 | #endif | ||||
| 1093 | if (reduce) { | ||||
| 1094 | KA_TRACE(100,if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_hyper_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n" , gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team ), team->t.t_id, child_tid); } | ||||
| 1095 | ("__kmp_hyper_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n",if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_hyper_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n" , gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team ), team->t.t_id, child_tid); } | ||||
| 1096 | gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_hyper_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n" , gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team ), team->t.t_id, child_tid); } | ||||
| 1097 | team->t.t_id, child_tid))if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_hyper_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n" , gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team ), team->t.t_id, child_tid); }; | ||||
| 1098 | OMPT_REDUCTION_DECL(this_thr, gtid)ompt_data_t *my_task_data = (&(this_thr->th.th_current_task ->ompt_task_info.task_data)); ompt_data_t *my_parallel_data = (&(this_thr->th.th_team->t.ompt_team_info.parallel_data )); void *return_address = __ompt_load_return_address(gtid);; | ||||
| 1099 | OMPT_REDUCTION_BEGINif (ompt_enabled.enabled && ompt_enabled.ompt_callback_reduction ) { ompt_callbacks.ompt_callback_reduction_callback( ompt_sync_region_reduction , ompt_scope_begin, my_parallel_data, my_task_data, return_address ); }; | ||||
| 1100 | (*reduce)(this_thr->th.th_local.reduce_data, | ||||
| 1101 | child_thr->th.th_local.reduce_data); | ||||
| 1102 | OMPT_REDUCTION_ENDif (ompt_enabled.enabled && ompt_enabled.ompt_callback_reduction ) { ompt_callbacks.ompt_callback_reduction_callback( ompt_sync_region_reduction , ompt_scope_end, my_parallel_data, my_task_data, return_address ); }; | ||||
| 1103 | } | ||||
| 1104 | } | ||||
| 1105 | } | ||||
| 1106 | |||||
| 1107 | if (KMP_MASTER_TID(tid)(0 == (tid))) { | ||||
| 1108 | // Need to update the team arrived pointer if we are the primary thread; new_state still equal to KMP_BARRIER_UNUSED_STATE means the loop above never computed it, so the team value is bumped directly | ||||
| 1109 | if (new_state == KMP_BARRIER_UNUSED_STATE(1 << 1)) | ||||
| 1110 | team->t.t_bar[bt].b_arrived += KMP_BARRIER_STATE_BUMP(1 << 2); | ||||
| 1111 | else | ||||
| 1112 | team->t.t_bar[bt].b_arrived = new_state; | ||||
| 1113 | KA_TRACE(20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) set team %d "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_gather: T#%d(%d:%d) set team %d " "arrived(%p) = %llu\n", gtid, team->t.t_id, tid, team-> t.t_id, &team->t.t_bar[bt].b_arrived, team->t.t_bar [bt].b_arrived); } | ||||
| 1114 | "arrived(%p) = %llu\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_gather: T#%d(%d:%d) set team %d " "arrived(%p) = %llu\n", gtid, team->t.t_id, tid, team-> t.t_id, &team->t.t_bar[bt].b_arrived, team->t.t_bar [bt].b_arrived); } | ||||
| 1115 | gtid, team->t.t_id, tid, team->t.t_id,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_gather: T#%d(%d:%d) set team %d " "arrived(%p) = %llu\n", gtid, team->t.t_id, tid, team-> t.t_id, &team->t.t_bar[bt].b_arrived, team->t.t_bar [bt].b_arrived); } | ||||
| 1116 | &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_gather: T#%d(%d:%d) set team %d " "arrived(%p) = %llu\n", gtid, team->t.t_id, tid, team-> t.t_id, &team->t.t_bar[bt].b_arrived, team->t.t_bar [bt].b_arrived); }; | ||||
| 1117 | } | ||||
| 1118 | KA_TRACE(if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n" , gtid, team->t.t_id, tid, bt); } | ||||
| 1119 | 20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n" , gtid, team->t.t_id, tid, bt); } | ||||
| 1120 | gtid, team->t.t_id, tid, bt))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n" , gtid, team->t.t_id, tid, bt); }; | ||||
| 1121 | } | ||||
| 1122 | |||||
| 1123 | // The reverse versions seem to beat the forward versions overall, so the reverse-order hyper barrier release is enabled here; the release code below tests this macro with #ifdef KMP_REVERSE_HYPER_BAR | ||||
| 1124 | #define KMP_REVERSE_HYPER_BAR | ||||
| 1125 | static void __kmp_hyper_barrier_release( | ||||
| 1126 | enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, | ||||
| 1127 | int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj), void *itt_sync_obj) { | ||||
| 1128 | KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hyper_release)((void)0); | ||||
| 1129 | kmp_team_t *team; | ||||
| 1130 | kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb; | ||||
| 1131 | kmp_info_t **other_threads; | ||||
| 1132 | kmp_uint32 num_threads; | ||||
| 1133 | kmp_uint32 branch_bits = __kmp_barrier_release_branch_bits[bt]; | ||||
| 1134 | kmp_uint32 branch_factor = 1 << branch_bits; | ||||
| 1135 | kmp_uint32 child; | ||||
| 1136 | kmp_uint32 child_tid; | ||||
| 1137 | kmp_uint32 offset; | ||||
| 1138 | kmp_uint32 level; | ||||
| 1139 | |||||
| 1140 | /* Perform a hypercube-embedded tree release for all of the threads that have | ||||
| 1141 | been gathered. If KMP_REVERSE_HYPER_BAR is defined (default) the threads | ||||
| 1142 | are released in the reverse order of the corresponding gather, otherwise | ||||
| 1143 | threads are released in the same order. */ | ||||
| 1144 | if (KMP_MASTER_TID(tid)(0 == (tid))) { // primary thread | ||||
| 1145 | team = __kmp_threads[gtid]->th.th_team; | ||||
| 1146 | KMP_DEBUG_ASSERT(team != NULL)if (!(team != __null)) { __kmp_debug_assert("team != __null", "openmp/runtime/src/kmp_barrier.cpp", 1146); }; | ||||
| 1147 | KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d(%d:%d) primary enter for "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_release: T#%d(%d:%d) primary enter for " "barrier type %d\n", gtid, team->t.t_id, tid, bt); } | ||||
| 1148 | "barrier type %d\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_release: T#%d(%d:%d) primary enter for " "barrier type %d\n", gtid, team->t.t_id, tid, bt); } | ||||
| 1149 | gtid, team->t.t_id, tid, bt))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_release: T#%d(%d:%d) primary enter for " "barrier type %d\n", gtid, team->t.t_id, tid, bt); }; | ||||
| 1150 | #if KMP_BARRIER_ICV_PUSH1 | ||||
| 1151 | if (propagate_icvs) { // primary already has ICVs in final destination; copy | ||||
| 1152 | copy_icvs(&thr_bar->th_fixed_icvs, | ||||
| 1153 | &team->t.t_implicit_task_taskdata[tid].td_icvs); | ||||
| 1154 | } | ||||
| 1155 | #endif | ||||
| 1156 | } else { // Handle fork barrier workers who aren't part of a team yet | ||||
| 1157 | KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d wait go(%p) == %u\n", gtid,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_release: T#%d wait go(%p) == %u\n" , gtid, &thr_bar->b_go, (1 << 2)); } | ||||
| 1158 | &thr_bar->b_go, KMP_BARRIER_STATE_BUMP))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_release: T#%d wait go(%p) == %u\n" , gtid, &thr_bar->b_go, (1 << 2)); }; | ||||
| 1159 | // Wait for parent thread to release us | ||||
| 1160 | kmp_flag_64<> flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP(1 << 2)); | ||||
| 1161 | flag.wait(this_thr, TRUE(!0) USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | ||||
| 1162 | #if USE_ITT_BUILD1 && USE_ITT_NOTIFY1 | ||||
| 1163 | if ((__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 && itt_sync_obj == NULL__null) || KMP_ITT_DEBUG0) { | ||||
| 1164 | // In fork barrier where we could not get the object reliably | ||||
| 1165 | itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1); | ||||
| 1166 | // Cancel wait on previous parallel region... | ||||
| 1167 | __kmp_itt_task_starting(itt_sync_obj); | ||||
| 1168 | |||||
| 1169 | if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done)) | ||||
| 1170 | return; | ||||
| 1171 | |||||
| 1172 | itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier); | ||||
| 1173 | if (itt_sync_obj != NULL__null) | ||||
| 1174 | // Call prepare as early as possible for "new" barrier | ||||
| 1175 | __kmp_itt_task_finished(itt_sync_obj); | ||||
| 1176 | } else | ||||
| 1177 | #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */ | ||||
| 1178 | // Early exit for reaping threads releasing forkjoin barrier | ||||
| 1179 | if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done)) | ||||
| 1180 | return; | ||||
| 1181 | |||||
| 1182 | // The worker thread may now assume that the team is valid. | ||||
| 1183 | team = __kmp_threads[gtid]->th.th_team; | ||||
| 1184 | KMP_DEBUG_ASSERT(team != NULL)if (!(team != __null)) { __kmp_debug_assert("team != __null", "openmp/runtime/src/kmp_barrier.cpp", 1184); }; | ||||
| 1185 | tid = __kmp_tid_from_gtid(gtid); | ||||
| 1186 | |||||
| 1187 | TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE)(thr_bar->b_go) = (0); | ||||
| 1188 | KA_TRACE(20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_release: T#%d(%d:%d) set go(%p) = %u\n" , gtid, team->t.t_id, tid, &thr_bar->b_go, 0); } | ||||
| 1189 | ("__kmp_hyper_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_release: T#%d(%d:%d) set go(%p) = %u\n" , gtid, team->t.t_id, tid, &thr_bar->b_go, 0); } | ||||
| 1190 | gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_release: T#%d(%d:%d) set go(%p) = %u\n" , gtid, team->t.t_id, tid, &thr_bar->b_go, 0); }; | ||||
| 1191 | KMP_MB(); // Flush all pending memory write invalidates. | ||||
| 1192 | } | ||||
| 1193 | num_threads = this_thr->th.th_team_nproc; | ||||
| 1194 | other_threads = team->t.t_threads; | ||||
| 1195 | |||||
| 1196 | #ifdef KMP_REVERSE_HYPER_BAR | ||||
| 1197 | // Count up to correct level for parent | ||||
| 1198 | for (level = 0, offset = 1; | ||||
| 1199 | offset < num_threads && (((tid >> level) & (branch_factor - 1)) == 0); | ||||
| 1200 | level += branch_bits, offset <<= branch_bits) | ||||
| 1201 | ; | ||||
| 1202 | |||||
| 1203 | // Now go down from there | ||||
| 1204 | for (level -= branch_bits, offset >>= branch_bits; offset != 0; | ||||
| 1205 | level -= branch_bits, offset >>= branch_bits) | ||||
| 1206 | #else | ||||
| 1207 | // Go down the tree, level by level | ||||
| 1208 | for (level = 0, offset = 1; offset < num_threads; | ||||
| 1209 | level += branch_bits, offset <<= branch_bits) | ||||
| 1210 | #endif // KMP_REVERSE_HYPER_BAR | ||||
| 1211 | { | ||||
| 1212 | #ifdef KMP_REVERSE_HYPER_BAR | ||||
| 1213 | /* Now go in reverse order through the children, highest to lowest. | ||||
| 1214 | Initial setting of child is conservative here. */ | ||||
| 1215 | child = num_threads >> ((level == 0) ? level : level - 1); | ||||
| 1216 | for (child = (child < branch_factor - 1) ? child : branch_factor - 1, | ||||
| 1217 | child_tid = tid + (child << level); | ||||
| 1218 | child >= 1; child--, child_tid -= (1 << level)) | ||||
| 1219 | #else | ||||
| 1220 | if (((tid >> level) & (branch_factor - 1)) != 0) | ||||
| 1221 | // No need to go lower than this, since this is the level parent would be | ||||
| 1222 | // notified | ||||
| 1223 | break; | ||||
| 1224 | // Iterate through children on this level of the tree | ||||
| 1225 | for (child = 1, child_tid = tid + (1 << level); | ||||
| 1226 | child < branch_factor && child_tid < num_threads; | ||||
| 1227 | child++, child_tid += (1 << level)) | ||||
| 1228 | #endif // KMP_REVERSE_HYPER_BAR | ||||
| 1229 | { | ||||
| 1230 | if (child_tid >= num_threads) | ||||
| 1231 | continue; // Child doesn't exist so keep going | ||||
| 1232 | else { | ||||
| 1233 | kmp_info_t *child_thr = other_threads[child_tid]; | ||||
| 1234 | kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb; | ||||
| 1235 | #if KMP_CACHE_MANAGE | ||||
| 1236 | kmp_uint32 next_child_tid = child_tid - (1 << level); | ||||
| 1237 | // Prefetch next thread's go count | ||||
| 1238 | #ifdef KMP_REVERSE_HYPER_BAR | ||||
| 1239 | if (child - 1 >= 1 && next_child_tid < num_threads) | ||||
| 1240 | #else | ||||
| 1241 | if (child + 1 < branch_factor && next_child_tid < num_threads) | ||||
| 1242 | #endif // KMP_REVERSE_HYPER_BAR | ||||
| 1243 | KMP_CACHE_PREFETCH( | ||||
| 1244 | &other_threads[next_child_tid]->th.th_bar[bt].bb.b_go); | ||||
| 1245 | #endif /* KMP_CACHE_MANAGE */ | ||||
| 1246 | |||||
| 1247 | #if KMP_BARRIER_ICV_PUSH1 | ||||
| 1248 | if (propagate_icvs) // push my fixed ICVs to my child | ||||
| 1249 | copy_icvs(&child_bar->th_fixed_icvs, &thr_bar->th_fixed_icvs); | ||||
| 1250 | #endif // KMP_BARRIER_ICV_PUSH | ||||
| 1251 | |||||
| 1252 | KA_TRACE(if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)" "go(%p): %u => %u\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid, &child_bar ->b_go, child_bar->b_go, child_bar->b_go + (1 << 2)); } | ||||
| 1253 | 20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)" "go(%p): %u => %u\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid, &child_bar ->b_go, child_bar->b_go, child_bar->b_go + (1 << 2)); } | ||||
| 1254 | ("__kmp_hyper_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)"if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)" "go(%p): %u => %u\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid, &child_bar ->b_go, child_bar->b_go, child_bar->b_go + (1 << 2)); } | ||||
| 1255 | "go(%p): %u => %u\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)" "go(%p): %u => %u\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid, &child_bar ->b_go, child_bar->b_go, child_bar->b_go + (1 << 2)); } | ||||
| 1256 | gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)" "go(%p): %u => %u\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid, &child_bar ->b_go, child_bar->b_go, child_bar->b_go + (1 << 2)); } | ||||
| 1257 | team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)" "go(%p): %u => %u\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid, &child_bar ->b_go, child_bar->b_go, child_bar->b_go + (1 << 2)); } | ||||
| 1258 | child_bar->b_go + KMP_BARRIER_STATE_BUMP))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)" "go(%p): %u => %u\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid, &child_bar ->b_go, child_bar->b_go, child_bar->b_go + (1 << 2)); }; | ||||
| 1259 | // Release child from barrier | ||||
| 1260 | kmp_flag_64<> flag(&child_bar->b_go, child_thr); | ||||
| 1261 | flag.release(); | ||||
| 1262 | } | ||||
| 1263 | } | ||||
| 1264 | } | ||||
| 1265 | #if KMP_BARRIER_ICV_PUSH1 | ||||
| 1266 | if (propagate_icvs && | ||||
| 1267 | !KMP_MASTER_TID(tid)(0 == (tid))) { // copy ICVs locally to final dest | ||||
| 1268 | __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid, | ||||
| 1269 | FALSE0); | ||||
| 1270 | copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, | ||||
| 1271 | &thr_bar->th_fixed_icvs); | ||||
| 1272 | } | ||||
| 1273 | #endif | ||||
| 1274 | KA_TRACE(if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_release: T#%d(%d:%d) exit for barrier type %d\n" , gtid, team->t.t_id, tid, bt); } | ||||
| 1275 | 20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_release: T#%d(%d:%d) exit for barrier type %d\n" , gtid, team->t.t_id, tid, bt); } | ||||
| 1276 | ("__kmp_hyper_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_release: T#%d(%d:%d) exit for barrier type %d\n" , gtid, team->t.t_id, tid, bt); } | ||||
| 1277 | gtid, team->t.t_id, tid, bt))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_release: T#%d(%d:%d) exit for barrier type %d\n" , gtid, team->t.t_id, tid, bt); }; | ||||
| 1278 | } | ||||
| 1279 | |||||
| 1280 | // Hierarchical Barrier | ||||
| 1281 | |||||
| 1282 | // Initialize thread barrier data | ||||
| 1283 | /* Initializes/re-initializes the hierarchical barrier data stored on a thread. | ||||
| 1284 | Performs the minimum amount of initialization required based on how the team | ||||
| 1285 | has changed. Returns true if leaf children will require both on-core and | ||||
| 1286 | traditional wake-up mechanisms. For example, if the team size increases, | ||||
| 1287 | threads already in the team will respond to on-core wakeup on their parent | ||||
| 1288 | thread, but threads newly added to the team will only be listening on the | ||||
| 1289 | their local b_go. */ | ||||
| 1290 | static bool __kmp_init_hierarchical_barrier_thread(enum barrier_type bt, | ||||
| 1291 | kmp_bstate_t *thr_bar, | ||||
| 1292 | kmp_uint32 nproc, int gtid, | ||||
| 1293 | int tid, kmp_team_t *team) { | ||||
| 1294 | // Checks to determine if (re-)initialization is needed | ||||
| 1295 | bool uninitialized = thr_bar->team == NULL__null; | ||||
| 1296 | bool team_changed = team != thr_bar->team; | ||||
| 1297 | bool team_sz_changed = nproc != thr_bar->nproc; | ||||
| 1298 | bool tid_changed = tid != thr_bar->old_tid; | ||||
| 1299 | bool retval = false; | ||||
| 1300 | |||||
| 1301 | if (uninitialized || team_sz_changed) { | ||||
| 1302 | __kmp_get_hierarchy(nproc, thr_bar); | ||||
| 1303 | } | ||||
| 1304 | |||||
| 1305 | if (uninitialized || team_sz_changed || tid_changed) { | ||||
| 1306 | thr_bar->my_level = thr_bar->depth - 1; // default for primary thread | ||||
| 1307 | thr_bar->parent_tid = -1; // default for primary thread | ||||
| 1308 | if (!KMP_MASTER_TID(tid)(0 == (tid))) { | ||||
| 1309 | // if not primary thread, find parent thread in hierarchy | ||||
| 1310 | kmp_uint32 d = 0; | ||||
| 1311 | while (d < thr_bar->depth) { // find parent based on level of thread in | ||||
| 1312 | // hierarchy, and note level | ||||
| 1313 | kmp_uint32 rem; | ||||
| 1314 | if (d == thr_bar->depth - 2) { // reached level right below the primary | ||||
| 1315 | thr_bar->parent_tid = 0; | ||||
| 1316 | thr_bar->my_level = d; | ||||
| 1317 | break; | ||||
| 1318 | } else if ((rem = tid % thr_bar->skip_per_level[d + 1]) != 0) { | ||||
| 1319 | // TODO: can we make the above op faster? | ||||
| 1320 | // thread is not a subtree root at next level, so this is max | ||||
| 1321 | thr_bar->parent_tid = tid - rem; | ||||
| 1322 | thr_bar->my_level = d; | ||||
| 1323 | break; | ||||
| 1324 | } | ||||
| 1325 | ++d; | ||||
| 1326 | } | ||||
| 1327 | } | ||||
| 1328 | __kmp_type_convert(7 - ((tid - thr_bar->parent_tid) / | ||||
| 1329 | (thr_bar->skip_per_level[thr_bar->my_level])), | ||||
| 1330 | &(thr_bar->offset)); | ||||
| 1331 | thr_bar->old_tid = tid; | ||||
| 1332 | thr_bar->wait_flag = KMP_BARRIER_NOT_WAITING0; | ||||
| 1333 | thr_bar->team = team; | ||||
| 1334 | thr_bar->parent_bar = | ||||
| 1335 | &team->t.t_threads[thr_bar->parent_tid]->th.th_bar[bt].bb; | ||||
| 1336 | } | ||||
| 1337 | if (uninitialized || team_changed || tid_changed) { | ||||
| 1338 | thr_bar->team = team; | ||||
| 1339 | thr_bar->parent_bar = | ||||
| 1340 | &team->t.t_threads[thr_bar->parent_tid]->th.th_bar[bt].bb; | ||||
| 1341 | retval = true; | ||||
| 1342 | } | ||||
| 1343 | if (uninitialized || team_sz_changed || tid_changed) { | ||||
| 1344 | thr_bar->nproc = nproc; | ||||
| 1345 | thr_bar->leaf_kids = thr_bar->base_leaf_kids; | ||||
| 1346 | if (thr_bar->my_level == 0) | ||||
| 1347 | thr_bar->leaf_kids = 0; | ||||
| 1348 | if (thr_bar->leaf_kids && (kmp_uint32)tid + thr_bar->leaf_kids + 1 > nproc) | ||||
| 1349 | __kmp_type_convert(nproc - tid - 1, &(thr_bar->leaf_kids)); | ||||
| 1350 | thr_bar->leaf_state = 0; | ||||
| 1351 | for (int i = 0; i < thr_bar->leaf_kids; ++i) | ||||
| 1352 | ((char *)&(thr_bar->leaf_state))[7 - i] = 1; | ||||
| 1353 | } | ||||
| 1354 | return retval; | ||||
| 1355 | } | ||||
| 1356 | |||||
| 1357 | static void __kmp_hierarchical_barrier_gather( | ||||
| 1358 | enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, | ||||
| 1359 | void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj), void *itt_sync_obj) { | ||||
| 1360 | KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hier_gather)((void)0); | ||||
| 1361 | kmp_team_t *team = this_thr->th.th_team; | ||||
| 1362 | kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb; | ||||
| 1363 | kmp_uint32 nproc = this_thr->th.th_team_nproc; | ||||
| 1364 | kmp_info_t **other_threads = team->t.t_threads; | ||||
| 1365 | kmp_uint64 new_state = 0; | ||||
| 1366 | |||||
| 1367 | int level = team->t.t_level; | ||||
| 1368 | if (other_threads[0] | ||||
| 1369 | ->th.th_teams_microtask) // are we inside the teams construct? | ||||
| 1370 | if (this_thr->th.th_teams_size.nteams > 1) | ||||
| 1371 | ++level; // level was not increased in teams construct for team_of_masters | ||||
| 1372 | if (level == 1) | ||||
| 1373 | thr_bar->use_oncore_barrier = 1; | ||||
| 1374 | else | ||||
| 1375 | thr_bar->use_oncore_barrier = 0; // Do not use oncore barrier when nested | ||||
| 1376 | |||||
| 1377 | KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) enter for "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) enter for " "barrier type %d\n", gtid, team->t.t_id, tid, bt); } | ||||
| 1378 | "barrier type %d\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) enter for " "barrier type %d\n", gtid, team->t.t_id, tid, bt); } | ||||
| 1379 | gtid, team->t.t_id, tid, bt))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) enter for " "barrier type %d\n", gtid, team->t.t_id, tid, bt); }; | ||||
| 1380 | KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid])if (!(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid ])) { __kmp_debug_assert("this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]" , "openmp/runtime/src/kmp_barrier.cpp", 1380); }; | ||||
| 1381 | |||||
| 1382 | #if USE_ITT_BUILD1 && USE_ITT_NOTIFY1 | ||||
| 1383 | // Barrier imbalance - save arrive time to the thread | ||||
| 1384 | if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) { | ||||
| 1385 | this_thr->th.th_bar_arrive_time = __itt_get_timestamp(!__kmp_itt_get_timestamp_ptr__3_0) ? 0 : __kmp_itt_get_timestamp_ptr__3_0(); | ||||
| 1386 | } | ||||
| 1387 | #endif | ||||
| 1388 | |||||
| 1389 | (void)__kmp_init_hierarchical_barrier_thread(bt, thr_bar, nproc, gtid, tid, | ||||
| 1390 | team); | ||||
| 1391 | |||||
| 1392 | if (thr_bar->my_level) { // not a leaf (my_level==0 means leaf) | ||||
| 1393 | kmp_int32 child_tid; | ||||
| 1394 | new_state = | ||||
| 1395 | (kmp_uint64)team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP(1 << 2); | ||||
| 1396 | if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME(2147483647) && | ||||
| 1397 | thr_bar->use_oncore_barrier) { | ||||
| 1398 | if (thr_bar->leaf_kids) { | ||||
| 1399 | // First, wait for leaf children to check-in on my b_arrived flag | ||||
| 1400 | kmp_uint64 leaf_state = | ||||
| 1401 | KMP_MASTER_TID(tid)(0 == (tid)) | ||||
| 1402 | ? thr_bar->b_arrived | thr_bar->leaf_state | ||||
| 1403 | : team->t.t_bar[bt].b_arrived | thr_bar->leaf_state; | ||||
| 1404 | KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) waiting "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) waiting " "for leaf kids\n", gtid, team->t.t_id, tid); } | ||||
| 1405 | "for leaf kids\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) waiting " "for leaf kids\n", gtid, team->t.t_id, tid); } | ||||
| 1406 | gtid, team->t.t_id, tid))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) waiting " "for leaf kids\n", gtid, team->t.t_id, tid); }; | ||||
| 1407 | kmp_flag_64<> flag(&thr_bar->b_arrived, leaf_state); | ||||
| 1408 | flag.wait(this_thr, FALSE0 USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | ||||
| 1409 | if (reduce) { | ||||
| 1410 | OMPT_REDUCTION_DECL(this_thr, gtid)ompt_data_t *my_task_data = (&(this_thr->th.th_current_task ->ompt_task_info.task_data)); ompt_data_t *my_parallel_data = (&(this_thr->th.th_team->t.ompt_team_info.parallel_data )); void *return_address = __ompt_load_return_address(gtid);; | ||||
| 1411 | OMPT_REDUCTION_BEGINif (ompt_enabled.enabled && ompt_enabled.ompt_callback_reduction ) { ompt_callbacks.ompt_callback_reduction_callback( ompt_sync_region_reduction , ompt_scope_begin, my_parallel_data, my_task_data, return_address ); }; | ||||
| 1412 | for (child_tid = tid + 1; child_tid <= tid + thr_bar->leaf_kids; | ||||
| 1413 | ++child_tid) { | ||||
| 1414 | KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += "if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += " "T#%d(%d:%d)\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid); } | ||||
| 1415 | "T#%d(%d:%d)\n",if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += " "T#%d(%d:%d)\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid); } | ||||
| 1416 | gtid, team->t.t_id, tid,if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += " "T#%d(%d:%d)\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid); } | ||||
| 1417 | __kmp_gtid_from_tid(child_tid, team), team->t.t_id,if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += " "T#%d(%d:%d)\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid); } | ||||
| 1418 | child_tid))if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += " "T#%d(%d:%d)\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid); }; | ||||
| 1419 | (*reduce)(this_thr->th.th_local.reduce_data, | ||||
| 1420 | other_threads[child_tid]->th.th_local.reduce_data); | ||||
| 1421 | } | ||||
| 1422 | OMPT_REDUCTION_ENDif (ompt_enabled.enabled && ompt_enabled.ompt_callback_reduction ) { ompt_callbacks.ompt_callback_reduction_callback( ompt_sync_region_reduction , ompt_scope_end, my_parallel_data, my_task_data, return_address ); }; | ||||
| 1423 | } | ||||
| 1424 | // clear leaf_state bits | ||||
| 1425 | KMP_TEST_THEN_AND64(&thr_bar->b_arrived, ~(thr_bar->leaf_state))__sync_fetch_and_and((volatile kmp_uint64 *)(&thr_bar-> b_arrived), (kmp_uint64)(~(thr_bar->leaf_state))); | ||||
| 1426 | } | ||||
| 1427 | // Next, wait for higher level children on each child's b_arrived flag | ||||
| 1428 | for (kmp_uint32 d = 1; d < thr_bar->my_level; | ||||
| 1429 | ++d) { // gather lowest level threads first, but skip 0 | ||||
| 1430 | kmp_uint32 last = tid + thr_bar->skip_per_level[d + 1], | ||||
| 1431 | skip = thr_bar->skip_per_level[d]; | ||||
| 1432 | if (last > nproc) | ||||
| 1433 | last = nproc; | ||||
| 1434 | for (child_tid = tid + skip; child_tid < (int)last; child_tid += skip) { | ||||
| 1435 | kmp_info_t *child_thr = other_threads[child_tid]; | ||||
| 1436 | kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb; | ||||
| 1437 | KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait " "T#%d(%d:%d) " "arrived(%p) == %llu\n", gtid, team->t.t_id , tid, __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid, &child_bar->b_arrived, new_state); } | ||||
| 1438 | "T#%d(%d:%d) "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait " "T#%d(%d:%d) " "arrived(%p) == %llu\n", gtid, team->t.t_id , tid, __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid, &child_bar->b_arrived, new_state); } | ||||
| 1439 | "arrived(%p) == %llu\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait " "T#%d(%d:%d) " "arrived(%p) == %llu\n", gtid, team->t.t_id , tid, __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid, &child_bar->b_arrived, new_state); } | ||||
| 1440 | gtid, team->t.t_id, tid,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait " "T#%d(%d:%d) " "arrived(%p) == %llu\n", gtid, team->t.t_id , tid, __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid, &child_bar->b_arrived, new_state); } | ||||
| 1441 | __kmp_gtid_from_tid(child_tid, team), team->t.t_id,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait " "T#%d(%d:%d) " "arrived(%p) == %llu\n", gtid, team->t.t_id , tid, __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid, &child_bar->b_arrived, new_state); } | ||||
| 1442 | child_tid, &child_bar->b_arrived, new_state))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait " "T#%d(%d:%d) " "arrived(%p) == %llu\n", gtid, team->t.t_id , tid, __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid, &child_bar->b_arrived, new_state); }; | ||||
| 1443 | kmp_flag_64<> flag(&child_bar->b_arrived, new_state); | ||||
| 1444 | flag.wait(this_thr, FALSE0 USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | ||||
| 1445 | if (reduce) { | ||||
| 1446 | KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += "if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += " "T#%d(%d:%d)\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid); } | ||||
| 1447 | "T#%d(%d:%d)\n",if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += " "T#%d(%d:%d)\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid); } | ||||
| 1448 | gtid, team->t.t_id, tid,if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += " "T#%d(%d:%d)\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid); } | ||||
| 1449 | __kmp_gtid_from_tid(child_tid, team), team->t.t_id,if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += " "T#%d(%d:%d)\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid); } | ||||
| 1450 | child_tid))if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += " "T#%d(%d:%d)\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid); }; | ||||
| 1451 | (*reduce)(this_thr->th.th_local.reduce_data, | ||||
| 1452 | child_thr->th.th_local.reduce_data); | ||||
| 1453 | } | ||||
| 1454 | } | ||||
| 1455 | } | ||||
| 1456 | } else { // Blocktime is not infinite | ||||
| 1457 | for (kmp_uint32 d = 0; d < thr_bar->my_level; | ||||
| 1458 | ++d) { // Gather lowest level threads first | ||||
| 1459 | kmp_uint32 last = tid + thr_bar->skip_per_level[d + 1], | ||||
| 1460 | skip = thr_bar->skip_per_level[d]; | ||||
| 1461 | if (last > nproc) | ||||
| 1462 | last = nproc; | ||||
| 1463 | for (child_tid = tid + skip; child_tid < (int)last; child_tid += skip) { | ||||
| 1464 | kmp_info_t *child_thr = other_threads[child_tid]; | ||||
| 1465 | kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb; | ||||
| 1466 | KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait " "T#%d(%d:%d) " "arrived(%p) == %llu\n", gtid, team->t.t_id , tid, __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid, &child_bar->b_arrived, new_state); } | ||||
| 1467 | "T#%d(%d:%d) "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait " "T#%d(%d:%d) " "arrived(%p) == %llu\n", gtid, team->t.t_id , tid, __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid, &child_bar->b_arrived, new_state); } | ||||
| 1468 | "arrived(%p) == %llu\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait " "T#%d(%d:%d) " "arrived(%p) == %llu\n", gtid, team->t.t_id , tid, __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid, &child_bar->b_arrived, new_state); } | ||||
| 1469 | gtid, team->t.t_id, tid,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait " "T#%d(%d:%d) " "arrived(%p) == %llu\n", gtid, team->t.t_id , tid, __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid, &child_bar->b_arrived, new_state); } | ||||
| 1470 | __kmp_gtid_from_tid(child_tid, team), team->t.t_id,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait " "T#%d(%d:%d) " "arrived(%p) == %llu\n", gtid, team->t.t_id , tid, __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid, &child_bar->b_arrived, new_state); } | ||||
| 1471 | child_tid, &child_bar->b_arrived, new_state))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait " "T#%d(%d:%d) " "arrived(%p) == %llu\n", gtid, team->t.t_id , tid, __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid, &child_bar->b_arrived, new_state); }; | ||||
| 1472 | kmp_flag_64<> flag(&child_bar->b_arrived, new_state); | ||||
| 1473 | flag.wait(this_thr, FALSE0 USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | ||||
| 1474 | if (reduce) { | ||||
| 1475 | KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += "if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += " "T#%d(%d:%d)\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid); } | ||||
| 1476 | "T#%d(%d:%d)\n",if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += " "T#%d(%d:%d)\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid); } | ||||
| 1477 | gtid, team->t.t_id, tid,if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += " "T#%d(%d:%d)\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid); } | ||||
| 1478 | __kmp_gtid_from_tid(child_tid, team), team->t.t_id,if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += " "T#%d(%d:%d)\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid); } | ||||
| 1479 | child_tid))if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += " "T#%d(%d:%d)\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid); }; | ||||
| 1480 | (*reduce)(this_thr->th.th_local.reduce_data, | ||||
| 1481 | child_thr->th.th_local.reduce_data); | ||||
| 1482 | } | ||||
| 1483 | } | ||||
| 1484 | } | ||||
| 1485 | } | ||||
| 1486 | } | ||||
| 1487 | // All subordinates are gathered; now release parent if not primary thread | ||||
| 1488 | |||||
| 1489 | if (!KMP_MASTER_TID(tid)(0 == (tid))) { // worker threads release parent in hierarchy | ||||
| 1490 | KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) releasing"if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) releasing" " T#%d(%d:%d) arrived(%p): %llu => %llu\n", gtid, team-> t.t_id, tid, __kmp_gtid_from_tid(thr_bar->parent_tid, team ), team->t.t_id, thr_bar->parent_tid, &thr_bar-> b_arrived, thr_bar->b_arrived, thr_bar->b_arrived + (1 << 2)); } | ||||
| 1491 | " T#%d(%d:%d) arrived(%p): %llu => %llu\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) releasing" " T#%d(%d:%d) arrived(%p): %llu => %llu\n", gtid, team-> t.t_id, tid, __kmp_gtid_from_tid(thr_bar->parent_tid, team ), team->t.t_id, thr_bar->parent_tid, &thr_bar-> b_arrived, thr_bar->b_arrived, thr_bar->b_arrived + (1 << 2)); } | ||||
| 1492 | gtid, team->t.t_id, tid,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) releasing" " T#%d(%d:%d) arrived(%p): %llu => %llu\n", gtid, team-> t.t_id, tid, __kmp_gtid_from_tid(thr_bar->parent_tid, team ), team->t.t_id, thr_bar->parent_tid, &thr_bar-> b_arrived, thr_bar->b_arrived, thr_bar->b_arrived + (1 << 2)); } | ||||
| 1493 | __kmp_gtid_from_tid(thr_bar->parent_tid, team), team->t.t_id,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) releasing" " T#%d(%d:%d) arrived(%p): %llu => %llu\n", gtid, team-> t.t_id, tid, __kmp_gtid_from_tid(thr_bar->parent_tid, team ), team->t.t_id, thr_bar->parent_tid, &thr_bar-> b_arrived, thr_bar->b_arrived, thr_bar->b_arrived + (1 << 2)); } | ||||
| 1494 | thr_bar->parent_tid, &thr_bar->b_arrived, thr_bar->b_arrived,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) releasing" " T#%d(%d:%d) arrived(%p): %llu => %llu\n", gtid, team-> t.t_id, tid, __kmp_gtid_from_tid(thr_bar->parent_tid, team ), team->t.t_id, thr_bar->parent_tid, &thr_bar-> b_arrived, thr_bar->b_arrived, thr_bar->b_arrived + (1 << 2)); } | ||||
| 1495 | thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) releasing" " T#%d(%d:%d) arrived(%p): %llu => %llu\n", gtid, team-> t.t_id, tid, __kmp_gtid_from_tid(thr_bar->parent_tid, team ), team->t.t_id, thr_bar->parent_tid, &thr_bar-> b_arrived, thr_bar->b_arrived, thr_bar->b_arrived + (1 << 2)); }; | ||||
| 1496 | /* Mark arrival to parent: After performing this write, a worker thread may | ||||
| 1497 | not assume that the team is valid any more - it could be deallocated by | ||||
| 1498 | the primary thread at any time. */ | ||||
| 1499 | if (thr_bar->my_level || __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME(2147483647) || | ||||
| 1500 | !thr_bar->use_oncore_barrier) { // Parent is waiting on my b_arrived | ||||
| 1501 | // flag; release it | ||||
| 1502 | kmp_flag_64<> flag(&thr_bar->b_arrived, | ||||
| 1503 | other_threads[thr_bar->parent_tid]); | ||||
| 1504 | flag.release(); | ||||
| 1505 | } else { | ||||
| 1506 | // Leaf does special release on "offset" bits of parent's b_arrived flag | ||||
| 1507 | thr_bar->b_arrived = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP(1 << 2); | ||||
| 1508 | kmp_flag_oncore flag(&thr_bar->parent_bar->b_arrived, | ||||
| 1509 | thr_bar->offset + 1); | ||||
| 1510 | flag.set_waiter(other_threads[thr_bar->parent_tid]); | ||||
| 1511 | flag.release(); | ||||
| 1512 | } | ||||
| 1513 | } else { // Primary thread needs to update the team's b_arrived value | ||||
| 1514 | team->t.t_bar[bt].b_arrived = new_state; | ||||
| 1515 | KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) set team %d "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) set team %d " "arrived(%p) = %llu\n", gtid, team->t.t_id, tid, team-> t.t_id, &team->t.t_bar[bt].b_arrived, team->t.t_bar [bt].b_arrived); } | ||||
| 1516 | "arrived(%p) = %llu\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) set team %d " "arrived(%p) = %llu\n", gtid, team->t.t_id, tid, team-> t.t_id, &team->t.t_bar[bt].b_arrived, team->t.t_bar [bt].b_arrived); } | ||||
| 1517 | gtid, team->t.t_id, tid, team->t.t_id,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) set team %d " "arrived(%p) = %llu\n", gtid, team->t.t_id, tid, team-> t.t_id, &team->t.t_bar[bt].b_arrived, team->t.t_bar [bt].b_arrived); } | ||||
| 1518 | &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) set team %d " "arrived(%p) = %llu\n", gtid, team->t.t_id, tid, team-> t.t_id, &team->t.t_bar[bt].b_arrived, team->t.t_bar [bt].b_arrived); }; | ||||
| 1519 | } | ||||
| 1520 | // Is the team access below unsafe or just technically invalid? | ||||
| 1521 | KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) exit for "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) exit for " "barrier type %d\n", gtid, team->t.t_id, tid, bt); } | ||||
| 1522 | "barrier type %d\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) exit for " "barrier type %d\n", gtid, team->t.t_id, tid, bt); } | ||||
| 1523 | gtid, team->t.t_id, tid, bt))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) exit for " "barrier type %d\n", gtid, team->t.t_id, tid, bt); }; | ||||
| 1524 | } | ||||
| 1525 | |||||
| 1526 | static void __kmp_hierarchical_barrier_release( | ||||
| 1527 | enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, | ||||
| 1528 | int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj), void *itt_sync_obj) { | ||||
| 1529 | KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hier_release)((void)0); | ||||
| 1530 | kmp_team_t *team; | ||||
| 1531 | kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb; | ||||
| 1532 | kmp_uint32 nproc; | ||||
| 1533 | bool team_change = false; // indicates on-core barrier shouldn't be used | ||||
| 1534 | |||||
| 1535 | if (KMP_MASTER_TID(tid)(0 == (tid))) { | ||||
| 1536 | team = __kmp_threads[gtid]->th.th_team; | ||||
| 1537 | KMP_DEBUG_ASSERT(team != NULL)if (!(team != __null)) { __kmp_debug_assert("team != __null", "openmp/runtime/src/kmp_barrier.cpp", 1537); }; | ||||
| 1538 | KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) primary "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) primary " "entered barrier type %d\n", gtid, team->t.t_id, tid, bt) ; } | ||||
| 1539 | "entered barrier type %d\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) primary " "entered barrier type %d\n", gtid, team->t.t_id, tid, bt) ; } | ||||
| 1540 | gtid, team->t.t_id, tid, bt))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) primary " "entered barrier type %d\n", gtid, team->t.t_id, tid, bt) ; }; | ||||
| 1541 | } else { // Worker threads | ||||
| 1542 | // Wait for parent thread to release me | ||||
| 1543 | if (!thr_bar->use_oncore_barrier || | ||||
| 1544 | __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME(2147483647) || thr_bar->my_level != 0 || | ||||
| 1545 | thr_bar->team == NULL__null) { | ||||
| 1546 | // Use traditional method of waiting on my own b_go flag | ||||
| 1547 | thr_bar->wait_flag = KMP_BARRIER_OWN_FLAG1; | ||||
| 1548 | kmp_flag_64<> flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP(1 << 2)); | ||||
| 1549 | flag.wait(this_thr, TRUE(!0) USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | ||||
| 1550 | TCW_8(thr_bar->b_go,(thr_bar->b_go) = (0) | ||||
| 1551 | KMP_INIT_BARRIER_STATE)(thr_bar->b_go) = (0); // Reset my b_go flag for next time | ||||
| 1552 | } else { // Thread barrier data is initialized, this is a leaf, blocktime is | ||||
| 1553 | // infinite, not nested | ||||
| 1554 | // Wait on my "offset" bits on parent's b_go flag | ||||
| 1555 | thr_bar->wait_flag = KMP_BARRIER_PARENT_FLAG2; | ||||
| 1556 | kmp_flag_oncore flag(&thr_bar->parent_bar->b_go, KMP_BARRIER_STATE_BUMP(1 << 2), | ||||
| 1557 | thr_bar->offset + 1, bt, | ||||
| 1558 | this_thr USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | ||||
| 1559 | flag.wait(this_thr, TRUE(!0)); | ||||
| 1560 | if (thr_bar->wait_flag == | ||||
| 1561 | KMP_BARRIER_SWITCHING4) { // Thread was switched to own b_go | ||||
| 1562 | TCW_8(thr_bar->b_go,(thr_bar->b_go) = (0) | ||||
| 1563 | KMP_INIT_BARRIER_STATE)(thr_bar->b_go) = (0); // Reset my b_go flag for next time | ||||
| 1564 | } else { // Reset my bits on parent's b_go flag | ||||
| 1565 | (RCAST(volatile char *,reinterpret_cast<volatile char *>(&(thr_bar->parent_bar ->b_go)) | ||||
| 1566 | &(thr_bar->parent_bar->b_go))reinterpret_cast<volatile char *>(&(thr_bar->parent_bar ->b_go)))[thr_bar->offset + 1] = 0; | ||||
| 1567 | } | ||||
| 1568 | } | ||||
| 1569 | thr_bar->wait_flag = KMP_BARRIER_NOT_WAITING0; | ||||
| 1570 | // Early exit for reaping threads releasing forkjoin barrier | ||||
| 1571 | if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done)) | ||||
| 1572 | return; | ||||
| 1573 | // The worker thread may now assume that the team is valid. | ||||
| 1574 | team = __kmp_threads[gtid]->th.th_team; | ||||
| 1575 | KMP_DEBUG_ASSERT(team != NULL)if (!(team != __null)) { __kmp_debug_assert("team != __null", "openmp/runtime/src/kmp_barrier.cpp", 1575); }; | ||||
| 1576 | tid = __kmp_tid_from_gtid(gtid); | ||||
| 1577 | |||||
| 1578 | KA_TRACE(if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) set go(%p) = %u\n" , gtid, team->t.t_id, tid, &thr_bar->b_go, 0); } | ||||
| 1579 | 20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) set go(%p) = %u\n" , gtid, team->t.t_id, tid, &thr_bar->b_go, 0); } | ||||
| 1580 | ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) set go(%p) = %u\n" , gtid, team->t.t_id, tid, &thr_bar->b_go, 0); } | ||||
| 1581 | gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) set go(%p) = %u\n" , gtid, team->t.t_id, tid, &thr_bar->b_go, 0); }; | ||||
| 1582 | KMP_MB(); // Flush all pending memory write invalidates. | ||||
| 1583 | } | ||||
| 1584 | |||||
| 1585 | nproc = this_thr->th.th_team_nproc; | ||||
| 1586 | int level = team->t.t_level; | ||||
| 1587 | if (team->t.t_threads[0] | ||||
| 1588 | ->th.th_teams_microtask) { // are we inside the teams construct? | ||||
| 1589 | if (team->t.t_pkfn != (microtask_t)__kmp_teams_master && | ||||
| 1590 | this_thr->th.th_teams_level == level) | ||||
| 1591 | ++level; // level was not increased in teams construct for team_of_workers | ||||
| 1592 | if (this_thr->th.th_teams_size.nteams > 1) | ||||
| 1593 | ++level; // level was not increased in teams construct for team_of_masters | ||||
| 1594 | } | ||||
| 1595 | if (level == 1) | ||||
| 1596 | thr_bar->use_oncore_barrier = 1; | ||||
| 1597 | else | ||||
| 1598 | thr_bar->use_oncore_barrier = 0; // Do not use oncore barrier when nested | ||||
| 1599 | |||||
| 1600 | // If the team size has increased, we still communicate with old leaves via | ||||
| 1601 | // oncore barrier. | ||||
| 1602 | unsigned short int old_leaf_kids = thr_bar->leaf_kids; | ||||
| 1603 | kmp_uint64 old_leaf_state = thr_bar->leaf_state; | ||||
| 1604 | team_change = __kmp_init_hierarchical_barrier_thread(bt, thr_bar, nproc, gtid, | ||||
| 1605 | tid, team); | ||||
| 1606 | // But if the entire team changes, we won't use oncore barrier at all | ||||
| 1607 | if (team_change) | ||||
| 1608 | old_leaf_kids = 0; | ||||
| 1609 | |||||
| 1610 | #if KMP_BARRIER_ICV_PUSH1 | ||||
| 1611 | if (propagate_icvs) { | ||||
| 1612 | __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid, | ||||
| 1613 | FALSE0); | ||||
| 1614 | if (KMP_MASTER_TID((0 == (tid)) | ||||
| 1615 | tid)(0 == (tid))) { // primary already has copy in final destination; copy | ||||
| 1616 | copy_icvs(&thr_bar->th_fixed_icvs, | ||||
| 1617 | &team->t.t_implicit_task_taskdata[tid].td_icvs); | ||||
| 1618 | } else if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME(2147483647) && | ||||
| 1619 | thr_bar->use_oncore_barrier) { // optimization for inf blocktime | ||||
| 1620 | if (!thr_bar->my_level) // I'm a leaf in the hierarchy (my_level==0) | ||||
| 1621 | // leaves (on-core children) pull parent's fixed ICVs directly to local | ||||
| 1622 | // ICV store | ||||
| 1623 | copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, | ||||
| 1624 | &thr_bar->parent_bar->th_fixed_icvs); | ||||
| 1625 | // non-leaves will get ICVs piggybacked with b_go via NGO store | ||||
| 1626 | } else { // blocktime is not infinite; pull ICVs from parent's fixed ICVs | ||||
| 1627 | if (thr_bar->my_level) // not a leaf; copy ICVs to my fixed ICVs child can | ||||
| 1628 | // access | ||||
| 1629 | copy_icvs(&thr_bar->th_fixed_icvs, &thr_bar->parent_bar->th_fixed_icvs); | ||||
| 1630 | else // leaves copy parent's fixed ICVs directly to local ICV store | ||||
| 1631 | copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, | ||||
| 1632 | &thr_bar->parent_bar->th_fixed_icvs); | ||||
| 1633 | } | ||||
| 1634 | } | ||||
| 1635 | #endif // KMP_BARRIER_ICV_PUSH | ||||
| 1636 | |||||
| 1637 | // Now, release my children | ||||
| 1638 | if (thr_bar->my_level) { // not a leaf | ||||
| 1639 | kmp_int32 child_tid; | ||||
| 1640 | kmp_uint32 last; | ||||
| 1641 | if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME(2147483647) && | ||||
| 1642 | thr_bar->use_oncore_barrier) { | ||||
| 1643 | if (KMP_MASTER_TID(tid)(0 == (tid))) { // do a flat release | ||||
| 1644 | // Set local b_go to bump children via NGO store of the cache line | ||||
| 1645 | // containing IVCs and b_go. | ||||
| 1646 | thr_bar->b_go = KMP_BARRIER_STATE_BUMP(1 << 2); | ||||
| 1647 | // Use ngo stores if available; b_go piggybacks in the last 8 bytes of | ||||
| 1648 | // the cache line | ||||
| 1649 | ngo_load(&thr_bar->th_fixed_icvs)((void)0); | ||||
| 1650 | // This loops over all the threads skipping only the leaf nodes in the | ||||
| 1651 | // hierarchy | ||||
| 1652 | for (child_tid = thr_bar->skip_per_level[1]; child_tid < (int)nproc; | ||||
| 1653 | child_tid += thr_bar->skip_per_level[1]) { | ||||
| 1654 | kmp_bstate_t *child_bar = | ||||
| 1655 | &team->t.t_threads[child_tid]->th.th_bar[bt].bb; | ||||
| 1656 | KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) " "releasing T#%d(%d:%d)" " go(%p): %u => %u\n", gtid, team ->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), team-> t.t_id, child_tid, &child_bar->b_go, child_bar->b_go , child_bar->b_go + (1 << 2)); } | ||||
| 1657 | "releasing T#%d(%d:%d)"if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) " "releasing T#%d(%d:%d)" " go(%p): %u => %u\n", gtid, team ->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), team-> t.t_id, child_tid, &child_bar->b_go, child_bar->b_go , child_bar->b_go + (1 << 2)); } | ||||
| 1658 | " go(%p): %u => %u\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) " "releasing T#%d(%d:%d)" " go(%p): %u => %u\n", gtid, team ->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), team-> t.t_id, child_tid, &child_bar->b_go, child_bar->b_go , child_bar->b_go + (1 << 2)); } | ||||
| 1659 | gtid, team->t.t_id, tid,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) " "releasing T#%d(%d:%d)" " go(%p): %u => %u\n", gtid, team ->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), team-> t.t_id, child_tid, &child_bar->b_go, child_bar->b_go , child_bar->b_go + (1 << 2)); } | ||||
| 1660 | __kmp_gtid_from_tid(child_tid, team), team->t.t_id,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) " "releasing T#%d(%d:%d)" " go(%p): %u => %u\n", gtid, team ->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), team-> t.t_id, child_tid, &child_bar->b_go, child_bar->b_go , child_bar->b_go + (1 << 2)); } | ||||
| 1661 | child_tid, &child_bar->b_go, child_bar->b_go,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) " "releasing T#%d(%d:%d)" " go(%p): %u => %u\n", gtid, team ->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), team-> t.t_id, child_tid, &child_bar->b_go, child_bar->b_go , child_bar->b_go + (1 << 2)); } | ||||
| 1662 | child_bar->b_go + KMP_BARRIER_STATE_BUMP))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) " "releasing T#%d(%d:%d)" " go(%p): %u => %u\n", gtid, team ->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), team-> t.t_id, child_tid, &child_bar->b_go, child_bar->b_go , child_bar->b_go + (1 << 2)); }; | ||||
| 1663 | // Use ngo store (if available) to both store ICVs and release child | ||||
| 1664 | // via child's b_go | ||||
| 1665 | ngo_store_go(&child_bar->th_fixed_icvs, &thr_bar->th_fixed_icvs)memcpy((&child_bar->th_fixed_icvs), (&thr_bar-> th_fixed_icvs), 64); | ||||
| 1666 | } | ||||
| 1667 | ngo_sync()((void)0); | ||||
| 1668 | } | ||||
| 1669 | TCW_8(thr_bar->b_go,(thr_bar->b_go) = (0) | ||||
| 1670 | KMP_INIT_BARRIER_STATE)(thr_bar->b_go) = (0); // Reset my b_go flag for next time | ||||
| 1671 | // Now, release leaf children | ||||
| 1672 | if (thr_bar->leaf_kids) { // if there are any | ||||
| 1673 | // We test team_change on the off-chance that the level 1 team changed. | ||||
| 1674 | if (team_change || | ||||
| 1675 | old_leaf_kids < thr_bar->leaf_kids) { // some old, some new | ||||
| 1676 | if (old_leaf_kids) { // release old leaf kids | ||||
| 1677 | thr_bar->b_go |= old_leaf_state; | ||||
| 1678 | } | ||||
| 1679 | // Release new leaf kids | ||||
| 1680 | last = tid + thr_bar->skip_per_level[1]; | ||||
| 1681 | if (last > nproc) | ||||
| 1682 | last = nproc; | ||||
| 1683 | for (child_tid = tid + 1 + old_leaf_kids; child_tid < (int)last; | ||||
| 1684 | ++child_tid) { // skip_per_level[0]=1 | ||||
| 1685 | kmp_info_t *child_thr = team->t.t_threads[child_tid]; | ||||
| 1686 | kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb; | ||||
| 1687 | KA_TRACE(if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) releasing" " T#%d(%d:%d) go(%p): %u => %u\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid , &child_bar->b_go, child_bar->b_go, child_bar-> b_go + (1 << 2)); } | ||||
| 1688 | 20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) releasing" " T#%d(%d:%d) go(%p): %u => %u\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid , &child_bar->b_go, child_bar->b_go, child_bar-> b_go + (1 << 2)); } | ||||
| 1689 | ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) releasing"if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) releasing" " T#%d(%d:%d) go(%p): %u => %u\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid , &child_bar->b_go, child_bar->b_go, child_bar-> b_go + (1 << 2)); } | ||||
| 1690 | " T#%d(%d:%d) go(%p): %u => %u\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) releasing" " T#%d(%d:%d) go(%p): %u => %u\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid , &child_bar->b_go, child_bar->b_go, child_bar-> b_go + (1 << 2)); } | ||||
| 1691 | gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) releasing" " T#%d(%d:%d) go(%p): %u => %u\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid , &child_bar->b_go, child_bar->b_go, child_bar-> b_go + (1 << 2)); } | ||||
| 1692 | team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) releasing" " T#%d(%d:%d) go(%p): %u => %u\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid , &child_bar->b_go, child_bar->b_go, child_bar-> b_go + (1 << 2)); } | ||||
| 1693 | child_bar->b_go + KMP_BARRIER_STATE_BUMP))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) releasing" " T#%d(%d:%d) go(%p): %u => %u\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid , &child_bar->b_go, child_bar->b_go, child_bar-> b_go + (1 << 2)); }; | ||||
| 1694 | // Release child using child's b_go flag | ||||
| 1695 | kmp_flag_64<> flag(&child_bar->b_go, child_thr); | ||||
| 1696 | flag.release(); | ||||
| 1697 | } | ||||
| 1698 | } else { // Release all children at once with leaf_state bits on my own | ||||
| 1699 | // b_go flag | ||||
| 1700 | thr_bar->b_go |= thr_bar->leaf_state; | ||||
| 1701 | } | ||||
| 1702 | } | ||||
| 1703 | } else { // Blocktime is not infinite; do a simple hierarchical release | ||||
| 1704 | for (int d = thr_bar->my_level - 1; d >= 0; | ||||
| 1705 | --d) { // Release highest level threads first | ||||
| 1706 | last = tid + thr_bar->skip_per_level[d + 1]; | ||||
| 1707 | kmp_uint32 skip = thr_bar->skip_per_level[d]; | ||||
| 1708 | if (last > nproc) | ||||
| 1709 | last = nproc; | ||||
| 1710 | for (child_tid = tid + skip; child_tid < (int)last; child_tid += skip) { | ||||
| 1711 | kmp_info_t *child_thr = team->t.t_threads[child_tid]; | ||||
| 1712 | kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb; | ||||
| 1713 | KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) " "releasing T#%d(%d:%d) go(%p): %u => %u\n", gtid, team-> t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), team->t .t_id, child_tid, &child_bar->b_go, child_bar->b_go , child_bar->b_go + (1 << 2)); } | ||||
| 1714 | "releasing T#%d(%d:%d) go(%p): %u => %u\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) " "releasing T#%d(%d:%d) go(%p): %u => %u\n", gtid, team-> t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), team->t .t_id, child_tid, &child_bar->b_go, child_bar->b_go , child_bar->b_go + (1 << 2)); } | ||||
| 1715 | gtid, team->t.t_id, tid,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) " "releasing T#%d(%d:%d) go(%p): %u => %u\n", gtid, team-> t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), team->t .t_id, child_tid, &child_bar->b_go, child_bar->b_go , child_bar->b_go + (1 << 2)); } | ||||
| 1716 | __kmp_gtid_from_tid(child_tid, team), team->t.t_id,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) " "releasing T#%d(%d:%d) go(%p): %u => %u\n", gtid, team-> t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), team->t .t_id, child_tid, &child_bar->b_go, child_bar->b_go , child_bar->b_go + (1 << 2)); } | ||||
| 1717 | child_tid, &child_bar->b_go, child_bar->b_go,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) " "releasing T#%d(%d:%d) go(%p): %u => %u\n", gtid, team-> t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), team->t .t_id, child_tid, &child_bar->b_go, child_bar->b_go , child_bar->b_go + (1 << 2)); } | ||||
| 1718 | child_bar->b_go + KMP_BARRIER_STATE_BUMP))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) " "releasing T#%d(%d:%d) go(%p): %u => %u\n", gtid, team-> t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), team->t .t_id, child_tid, &child_bar->b_go, child_bar->b_go , child_bar->b_go + (1 << 2)); }; | ||||
| 1719 | // Release child using child's b_go flag | ||||
| 1720 | kmp_flag_64<> flag(&child_bar->b_go, child_thr); | ||||
| 1721 | flag.release(); | ||||
| 1722 | } | ||||
| 1723 | } | ||||
| 1724 | } | ||||
| 1725 | #if KMP_BARRIER_ICV_PUSH1 | ||||
| 1726 | if (propagate_icvs && !KMP_MASTER_TID(tid)(0 == (tid))) | ||||
| 1727 | // non-leaves copy ICVs from fixed ICVs to local dest | ||||
| 1728 | copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, | ||||
| 1729 | &thr_bar->th_fixed_icvs); | ||||
| 1730 | #endif // KMP_BARRIER_ICV_PUSH | ||||
| 1731 | } | ||||
| 1732 | KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) exit for "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) exit for " "barrier type %d\n", gtid, team->t.t_id, tid, bt); } | ||||
| 1733 | "barrier type %d\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) exit for " "barrier type %d\n", gtid, team->t.t_id, tid, bt); } | ||||
| 1734 | gtid, team->t.t_id, tid, bt))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) exit for " "barrier type %d\n", gtid, team->t.t_id, tid, bt); }; | ||||
| 1735 | } | ||||
| 1736 | |||||
| 1737 | // End of Barrier Algorithms | ||||
| 1738 | |||||
// Type trait carrying the "cancelled" state of a barrier.
// - cancellable == true:  wraps an ordinary runtime bool (`value`).
// - cancellable == false: stateless; converts to the compile-time constant
//   false and ignores whatever is constructed from or assigned to it.
// Both specializations offer the same operations (default construction,
// construction from bool, assignment from bool, conversion to bool) so that
// generic code can use either one without special-casing.
template <bool cancellable> struct is_cancellable {};
template <> struct is_cancellable<true> {
  bool value;
  is_cancellable() : value(false) {}
  // Intentionally implicit: lets a bool result initialize the trait directly.
  is_cancellable(bool b) : value(b) {}
  is_cancellable &operator=(bool b) {
    value = b;
    return *this;
  }
  operator bool() const { return value; }
};
template <> struct is_cancellable<false> {
  constexpr is_cancellable() = default;
  // Mirrors is_cancellable<true>'s converting constructor; the argument is
  // discarded because this specialization has no state.
  constexpr is_cancellable(bool) {}
  // Parameter left unnamed: the assigned value is deliberately ignored.
  is_cancellable &operator=(bool) { return *this; }
  constexpr operator bool() const { return false; }
};
| 1757 | |||||
| 1758 | // Internal function to do a barrier. | ||||
| 1759 | /* If is_split is true, do a split barrier, otherwise, do a plain barrier | ||||
| 1760 | If reduce is non-NULL, do a split reduction barrier, otherwise, do a split | ||||
| 1761 | barrier | ||||
| 1762 | When cancellable = false, | ||||
| 1763 | Returns 0 if primary thread, 1 if worker thread. | ||||
| 1764 | When cancellable = true | ||||
| 1765 | Returns 0 if not cancelled, 1 if cancelled. */ | ||||
| 1766 | template <bool cancellable = false> | ||||
| 1767 | static int __kmp_barrier_template(enum barrier_type bt, int gtid, int is_split, | ||||
| 1768 | size_t reduce_size, void *reduce_data, | ||||
| 1769 | void (*reduce)(void *, void *)) { | ||||
| 1770 | KMP_TIME_PARTITIONED_BLOCK(OMP_plain_barrier)((void)0); | ||||
| 1771 | KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER)((void)0); | ||||
| 1772 | int tid = __kmp_tid_from_gtid(gtid); | ||||
| 1773 | kmp_info_t *this_thr = __kmp_threads[gtid]; | ||||
| 1774 | kmp_team_t *team = this_thr->th.th_team; | ||||
| 1775 | int status = 0; | ||||
| 1776 | is_cancellable<cancellable> cancelled; | ||||
| 1777 | #if OMPT_SUPPORT1 && OMPT_OPTIONAL1 | ||||
| 1778 | ompt_data_t *my_task_data; | ||||
| 1779 | ompt_data_t *my_parallel_data; | ||||
| 1780 | void *return_address; | ||||
| 1781 | ompt_sync_region_t barrier_kind; | ||||
| 1782 | #endif | ||||
| 1783 | |||||
| 1784 | KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) has arrived\n", gtid,if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmp_barrier: T#%d(%d:%d) has arrived\n" , gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid (gtid)); } | ||||
| 1785 | __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid)))if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmp_barrier: T#%d(%d:%d) has arrived\n" , gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid (gtid)); }; | ||||
| 1786 | |||||
| 1787 | #if OMPT_SUPPORT1 | ||||
| 1788 | if (ompt_enabled.enabled) { | ||||
| 1789 | #if OMPT_OPTIONAL1 | ||||
| 1790 | my_task_data = OMPT_CUR_TASK_DATA(this_thr)(&(this_thr->th.th_current_task->ompt_task_info.task_data )); | ||||
| 1791 | my_parallel_data = OMPT_CUR_TEAM_DATA(this_thr)(&(this_thr->th.th_team->t.ompt_team_info.parallel_data )); | ||||
| 1792 | return_address = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid); | ||||
| 1793 | barrier_kind = __ompt_get_barrier_kind(bt, this_thr); | ||||
| 1794 | if (ompt_enabled.ompt_callback_sync_region) { | ||||
| 1795 | ompt_callbacks.ompt_callback(ompt_callback_sync_region)ompt_callback_sync_region_callback( | ||||
| 1796 | barrier_kind, ompt_scope_begin, my_parallel_data, my_task_data, | ||||
| 1797 | return_address); | ||||
| 1798 | } | ||||
| 1799 | if (ompt_enabled.ompt_callback_sync_region_wait) { | ||||
| 1800 | ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)ompt_callback_sync_region_wait_callback( | ||||
| 1801 | barrier_kind, ompt_scope_begin, my_parallel_data, my_task_data, | ||||
| 1802 | return_address); | ||||
| 1803 | } | ||||
| 1804 | #endif | ||||
| 1805 | // It is OK to report the barrier state after the barrier begin callback. | ||||
| 1806 | // According to the OMPT specification, a compliant implementation may | ||||
| 1807 | // even delay reporting this state until the barrier begins to wait. | ||||
| 1808 | this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier; | ||||
| 1809 | } | ||||
| 1810 | #endif | ||||
| 1811 | |||||
| 1812 | if (!team->t.t_serialized) { | ||||
| 1813 | #if USE_ITT_BUILD1 | ||||
| 1814 | // This value will be used in itt notify events below. | ||||
| 1815 | void *itt_sync_obj = NULL__null; | ||||
| 1816 | #if USE_ITT_NOTIFY1 | ||||
| 1817 | if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0) | ||||
| 1818 | itt_sync_obj = __kmp_itt_barrier_object(gtid, bt, 1); | ||||
| 1819 | #endif | ||||
| 1820 | #endif /* USE_ITT_BUILD */ | ||||
| 1821 | if (__kmp_tasking_mode == tskm_extra_barrier) { | ||||
| 1822 | __kmp_tasking_barrier(team, this_thr, gtid); | ||||
| 1823 | KA_TRACE(15,if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmp_barrier: T#%d(%d:%d) past tasking barrier\n" , gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid (gtid)); } | ||||
| 1824 | ("__kmp_barrier: T#%d(%d:%d) past tasking barrier\n", gtid,if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmp_barrier: T#%d(%d:%d) past tasking barrier\n" , gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid (gtid)); } | ||||
| 1825 | __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid)))if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmp_barrier: T#%d(%d:%d) past tasking barrier\n" , gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid (gtid)); }; | ||||
| 1826 | } | ||||
| 1827 | |||||
| 1828 | /* Copy the blocktime info to the thread, where __kmp_wait_template() can | ||||
| 1829 | access it when the team struct is not guaranteed to exist. */ | ||||
| 1830 | // See note about the corresponding code in __kmp_join_barrier() being | ||||
| 1831 | // performance-critical. | ||||
| 1832 | if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME(2147483647)) { | ||||
| 1833 | #if KMP_USE_MONITOR | ||||
| 1834 | this_thr->th.th_team_bt_intervals = | ||||
| 1835 | team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals; | ||||
| 1836 | this_thr->th.th_team_bt_set = | ||||
| 1837 | team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set; | ||||
| 1838 | #else | ||||
| 1839 | this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL(team, tid)((((team)->t.t_threads[(tid)]->th.th_current_task->td_icvs .bt_set) ? ((team)->t.t_threads[(tid)]->th.th_current_task ->td_icvs.blocktime) : __kmp_dflt_blocktime) * __kmp_ticks_per_msec ); | ||||
| 1840 | #endif | ||||
| 1841 | } | ||||
| 1842 | |||||
| 1843 | #if USE_ITT_BUILD1 | ||||
| 1844 | if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0) | ||||
| 1845 | __kmp_itt_barrier_starting(gtid, itt_sync_obj); | ||||
| 1846 | #endif /* USE_ITT_BUILD */ | ||||
| 1847 | #if USE_DEBUGGER0 | ||||
| 1848 | // Let the debugger know: the thread arrived to the barrier and waiting. | ||||
| 1849 | if (KMP_MASTER_TID(tid)(0 == (tid))) { // Primary thread counter stored in team struct | ||||
| 1850 | team->t.t_bar[bt].b_master_arrived += 1; | ||||
| 1851 | } else { | ||||
| 1852 | this_thr->th.th_bar[bt].bb.b_worker_arrived += 1; | ||||
| 1853 | } // if | ||||
| 1854 | #endif /* USE_DEBUGGER */ | ||||
| 1855 | if (reduce != NULL__null) { | ||||
| 1856 | // KMP_DEBUG_ASSERT( is_split == TRUE ); // #C69956 | ||||
| 1857 | this_thr->th.th_local.reduce_data = reduce_data; | ||||
| 1858 | } | ||||
| 1859 | |||||
| 1860 | if (KMP_MASTER_TID(tid)(0 == (tid)) && __kmp_tasking_mode != tskm_immediate_exec) | ||||
| 1861 | // use 0 to only setup the current team if nthreads > 1 | ||||
| 1862 | __kmp_task_team_setup(this_thr, team, 0); | ||||
| 1863 | |||||
| 1864 | if (cancellable) { | ||||
| 1865 | cancelled = __kmp_linear_barrier_gather_cancellable( | ||||
| 1866 | bt, this_thr, gtid, tid, reduce USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | ||||
| 1867 | } else { | ||||
| 1868 | switch (__kmp_barrier_gather_pattern[bt]) { | ||||
| 1869 | case bp_dist_bar: { | ||||
| 1870 | __kmp_dist_barrier_gather(bt, this_thr, gtid, tid, | ||||
| 1871 | reduce USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | ||||
| 1872 | break; | ||||
| 1873 | } | ||||
| 1874 | case bp_hyper_bar: { | ||||
| 1875 | // don't set branch bits to 0; use linear | ||||
| 1876 | KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt])if (!(__kmp_barrier_gather_branch_bits[bt])) { __kmp_debug_assert ("__kmp_barrier_gather_branch_bits[bt]", "openmp/runtime/src/kmp_barrier.cpp" , 1876); }; | ||||
| 1877 | __kmp_hyper_barrier_gather(bt, this_thr, gtid, tid, | ||||
| 1878 | reduce USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | ||||
| 1879 | break; | ||||
| 1880 | } | ||||
| 1881 | case bp_hierarchical_bar: { | ||||
| 1882 | __kmp_hierarchical_barrier_gather( | ||||
| 1883 | bt, this_thr, gtid, tid, reduce USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | ||||
| 1884 | break; | ||||
| 1885 | } | ||||
| 1886 | case bp_tree_bar: { | ||||
| 1887 | // don't set branch bits to 0; use linear | ||||
| 1888 | KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt])if (!(__kmp_barrier_gather_branch_bits[bt])) { __kmp_debug_assert ("__kmp_barrier_gather_branch_bits[bt]", "openmp/runtime/src/kmp_barrier.cpp" , 1888); }; | ||||
| 1889 | __kmp_tree_barrier_gather(bt, this_thr, gtid, tid, | ||||
| 1890 | reduce USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | ||||
| 1891 | break; | ||||
| 1892 | } | ||||
| 1893 | default: { | ||||
| 1894 | __kmp_linear_barrier_gather(bt, this_thr, gtid, tid, | ||||
| 1895 | reduce USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | ||||
| 1896 | } | ||||
| 1897 | } | ||||
| 1898 | } | ||||
| 1899 | |||||
| 1900 | KMP_MB(); | ||||
| 1901 | |||||
| 1902 | if (KMP_MASTER_TID(tid)(0 == (tid))) { | ||||
| 1903 | status = 0; | ||||
| 1904 | if (__kmp_tasking_mode != tskm_immediate_exec && !cancelled) { | ||||
| 1905 | __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | ||||
| 1906 | } | ||||
| 1907 | #if USE_DEBUGGER0 | ||||
| 1908 | // Let the debugger know: All threads are arrived and starting leaving the | ||||
| 1909 | // barrier. | ||||
| 1910 | team->t.t_bar[bt].b_team_arrived += 1; | ||||
| 1911 | #endif | ||||
| 1912 | |||||
| 1913 | if (__kmp_omp_cancellation) { | ||||
| 1914 | kmp_int32 cancel_request = KMP_ATOMIC_LD_RLX(&team->t.t_cancel_request)(&team->t.t_cancel_request)->load(std::memory_order_relaxed ); | ||||
| 1915 | // Reset cancellation flag for worksharing constructs | ||||
| 1916 | if (cancel_request == cancel_loop || | ||||
| 1917 | cancel_request == cancel_sections) { | ||||
| 1918 | KMP_ATOMIC_ST_RLX(&team->t.t_cancel_request, cancel_noreq)(&team->t.t_cancel_request)->store(cancel_noreq, std ::memory_order_relaxed); | ||||
| 1919 | } | ||||
| 1920 | } | ||||
| 1921 | #if USE_ITT_BUILD1 | ||||
| 1922 | /* TODO: In case of split reduction barrier, primary thread may send | ||||
| 1923 | acquired event early, before the final summation into the shared | ||||
| 1924 | variable is done (final summation can be a long operation for array | ||||
| 1925 | reductions). */ | ||||
| 1926 | if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0) | ||||
| 1927 | __kmp_itt_barrier_middle(gtid, itt_sync_obj); | ||||
| 1928 | #endif /* USE_ITT_BUILD */ | ||||
| 1929 | #if USE_ITT_BUILD1 && USE_ITT_NOTIFY1 | ||||
| 1930 | // Barrier - report frame end (only if active_level == 1) | ||||
| 1931 | if ((__itt_frame_submit_v3_ptr__kmp_itt_frame_submit_v3_ptr__3_0 || KMP_ITT_DEBUG0) && | ||||
| 1932 | __kmp_forkjoin_frames_mode && | ||||
| 1933 | (this_thr->th.th_teams_microtask == NULL__null || // either not in teams | ||||
| 1934 | this_thr->th.th_teams_size.nteams == 1) && // or inside single team | ||||
| 1935 | team->t.t_active_level == 1) { | ||||
| 1936 | ident_t *loc = __kmp_threads[gtid]->th.th_ident; | ||||
| 1937 | kmp_uint64 cur_time = __itt_get_timestamp(!__kmp_itt_get_timestamp_ptr__3_0) ? 0 : __kmp_itt_get_timestamp_ptr__3_0(); | ||||
| 1938 | kmp_info_t **other_threads = team->t.t_threads; | ||||
| 1939 | int nproc = this_thr->th.th_team_nproc; | ||||
| 1940 | int i; | ||||
| 1941 | switch (__kmp_forkjoin_frames_mode) { | ||||
| 1942 | case 1: | ||||
| 1943 | __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, | ||||
| 1944 | loc, nproc); | ||||
| 1945 | this_thr->th.th_frame_time = cur_time; | ||||
| 1946 | break; | ||||
| 1947 | case 2: // AC 2015-01-19: currently does not work for hierarchical (to | ||||
| 1948 | // be fixed) | ||||
| 1949 | __kmp_itt_frame_submit(gtid, this_thr->th.th_bar_min_time, cur_time, | ||||
| 1950 | 1, loc, nproc); | ||||
| 1951 | break; | ||||
| 1952 | case 3: | ||||
| 1953 | if (__itt_metadata_add_ptr__kmp_itt_metadata_add_ptr__3_0) { | ||||
| 1954 | // Initialize with primary thread's wait time | ||||
| 1955 | kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time; | ||||
| 1956 | // Set arrive time to zero to be able to check it in | ||||
| 1957 | // __kmp_invoke_task(); the same is done inside the loop below | ||||
| 1958 | this_thr->th.th_bar_arrive_time = 0; | ||||
| 1959 | for (i = 1; i < nproc; ++i) { | ||||
| 1960 | delta += (cur_time - other_threads[i]->th.th_bar_arrive_time); | ||||
| 1961 | other_threads[i]->th.th_bar_arrive_time = 0; | ||||
| 1962 | } | ||||
| 1963 | __kmp_itt_metadata_imbalance(gtid, this_thr->th.th_frame_time, | ||||
| 1964 | cur_time, delta, | ||||
| 1965 | (kmp_uint64)(reduce != NULL__null)); | ||||
| 1966 | } | ||||
| 1967 | __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, | ||||
| 1968 | loc, nproc); | ||||
| 1969 | this_thr->th.th_frame_time = cur_time; | ||||
| 1970 | break; | ||||
| 1971 | } | ||||
| 1972 | } | ||||
| 1973 | #endif /* USE_ITT_BUILD */ | ||||
| 1974 | } else { | ||||
| 1975 | status = 1; | ||||
| 1976 | #if USE_ITT_BUILD1 | ||||
| 1977 | if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0) | ||||
| 1978 | __kmp_itt_barrier_middle(gtid, itt_sync_obj); | ||||
| 1979 | #endif /* USE_ITT_BUILD */ | ||||
| 1980 | } | ||||
| 1981 | if ((status == 1 || !is_split) && !cancelled) { | ||||
| 1982 | if (cancellable) { | ||||
| 1983 | cancelled = __kmp_linear_barrier_release_cancellable( | ||||
| 1984 | bt, this_thr, gtid, tid, FALSE0 USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | ||||
| 1985 | } else { | ||||
| 1986 | switch (__kmp_barrier_release_pattern[bt]) { | ||||
| 1987 | case bp_dist_bar: { | ||||
| 1988 | KMP_ASSERT(__kmp_barrier_release_branch_bits[bt])if (!(__kmp_barrier_release_branch_bits[bt])) { __kmp_debug_assert ("__kmp_barrier_release_branch_bits[bt]", "openmp/runtime/src/kmp_barrier.cpp" , 1988); }; | ||||
| 1989 | __kmp_dist_barrier_release(bt, this_thr, gtid, tid, | ||||
| 1990 | FALSE0 USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | ||||
| 1991 | break; | ||||
| 1992 | } | ||||
| 1993 | case bp_hyper_bar: { | ||||
| 1994 | KMP_ASSERT(__kmp_barrier_release_branch_bits[bt])if (!(__kmp_barrier_release_branch_bits[bt])) { __kmp_debug_assert ("__kmp_barrier_release_branch_bits[bt]", "openmp/runtime/src/kmp_barrier.cpp" , 1994); }; | ||||
| 1995 | __kmp_hyper_barrier_release(bt, this_thr, gtid, tid, | ||||
| 1996 | FALSE0 USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | ||||
| 1997 | break; | ||||
| 1998 | } | ||||
| 1999 | case bp_hierarchical_bar: { | ||||
| 2000 | __kmp_hierarchical_barrier_release( | ||||
| 2001 | bt, this_thr, gtid, tid, FALSE0 USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | ||||
| 2002 | break; | ||||
| 2003 | } | ||||
| 2004 | case bp_tree_bar: { | ||||
| 2005 | KMP_ASSERT(__kmp_barrier_release_branch_bits[bt])if (!(__kmp_barrier_release_branch_bits[bt])) { __kmp_debug_assert ("__kmp_barrier_release_branch_bits[bt]", "openmp/runtime/src/kmp_barrier.cpp" , 2005); }; | ||||
| 2006 | __kmp_tree_barrier_release(bt, this_thr, gtid, tid, | ||||
| 2007 | FALSE0 USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | ||||
| 2008 | break; | ||||
| 2009 | } | ||||
| 2010 | default: { | ||||
| 2011 | __kmp_linear_barrier_release(bt, this_thr, gtid, tid, | ||||
| 2012 | FALSE0 USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | ||||
| 2013 | } | ||||
| 2014 | } | ||||
| 2015 | } | ||||
| 2016 | if (__kmp_tasking_mode != tskm_immediate_exec && !cancelled) { | ||||
| 2017 | __kmp_task_team_sync(this_thr, team); | ||||
| 2018 | } | ||||
| 2019 | } | ||||
| 2020 | |||||
| 2021 | #if USE_ITT_BUILD1 | ||||
| 2022 | /* GEH: TODO: Move this under if-condition above and also include in | ||||
| 2023 | __kmp_end_split_barrier(). This will more accurately represent the actual | ||||
| 2024 | release time of the threads for split barriers. */ | ||||
| 2025 | if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0) | ||||
| 2026 | __kmp_itt_barrier_finished(gtid, itt_sync_obj); | ||||
| 2027 | #endif /* USE_ITT_BUILD */ | ||||
| 2028 | } else { // Team is serialized. | ||||
| 2029 | status = 0; | ||||
| 2030 | if (__kmp_tasking_mode != tskm_immediate_exec) { | ||||
| 2031 | if (this_thr->th.th_task_team != NULL__null) { | ||||
| 2032 | #if USE_ITT_NOTIFY1 | ||||
| 2033 | void *itt_sync_obj = NULL__null; | ||||
| 2034 | if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0) { | ||||
| 2035 | itt_sync_obj = __kmp_itt_barrier_object(gtid, bt, 1); | ||||
| 2036 | __kmp_itt_barrier_starting(gtid, itt_sync_obj); | ||||
| 2037 | } | ||||
| 2038 | #endif | ||||
| 2039 | |||||
| 2040 | KMP_DEBUG_ASSERT(if (!(this_thr->th.th_task_team->tt.tt_found_proxy_tasks == (!0) || this_thr->th.th_task_team->tt.tt_hidden_helper_task_encountered == (!0))) { __kmp_debug_assert("this_thr->th.th_task_team->tt.tt_found_proxy_tasks == (!0) || this_thr->th.th_task_team->tt.tt_hidden_helper_task_encountered == (!0)" , "openmp/runtime/src/kmp_barrier.cpp", 2043); } | ||||
| 2041 | this_thr->th.th_task_team->tt.tt_found_proxy_tasks == TRUE ||if (!(this_thr->th.th_task_team->tt.tt_found_proxy_tasks == (!0) || this_thr->th.th_task_team->tt.tt_hidden_helper_task_encountered == (!0))) { __kmp_debug_assert("this_thr->th.th_task_team->tt.tt_found_proxy_tasks == (!0) || this_thr->th.th_task_team->tt.tt_hidden_helper_task_encountered == (!0)" , "openmp/runtime/src/kmp_barrier.cpp", 2043); } | ||||
| 2042 | this_thr->th.th_task_team->tt.tt_hidden_helper_task_encountered ==if (!(this_thr->th.th_task_team->tt.tt_found_proxy_tasks == (!0) || this_thr->th.th_task_team->tt.tt_hidden_helper_task_encountered == (!0))) { __kmp_debug_assert("this_thr->th.th_task_team->tt.tt_found_proxy_tasks == (!0) || this_thr->th.th_task_team->tt.tt_hidden_helper_task_encountered == (!0)" , "openmp/runtime/src/kmp_barrier.cpp", 2043); } | ||||
| 2043 | TRUE)if (!(this_thr->th.th_task_team->tt.tt_found_proxy_tasks == (!0) || this_thr->th.th_task_team->tt.tt_hidden_helper_task_encountered == (!0))) { __kmp_debug_assert("this_thr->th.th_task_team->tt.tt_found_proxy_tasks == (!0) || this_thr->th.th_task_team->tt.tt_hidden_helper_task_encountered == (!0)" , "openmp/runtime/src/kmp_barrier.cpp", 2043); }; | ||||
| 2044 | __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | ||||
| 2045 | __kmp_task_team_setup(this_thr, team, 0); | ||||
| 2046 | |||||
| 2047 | #if USE_ITT_BUILD1 | ||||
| 2048 | if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0) | ||||
| 2049 | __kmp_itt_barrier_finished(gtid, itt_sync_obj); | ||||
| 2050 | #endif /* USE_ITT_BUILD */ | ||||
| 2051 | } | ||||
| 2052 | } | ||||
| 2053 | } | ||||
| 2054 | KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) is leaving with return value %d\n",if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmp_barrier: T#%d(%d:%d) is leaving with return value %d\n" , gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid (gtid), status); } | ||||
| 2055 | gtid, __kmp_team_from_gtid(gtid)->t.t_id,if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmp_barrier: T#%d(%d:%d) is leaving with return value %d\n" , gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid (gtid), status); } | ||||
| 2056 | __kmp_tid_from_gtid(gtid), status))if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmp_barrier: T#%d(%d:%d) is leaving with return value %d\n" , gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid (gtid), status); }; | ||||
| 2057 | |||||
| 2058 | #if OMPT_SUPPORT1 | ||||
| 2059 | if (ompt_enabled.enabled) { | ||||
| 2060 | #if OMPT_OPTIONAL1 | ||||
| 2061 | if (ompt_enabled.ompt_callback_sync_region_wait) { | ||||
| 2062 | ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)ompt_callback_sync_region_wait_callback( | ||||
| 2063 | barrier_kind, ompt_scope_end, my_parallel_data, my_task_data, | ||||
| 2064 | return_address); | ||||
| 2065 | } | ||||
| 2066 | if (ompt_enabled.ompt_callback_sync_region) { | ||||
| 2067 | ompt_callbacks.ompt_callback(ompt_callback_sync_region)ompt_callback_sync_region_callback( | ||||
| 2068 | barrier_kind, ompt_scope_end, my_parallel_data, my_task_data, | ||||
| 2069 | return_address); | ||||
| 2070 | } | ||||
| 2071 | #endif | ||||
| 2072 | this_thr->th.ompt_thread_info.state = ompt_state_work_parallel; | ||||
| 2073 | } | ||||
| 2074 | #endif | ||||
| 2075 | |||||
| 2076 | if (cancellable) | ||||
| 2077 | return (int)cancelled; | ||||
| 2078 | return status; | ||||
| 2079 | } | ||||
| 2080 | |||||
| 2081 | // Returns 0 if primary thread, 1 if worker thread. | ||||
| 2082 | int __kmp_barrier(enum barrier_type bt, int gtid, int is_split, | ||||
| 2083 | size_t reduce_size, void *reduce_data, | ||||
| 2084 | void (*reduce)(void *, void *)) { | ||||
| 2085 | return __kmp_barrier_template<>(bt, gtid, is_split, reduce_size, reduce_data, | ||||
| 2086 | reduce); | ||||
| 2087 | } | ||||
| 2088 | |||||
#if defined(KMP_GOMP_COMPAT)
// Barrier used by the GOMP compatibility layer.
//
// When cancellation is enabled (__kmp_omp_cancellation), runs the cancellable
// instantiation of the barrier template on bs_plain_barrier; otherwise runs
// the ordinary __kmp_barrier.
//
// Returns 1 if cancelled, 0 otherwise
int __kmp_barrier_gomp_cancel(int gtid) {
  if (__kmp_omp_cancellation) {
    int cancelled = __kmp_barrier_template<true>(bs_plain_barrier, gtid, FALSE,
                                                 0, NULL, NULL);
    if (cancelled) {
      int tid = __kmp_tid_from_gtid(gtid);
      kmp_info_t *this_thr = __kmp_threads[gtid];
      // Primary thread does not need to revert anything; workers need to
      // revert their private b_arrived flag.
      if (!KMP_MASTER_TID(tid)) {
        this_thr->th.th_bar[bs_plain_barrier].bb.b_arrived -=
            KMP_BARRIER_STATE_BUMP;
      }
    }
    return cancelled;
  }
  // Cancellation disabled: a regular barrier can never report "cancelled".
  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
  return FALSE;
}
#endif
| 2112 | |||||
| 2113 | void __kmp_end_split_barrier(enum barrier_type bt, int gtid) { | ||||
| 2114 | KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_end_split_barrier)((void)0); | ||||
| 2115 | KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER)((void)0); | ||||
| 2116 | KMP_DEBUG_ASSERT(bt < bs_last_barrier)if (!(bt < bs_last_barrier)) { __kmp_debug_assert("bt < bs_last_barrier" , "openmp/runtime/src/kmp_barrier.cpp", 2116); }; | ||||
| 2117 | int tid = __kmp_tid_from_gtid(gtid); | ||||
| 2118 | kmp_info_t *this_thr = __kmp_threads[gtid]; | ||||
| 2119 | kmp_team_t *team = this_thr->th.th_team; | ||||
| 2120 | |||||
| 2121 | if (!team->t.t_serialized) { | ||||
| 2122 | if (KMP_MASTER_GTID(gtid)(0 == __kmp_tid_from_gtid((gtid)))) { | ||||
| 2123 | switch (__kmp_barrier_release_pattern[bt]) { | ||||
| 2124 | case bp_dist_bar: { | ||||
| 2125 | __kmp_dist_barrier_release(bt, this_thr, gtid, tid, | ||||
| 2126 | FALSE0 USE_ITT_BUILD_ARG(NULL), __null); | ||||
| 2127 | break; | ||||
| 2128 | } | ||||
| 2129 | case bp_hyper_bar: { | ||||
| 2130 | KMP_ASSERT(__kmp_barrier_release_branch_bits[bt])if (!(__kmp_barrier_release_branch_bits[bt])) { __kmp_debug_assert ("__kmp_barrier_release_branch_bits[bt]", "openmp/runtime/src/kmp_barrier.cpp" , 2130); }; | ||||
| 2131 | __kmp_hyper_barrier_release(bt, this_thr, gtid, tid, | ||||
| 2132 | FALSE0 USE_ITT_BUILD_ARG(NULL), __null); | ||||
| 2133 | break; | ||||
| 2134 | } | ||||
| 2135 | case bp_hierarchical_bar: { | ||||
| 2136 | __kmp_hierarchical_barrier_release(bt, this_thr, gtid, tid, | ||||
| 2137 | FALSE0 USE_ITT_BUILD_ARG(NULL), __null); | ||||
| 2138 | break; | ||||
| 2139 | } | ||||
| 2140 | case bp_tree_bar: { | ||||
| 2141 | KMP_ASSERT(__kmp_barrier_release_branch_bits[bt])if (!(__kmp_barrier_release_branch_bits[bt])) { __kmp_debug_assert ("__kmp_barrier_release_branch_bits[bt]", "openmp/runtime/src/kmp_barrier.cpp" , 2141); }; | ||||
| 2142 | __kmp_tree_barrier_release(bt, this_thr, gtid, tid, | ||||
| 2143 | FALSE0 USE_ITT_BUILD_ARG(NULL), __null); | ||||
| 2144 | break; | ||||
| 2145 | } | ||||
| 2146 | default: { | ||||
| 2147 | __kmp_linear_barrier_release(bt, this_thr, gtid, tid, | ||||
| 2148 | FALSE0 USE_ITT_BUILD_ARG(NULL), __null); | ||||
| 2149 | } | ||||
| 2150 | } | ||||
| 2151 | if (__kmp_tasking_mode != tskm_immediate_exec) { | ||||
| 2152 | __kmp_task_team_sync(this_thr, team); | ||||
| 2153 | } // if | ||||
| 2154 | } | ||||
| 2155 | } | ||||
| 2156 | } | ||||
| 2157 | |||||
| 2158 | void __kmp_join_barrier(int gtid) { | ||||
| 2159 | KMP_TIME_PARTITIONED_BLOCK(OMP_join_barrier)((void)0); | ||||
| 2160 | KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER)((void)0); | ||||
| 2161 | |||||
| 2162 | KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid])if (!(__kmp_threads && __kmp_threads[gtid])) { __kmp_debug_assert ("__kmp_threads && __kmp_threads[gtid]", "openmp/runtime/src/kmp_barrier.cpp" , 2162); }; | ||||
| 2163 | |||||
| 2164 | kmp_info_t *this_thr = __kmp_threads[gtid]; | ||||
| 2165 | kmp_team_t *team; | ||||
| 2166 | int tid; | ||||
| 2167 | #ifdef KMP_DEBUG1 | ||||
| 2168 | int team_id; | ||||
| 2169 | #endif /* KMP_DEBUG */ | ||||
| 2170 | #if USE_ITT_BUILD1 | ||||
| 2171 | void *itt_sync_obj = NULL__null; | ||||
| 2172 | #if USE_ITT_NOTIFY1 | ||||
| 2173 | if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0) // Don't call routine without need | ||||
| 2174 | // Get object created at fork_barrier | ||||
| 2175 | itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier); | ||||
| 2176 | #endif | ||||
| 2177 | #endif /* USE_ITT_BUILD */ | ||||
| 2178 | #if ((USE_ITT_BUILD1 && USE_ITT_NOTIFY1) || defined KMP_DEBUG1) | ||||
| 2179 | int nproc = this_thr->th.th_team_nproc; | ||||
| 2180 | #endif | ||||
| 2181 | KMP_MB(); | ||||
| 2182 | |||||
| 2183 | // Get current info | ||||
| 2184 | team = this_thr->th.th_team; | ||||
| 2185 | KMP_DEBUG_ASSERT(nproc == team->t.t_nproc)if (!(nproc == team->t.t_nproc)) { __kmp_debug_assert("nproc == team->t.t_nproc" , "openmp/runtime/src/kmp_barrier.cpp", 2185); }; | ||||
| 2186 | tid = __kmp_tid_from_gtid(gtid); | ||||
| 2187 | #ifdef KMP_DEBUG1 | ||||
| 2188 | team_id = team->t.t_id; | ||||
| 2189 | kmp_info_t *master_thread = this_thr->th.th_team_master; | ||||
| 2190 | if (master_thread != team->t.t_threads[0]) { | ||||
| 2191 | __kmp_print_structure(); | ||||
| 2192 | } | ||||
| 2193 | #endif /* KMP_DEBUG */ | ||||
| 2194 | KMP_DEBUG_ASSERT(master_thread == team->t.t_threads[0])if (!(master_thread == team->t.t_threads[0])) { __kmp_debug_assert ("master_thread == team->t.t_threads[0]", "openmp/runtime/src/kmp_barrier.cpp" , 2194); }; | ||||
| 2195 | KMP_MB(); | ||||
| 2196 | |||||
| 2197 | // Verify state | ||||
| 2198 | KMP_DEBUG_ASSERT(TCR_PTR(this_thr->th.th_team))if (!(((void *)(this_thr->th.th_team)))) { __kmp_debug_assert ("((void *)(this_thr->th.th_team))", "openmp/runtime/src/kmp_barrier.cpp" , 2198); }; | ||||
| 2199 | KMP_DEBUG_ASSERT(TCR_PTR(this_thr->th.th_root))if (!(((void *)(this_thr->th.th_root)))) { __kmp_debug_assert ("((void *)(this_thr->th.th_root))", "openmp/runtime/src/kmp_barrier.cpp" , 2199); }; | ||||
| 2200 | KMP_DEBUG_ASSERT(this_thr == team->t.t_threads[tid])if (!(this_thr == team->t.t_threads[tid])) { __kmp_debug_assert ("this_thr == team->t.t_threads[tid]", "openmp/runtime/src/kmp_barrier.cpp" , 2200); }; | ||||
| 2201 | KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) arrived at join barrier\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_join_barrier: T#%d(%d:%d) arrived at join barrier\n" , gtid, team_id, tid); } | ||||
| 2202 | gtid, team_id, tid))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_join_barrier: T#%d(%d:%d) arrived at join barrier\n" , gtid, team_id, tid); }; | ||||
| 2203 | |||||
| 2204 | #if OMPT_SUPPORT1 | ||||
| 2205 | if (ompt_enabled.enabled) { | ||||
| 2206 | #if OMPT_OPTIONAL1 | ||||
| 2207 | ompt_data_t *my_task_data; | ||||
| 2208 | ompt_data_t *my_parallel_data; | ||||
| 2209 | void *codeptr = NULL__null; | ||||
| 2210 | int ds_tid = this_thr->th.th_info.ds.ds_tid; | ||||
| 2211 | if (KMP_MASTER_TID(ds_tid)(0 == (ds_tid)) && | ||||
| 2212 | (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)ompt_callback_sync_region_wait_callback || | ||||
| 2213 | ompt_callbacks.ompt_callback(ompt_callback_sync_region)ompt_callback_sync_region_callback)) | ||||
| 2214 | codeptr = team->t.ompt_team_info.master_return_address; | ||||
| 2215 | my_task_data = OMPT_CUR_TASK_DATA(this_thr)(&(this_thr->th.th_current_task->ompt_task_info.task_data )); | ||||
| 2216 | my_parallel_data = OMPT_CUR_TEAM_DATA(this_thr)(&(this_thr->th.th_team->t.ompt_team_info.parallel_data )); | ||||
| 2217 | if (ompt_enabled.ompt_callback_sync_region) { | ||||
| 2218 | ompt_callbacks.ompt_callback(ompt_callback_sync_region)ompt_callback_sync_region_callback( | ||||
| 2219 | ompt_sync_region_barrier_implicit, ompt_scope_begin, my_parallel_data, | ||||
| 2220 | my_task_data, codeptr); | ||||
| 2221 | } | ||||
| 2222 | if (ompt_enabled.ompt_callback_sync_region_wait) { | ||||
| 2223 | ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)ompt_callback_sync_region_wait_callback( | ||||
| 2224 | ompt_sync_region_barrier_implicit, ompt_scope_begin, my_parallel_data, | ||||
| 2225 | my_task_data, codeptr); | ||||
| 2226 | } | ||||
| 2227 | if (!KMP_MASTER_TID(ds_tid)(0 == (ds_tid))) | ||||
| 2228 | this_thr->th.ompt_thread_info.task_data = *OMPT_CUR_TASK_DATA(this_thr)(&(this_thr->th.th_current_task->ompt_task_info.task_data )); | ||||
| 2229 | #endif | ||||
| 2230 | this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier_implicit; | ||||
| 2231 | } | ||||
| 2232 | #endif | ||||
| 2233 | |||||
| 2234 | if (__kmp_tasking_mode == tskm_extra_barrier) { | ||||
| 2235 | __kmp_tasking_barrier(team, this_thr, gtid); | ||||
| 2236 | KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) past tasking barrier\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_join_barrier: T#%d(%d:%d) past tasking barrier\n" , gtid, team_id, tid); } | ||||
| 2237 | gtid, team_id, tid))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_join_barrier: T#%d(%d:%d) past tasking barrier\n" , gtid, team_id, tid); }; | ||||
| 2238 | } | ||||
| 2239 | #ifdef KMP_DEBUG1 | ||||
| 2240 | if (__kmp_tasking_mode != tskm_immediate_exec) { | ||||
| 2241 | KA_TRACE(20, ("__kmp_join_barrier: T#%d, old team = %d, old task_team = "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_join_barrier: T#%d, old team = %d, old task_team = " "%p, th_task_team = %p\n", __kmp_gtid_from_thread(this_thr), team_id, team->t.t_task_team[this_thr->th.th_task_state ], this_thr->th.th_task_team); } | ||||
| 2242 | "%p, th_task_team = %p\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_join_barrier: T#%d, old team = %d, old task_team = " "%p, th_task_team = %p\n", __kmp_gtid_from_thread(this_thr), team_id, team->t.t_task_team[this_thr->th.th_task_state ], this_thr->th.th_task_team); } | ||||
| 2243 | __kmp_gtid_from_thread(this_thr), team_id,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_join_barrier: T#%d, old team = %d, old task_team = " "%p, th_task_team = %p\n", __kmp_gtid_from_thread(this_thr), team_id, team->t.t_task_team[this_thr->th.th_task_state ], this_thr->th.th_task_team); } | ||||
| 2244 | team->t.t_task_team[this_thr->th.th_task_state],if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_join_barrier: T#%d, old team = %d, old task_team = " "%p, th_task_team = %p\n", __kmp_gtid_from_thread(this_thr), team_id, team->t.t_task_team[this_thr->th.th_task_state ], this_thr->th.th_task_team); } | ||||
| 2245 | this_thr->th.th_task_team))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_join_barrier: T#%d, old team = %d, old task_team = " "%p, th_task_team = %p\n", __kmp_gtid_from_thread(this_thr), team_id, team->t.t_task_team[this_thr->th.th_task_state ], this_thr->th.th_task_team); }; | ||||
| 2246 | if (this_thr->th.th_task_team) | ||||
| 2247 | KMP_DEBUG_ASSERT(this_thr->th.th_task_team ==if (!(this_thr->th.th_task_team == team->t.t_task_team[ this_thr->th.th_task_state])) { __kmp_debug_assert("this_thr->th.th_task_team == team->t.t_task_team[this_thr->th.th_task_state]" , "openmp/runtime/src/kmp_barrier.cpp", 2248); } | ||||
| 2248 | team->t.t_task_team[this_thr->th.th_task_state])if (!(this_thr->th.th_task_team == team->t.t_task_team[ this_thr->th.th_task_state])) { __kmp_debug_assert("this_thr->th.th_task_team == team->t.t_task_team[this_thr->th.th_task_state]" , "openmp/runtime/src/kmp_barrier.cpp", 2248); }; | ||||
| 2249 | } | ||||
| 2250 | #endif /* KMP_DEBUG */ | ||||
| 2251 | |||||
| 2252 | /* Copy the blocktime info to the thread, where __kmp_wait_template() can | ||||
| 2253 | access it when the team struct is not guaranteed to exist. Doing these | ||||
| 2254 | loads causes a cache miss slows down EPCC parallel by 2x. As a workaround, | ||||
| 2255 | we do not perform the copy if blocktime=infinite, since the values are not | ||||
| 2256 | used by __kmp_wait_template() in that case. */ | ||||
| 2257 | if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME(2147483647)) { | ||||
| 2258 | #if KMP_USE_MONITOR | ||||
| 2259 | this_thr->th.th_team_bt_intervals = | ||||
| 2260 | team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals; | ||||
| 2261 | this_thr->th.th_team_bt_set = | ||||
| 2262 | team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set; | ||||
| 2263 | #else | ||||
| 2264 | this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL(team, tid)((((team)->t.t_threads[(tid)]->th.th_current_task->td_icvs .bt_set) ? ((team)->t.t_threads[(tid)]->th.th_current_task ->td_icvs.blocktime) : __kmp_dflt_blocktime) * __kmp_ticks_per_msec ); | ||||
| 2265 | #endif | ||||
| 2266 | } | ||||
| 2267 | |||||
| 2268 | #if USE_ITT_BUILD1 | ||||
| 2269 | if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0) | ||||
| 2270 | __kmp_itt_barrier_starting(gtid, itt_sync_obj); | ||||
| 2271 | #endif /* USE_ITT_BUILD */ | ||||
| 2272 | |||||
| 2273 | switch (__kmp_barrier_gather_pattern[bs_forkjoin_barrier]) { | ||||
| 2274 | case bp_dist_bar: { | ||||
| 2275 | __kmp_dist_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, | ||||
| 2276 | NULL__null USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | ||||
| 2277 | break; | ||||
| 2278 | } | ||||
| 2279 | case bp_hyper_bar: { | ||||
| 2280 | KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier])if (!(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier])) { __kmp_debug_assert("__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]" , "openmp/runtime/src/kmp_barrier.cpp", 2280); }; | ||||
| 2281 | __kmp_hyper_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, | ||||
| 2282 | NULL__null USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | ||||
| 2283 | break; | ||||
| 2284 | } | ||||
| 2285 | case bp_hierarchical_bar: { | ||||
| 2286 | __kmp_hierarchical_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, | ||||
| 2287 | NULL__null USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | ||||
| 2288 | break; | ||||
| 2289 | } | ||||
| 2290 | case bp_tree_bar: { | ||||
| 2291 | KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier])if (!(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier])) { __kmp_debug_assert("__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]" , "openmp/runtime/src/kmp_barrier.cpp", 2291); }; | ||||
| 2292 | __kmp_tree_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, | ||||
| 2293 | NULL__null USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | ||||
| 2294 | break; | ||||
| 2295 | } | ||||
| 2296 | default: { | ||||
| 2297 | __kmp_linear_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, | ||||
| 2298 | NULL__null USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | ||||
| 2299 | } | ||||
| 2300 | } | ||||
| 2301 | |||||
| 2302 | /* From this point on, the team data structure may be deallocated at any time | ||||
| 2303 | by the primary thread - it is unsafe to reference it in any of the worker | ||||
| 2304 | threads. Any per-team data items that need to be referenced before the | ||||
| 2305 | end of the barrier should be moved to the kmp_task_team_t structs. */ | ||||
| 2306 | if (KMP_MASTER_TID(tid)(0 == (tid))) { | ||||
| 2307 | if (__kmp_tasking_mode != tskm_immediate_exec) { | ||||
| 2308 | __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | ||||
| 2309 | } | ||||
| 2310 | if (__kmp_display_affinity) { | ||||
| 2311 | KMP_CHECK_UPDATE(team->t.t_display_affinity, 0)if ((team->t.t_display_affinity) != (0)) (team->t.t_display_affinity ) = (0); | ||||
| 2312 | } | ||||
| 2313 | #if KMP_STATS_ENABLED0 | ||||
| 2314 | // Have primary thread flag the workers to indicate they are now waiting for | ||||
| 2315 | // next parallel region, Also wake them up so they switch their timers to | ||||
| 2316 | // idle. | ||||
| 2317 | for (int i = 0; i < team->t.t_nproc; ++i) { | ||||
| 2318 | kmp_info_t *team_thread = team->t.t_threads[i]; | ||||
| 2319 | if (team_thread == this_thr) | ||||
| 2320 | continue; | ||||
| 2321 | team_thread->th.th_stats->setIdleFlag(); | ||||
| 2322 | if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME(2147483647) && | ||||
| 2323 | team_thread->th.th_sleep_loc != NULL__null) | ||||
| 2324 | __kmp_null_resume_wrapper(team_thread); | ||||
| 2325 | } | ||||
| 2326 | #endif | ||||
| 2327 | #if USE_ITT_BUILD1 | ||||
| 2328 | if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0) | ||||
| 2329 | __kmp_itt_barrier_middle(gtid, itt_sync_obj); | ||||
| 2330 | #endif /* USE_ITT_BUILD */ | ||||
| 2331 | |||||
| 2332 | #if USE_ITT_BUILD1 && USE_ITT_NOTIFY1 | ||||
| 2333 | // Join barrier - report frame end | ||||
| 2334 | if ((__itt_frame_submit_v3_ptr__kmp_itt_frame_submit_v3_ptr__3_0 || KMP_ITT_DEBUG0) && | ||||
| 2335 | __kmp_forkjoin_frames_mode && | ||||
| 2336 | (this_thr->th.th_teams_microtask == NULL__null || // either not in teams | ||||
| 2337 | this_thr->th.th_teams_size.nteams == 1) && // or inside single team | ||||
| 2338 | team->t.t_active_level == 1) { | ||||
| 2339 | kmp_uint64 cur_time = __itt_get_timestamp(!__kmp_itt_get_timestamp_ptr__3_0) ? 0 : __kmp_itt_get_timestamp_ptr__3_0(); | ||||
| 2340 | ident_t *loc = team->t.t_ident; | ||||
| 2341 | kmp_info_t **other_threads = team->t.t_threads; | ||||
| 2342 | switch (__kmp_forkjoin_frames_mode) { | ||||
| 2343 | case 1: | ||||
| 2344 | __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, | ||||
| 2345 | loc, nproc); | ||||
| 2346 | break; | ||||
| 2347 | case 2: | ||||
| 2348 | __kmp_itt_frame_submit(gtid, this_thr->th.th_bar_min_time, cur_time, 1, | ||||
| 2349 | loc, nproc); | ||||
| 2350 | break; | ||||
| 2351 | case 3: | ||||
| 2352 | if (__itt_metadata_add_ptr__kmp_itt_metadata_add_ptr__3_0) { | ||||
| 2353 | // Initialize with primary thread's wait time | ||||
| 2354 | kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time; | ||||
| 2355 | // Set arrive time to zero to be able to check it in | ||||
| 2356 | // __kmp_invoke_task(); the same is done inside the loop below | ||||
| 2357 | this_thr->th.th_bar_arrive_time = 0; | ||||
| 2358 | for (int i = 1; i < nproc; ++i) { | ||||
| 2359 | delta += (cur_time - other_threads[i]->th.th_bar_arrive_time); | ||||
| 2360 | other_threads[i]->th.th_bar_arrive_time = 0; | ||||
| 2361 | } | ||||
| 2362 | __kmp_itt_metadata_imbalance(gtid, this_thr->th.th_frame_time, | ||||
| 2363 | cur_time, delta, 0); | ||||
| 2364 | } | ||||
| 2365 | __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, | ||||
| 2366 | loc, nproc); | ||||
| 2367 | this_thr->th.th_frame_time = cur_time; | ||||
| 2368 | break; | ||||
| 2369 | } | ||||
| 2370 | } | ||||
| 2371 | #endif /* USE_ITT_BUILD */ | ||||
| 2372 | } | ||||
| 2373 | #if USE_ITT_BUILD1 | ||||
| 2374 | else { | ||||
| 2375 | if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0) | ||||
| 2376 | __kmp_itt_barrier_middle(gtid, itt_sync_obj); | ||||
| 2377 | } | ||||
| 2378 | #endif /* USE_ITT_BUILD */ | ||||
| 2379 | |||||
| 2380 | #if KMP_DEBUG1 | ||||
| 2381 | if (KMP_MASTER_TID(tid)(0 == (tid))) { | ||||
| 2382 | KA_TRACE(if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmp_join_barrier: T#%d(%d:%d) says all %d team threads arrived\n" , gtid, team_id, tid, nproc); } | ||||
| 2383 | 15,if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmp_join_barrier: T#%d(%d:%d) says all %d team threads arrived\n" , gtid, team_id, tid, nproc); } | ||||
| 2384 | ("__kmp_join_barrier: T#%d(%d:%d) says all %d team threads arrived\n",if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmp_join_barrier: T#%d(%d:%d) says all %d team threads arrived\n" , gtid, team_id, tid, nproc); } | ||||
| 2385 | gtid, team_id, tid, nproc))if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmp_join_barrier: T#%d(%d:%d) says all %d team threads arrived\n" , gtid, team_id, tid, nproc); }; | ||||
| 2386 | } | ||||
| 2387 | #endif /* KMP_DEBUG */ | ||||
| 2388 | |||||
| 2389 | // TODO now, mark worker threads as done so they may be disbanded | ||||
| 2390 | KMP_MB(); // Flush all pending memory write invalidates. | ||||
| 2391 | KA_TRACE(10,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_join_barrier: T#%d(%d:%d) leaving\n" , gtid, team_id, tid); } | ||||
| 2392 | ("__kmp_join_barrier: T#%d(%d:%d) leaving\n", gtid, team_id, tid))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_join_barrier: T#%d(%d:%d) leaving\n" , gtid, team_id, tid); }; | ||||
| 2393 | |||||
| 2394 | } | ||||
| 2395 | |||||
| 2396 | // TODO release worker threads' fork barriers as we are ready instead of all at | ||||
| 2397 | // once | ||||
| 2398 | void __kmp_fork_barrier(int gtid, int tid) { | ||||
| 2399 | KMP_TIME_PARTITIONED_BLOCK(OMP_fork_barrier)((void)0); | ||||
| 2400 | KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER)((void)0); | ||||
| 2401 | kmp_info_t *this_thr = __kmp_threads[gtid]; | ||||
| 2402 | kmp_team_t *team = (tid == 0) ? this_thr->th.th_team : NULL__null; | ||||
| |||||
| 2403 | #if USE_ITT_BUILD1 | ||||
| 2404 | void *itt_sync_obj = NULL__null; | ||||
| 2405 | #endif /* USE_ITT_BUILD */ | ||||
| 2406 | if (team
| ||||
| 2407 | |||||
| 2408 | KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d:%d) has arrived\n", gtid,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_fork_barrier: T#%d(%d:%d) has arrived\n" , gtid, (team != __null) ? team->t.t_id : -1, tid); } | ||||
| 2409 | (team != NULL) ? team->t.t_id : -1, tid))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_fork_barrier: T#%d(%d:%d) has arrived\n" , gtid, (team != __null) ? team->t.t_id : -1, tid); }; | ||||
| 2410 | |||||
| 2411 | // th_team pointer only valid for primary thread here | ||||
| 2412 | if (KMP_MASTER_TID(tid)(0 == (tid))) { | ||||
| 2413 | #if USE_ITT_BUILD1 && USE_ITT_NOTIFY1 | ||||
| 2414 | if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0) { | ||||
| 2415 | // Create itt barrier object | ||||
| 2416 | itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 1); | ||||
| 2417 | __kmp_itt_barrier_middle(gtid, itt_sync_obj); // Call acquired/releasing | ||||
| 2418 | } | ||||
| 2419 | #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */ | ||||
| 2420 | |||||
| 2421 | #ifdef KMP_DEBUG1 | ||||
| 2422 | KMP_DEBUG_ASSERT(team)if (!(team)) { __kmp_debug_assert("team", "openmp/runtime/src/kmp_barrier.cpp" , 2422); }; | ||||
| 2423 | kmp_info_t **other_threads = team->t.t_threads; | ||||
| 2424 | int i; | ||||
| 2425 | |||||
| 2426 | // Verify state | ||||
| 2427 | KMP_MB(); | ||||
| 2428 | |||||
| 2429 | for (i = 1; i < team->t.t_nproc; ++i) { | ||||
| 2430 | KA_TRACE(500,if (kmp_a_debug >= 500) { __kmp_debug_printf ("__kmp_fork_barrier: T#%d(%d:0) checking T#%d(%d:%d) fork go " "== %u.\n", gtid, team->t.t_id, other_threads[i]->th.th_info .ds.ds_gtid, team->t.t_id, other_threads[i]->th.th_info .ds.ds_tid, other_threads[i]->th.th_bar[bs_forkjoin_barrier ].bb.b_go); } | ||||
| 2431 | ("__kmp_fork_barrier: T#%d(%d:0) checking T#%d(%d:%d) fork go "if (kmp_a_debug >= 500) { __kmp_debug_printf ("__kmp_fork_barrier: T#%d(%d:0) checking T#%d(%d:%d) fork go " "== %u.\n", gtid, team->t.t_id, other_threads[i]->th.th_info .ds.ds_gtid, team->t.t_id, other_threads[i]->th.th_info .ds.ds_tid, other_threads[i]->th.th_bar[bs_forkjoin_barrier ].bb.b_go); } | ||||
| 2432 | "== %u.\n",if (kmp_a_debug >= 500) { __kmp_debug_printf ("__kmp_fork_barrier: T#%d(%d:0) checking T#%d(%d:%d) fork go " "== %u.\n", gtid, team->t.t_id, other_threads[i]->th.th_info .ds.ds_gtid, team->t.t_id, other_threads[i]->th.th_info .ds.ds_tid, other_threads[i]->th.th_bar[bs_forkjoin_barrier ].bb.b_go); } | ||||
| 2433 | gtid, team->t.t_id, other_threads[i]->th.th_info.ds.ds_gtid,if (kmp_a_debug >= 500) { __kmp_debug_printf ("__kmp_fork_barrier: T#%d(%d:0) checking T#%d(%d:%d) fork go " "== %u.\n", gtid, team->t.t_id, other_threads[i]->th.th_info .ds.ds_gtid, team->t.t_id, other_threads[i]->th.th_info .ds.ds_tid, other_threads[i]->th.th_bar[bs_forkjoin_barrier ].bb.b_go); } | ||||
| 2434 | team->t.t_id, other_threads[i]->th.th_info.ds.ds_tid,if (kmp_a_debug >= 500) { __kmp_debug_printf ("__kmp_fork_barrier: T#%d(%d:0) checking T#%d(%d:%d) fork go " "== %u.\n", gtid, team->t.t_id, other_threads[i]->th.th_info .ds.ds_gtid, team->t.t_id, other_threads[i]->th.th_info .ds.ds_tid, other_threads[i]->th.th_bar[bs_forkjoin_barrier ].bb.b_go); } | ||||
| 2435 | other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go))if (kmp_a_debug >= 500) { __kmp_debug_printf ("__kmp_fork_barrier: T#%d(%d:0) checking T#%d(%d:%d) fork go " "== %u.\n", gtid, team->t.t_id, other_threads[i]->th.th_info .ds.ds_gtid, team->t.t_id, other_threads[i]->th.th_info .ds.ds_tid, other_threads[i]->th.th_bar[bs_forkjoin_barrier ].bb.b_go); }; | ||||
| 2436 | KMP_DEBUG_ASSERT(if (!(((other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb .b_go) & ~((1 << 0))) == 0)) { __kmp_debug_assert("((other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go) & ~((1 << 0))) == 0" , "openmp/runtime/src/kmp_barrier.cpp", 2438); } | ||||
| 2437 | (TCR_4(other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go) &if (!(((other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb .b_go) & ~((1 << 0))) == 0)) { __kmp_debug_assert("((other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go) & ~((1 << 0))) == 0" , "openmp/runtime/src/kmp_barrier.cpp", 2438); } | ||||
| 2438 | ~(KMP_BARRIER_SLEEP_STATE)) == KMP_INIT_BARRIER_STATE)if (!(((other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb .b_go) & ~((1 << 0))) == 0)) { __kmp_debug_assert("((other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go) & ~((1 << 0))) == 0" , "openmp/runtime/src/kmp_barrier.cpp", 2438); }; | ||||
| 2439 | KMP_DEBUG_ASSERT(other_threads[i]->th.th_team == team)if (!(other_threads[i]->th.th_team == team)) { __kmp_debug_assert ("other_threads[i]->th.th_team == team", "openmp/runtime/src/kmp_barrier.cpp" , 2439); }; | ||||
| 2440 | } | ||||
| 2441 | #endif | ||||
| 2442 | |||||
| 2443 | if (__kmp_tasking_mode != tskm_immediate_exec) { | ||||
| 2444 | // 0 indicates setup current task team if nthreads > 1 | ||||
| 2445 | __kmp_task_team_setup(this_thr, team, 0); | ||||
| 2446 | } | ||||
| 2447 | |||||
| 2448 | /* The primary thread may have changed its blocktime between join barrier | ||||
| 2449 | and fork barrier. Copy the blocktime info to the thread, where | ||||
| 2450 | __kmp_wait_template() can access it when the team struct is not | ||||
| 2451 | guaranteed to exist. */ | ||||
| 2452 | // See note about the corresponding code in __kmp_join_barrier() being | ||||
| 2453 | // performance-critical | ||||
| 2454 | if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME(2147483647)) { | ||||
| 2455 | #if KMP_USE_MONITOR | ||||
| 2456 | this_thr->th.th_team_bt_intervals = | ||||
| 2457 | team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals; | ||||
| 2458 | this_thr->th.th_team_bt_set = | ||||
| 2459 | team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set; | ||||
| 2460 | #else | ||||
| 2461 | this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL(team, tid)((((team)->t.t_threads[(tid)]->th.th_current_task->td_icvs .bt_set) ? ((team)->t.t_threads[(tid)]->th.th_current_task ->td_icvs.blocktime) : __kmp_dflt_blocktime) * __kmp_ticks_per_msec ); | ||||
| 2462 | #endif | ||||
| 2463 | } | ||||
| 2464 | } // primary thread | ||||
| 2465 | |||||
| 2466 | switch (__kmp_barrier_release_pattern[bs_forkjoin_barrier]) { | ||||
| 2467 | case bp_dist_bar: { | ||||
| 2468 | __kmp_dist_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, | ||||
| 2469 | TRUE(!0) USE_ITT_BUILD_ARG(NULL), __null); | ||||
| 2470 | break; | ||||
| 2471 | } | ||||
| 2472 | case bp_hyper_bar: { | ||||
| 2473 | KMP_ASSERT(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier])if (!(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]) ) { __kmp_debug_assert("__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]" , "openmp/runtime/src/kmp_barrier.cpp", 2473); }; | ||||
| 2474 | __kmp_hyper_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, | ||||
| 2475 | TRUE(!0) USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | ||||
| 2476 | break; | ||||
| 2477 | } | ||||
| 2478 | case bp_hierarchical_bar: { | ||||
| 2479 | __kmp_hierarchical_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, | ||||
| 2480 | TRUE(!0) USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | ||||
| 2481 | break; | ||||
| 2482 | } | ||||
| 2483 | case bp_tree_bar: { | ||||
| 2484 | KMP_ASSERT(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier])if (!(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]) ) { __kmp_debug_assert("__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]" , "openmp/runtime/src/kmp_barrier.cpp", 2484); }; | ||||
| 2485 | __kmp_tree_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, | ||||
| 2486 | TRUE(!0) USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | ||||
| 2487 | break; | ||||
| 2488 | } | ||||
| 2489 | default: { | ||||
| 2490 | __kmp_linear_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, | ||||
| 2491 | TRUE(!0) USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | ||||
| 2492 | } | ||||
| 2493 | } | ||||
| 2494 | |||||
| 2495 | #if OMPT_SUPPORT1 | ||||
| 2496 | if (ompt_enabled.enabled && | ||||
| 2497 | this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) { | ||||
| 2498 | int ds_tid = this_thr->th.th_info.ds.ds_tid; | ||||
| 2499 | ompt_data_t *task_data = (team
| ||||
| 2500 | ? OMPT_CUR_TASK_DATA(this_thr)(&(this_thr->th.th_current_task->ompt_task_info.task_data )) | ||||
| 2501 | : &(this_thr->th.ompt_thread_info.task_data); | ||||
| 2502 | this_thr->th.ompt_thread_info.state = ompt_state_overhead; | ||||
| 2503 | #if OMPT_OPTIONAL1 | ||||
| 2504 | void *codeptr = NULL__null; | ||||
| 2505 | if (KMP_MASTER_TID(ds_tid)(0 == (ds_tid)) && | ||||
| 2506 | (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)ompt_callback_sync_region_wait_callback || | ||||
| 2507 | ompt_callbacks.ompt_callback(ompt_callback_sync_region)ompt_callback_sync_region_callback)) | ||||
| 2508 | codeptr = team ? team->t.ompt_team_info.master_return_address : NULL__null; | ||||
| 2509 | if (ompt_enabled.ompt_callback_sync_region_wait) { | ||||
| 2510 | ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)ompt_callback_sync_region_wait_callback( | ||||
| 2511 | ompt_sync_region_barrier_implicit, ompt_scope_end, NULL__null, task_data, | ||||
| 2512 | codeptr); | ||||
| 2513 | } | ||||
| 2514 | if (ompt_enabled.ompt_callback_sync_region) { | ||||
| 2515 | ompt_callbacks.ompt_callback(ompt_callback_sync_region)ompt_callback_sync_region_callback( | ||||
| |||||
| 2516 | ompt_sync_region_barrier_implicit, ompt_scope_end, NULL__null, task_data, | ||||
| 2517 | codeptr); | ||||
| 2518 | } | ||||
| 2519 | #endif | ||||
| 2520 | if (!KMP_MASTER_TID(ds_tid)(0 == (ds_tid)) && ompt_enabled.ompt_callback_implicit_task) { | ||||
| 2521 | ompt_callbacks.ompt_callback(ompt_callback_implicit_task)ompt_callback_implicit_task_callback( | ||||
| 2522 | ompt_scope_end, NULL__null, task_data, 0, ds_tid, | ||||
| 2523 | ompt_task_implicit); // TODO: Can this be ompt_task_initial? | ||||
| 2524 | } | ||||
| 2525 | } | ||||
| 2526 | #endif | ||||
| 2527 | |||||
| 2528 | // Early exit for reaping threads releasing forkjoin barrier | ||||
| 2529 | if (TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done)) { | ||||
| 2530 | this_thr->th.th_task_team = NULL__null; | ||||
| 2531 | |||||
| 2532 | #if USE_ITT_BUILD1 && USE_ITT_NOTIFY1 | ||||
| 2533 | if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0) { | ||||
| 2534 | if (!KMP_MASTER_TID(tid)(0 == (tid))) { | ||||
| 2535 | itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier); | ||||
| 2536 | if (itt_sync_obj) | ||||
| 2537 | __kmp_itt_barrier_finished(gtid, itt_sync_obj); | ||||
| 2538 | } | ||||
| 2539 | } | ||||
| 2540 | #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */ | ||||
| 2541 | KA_TRACE(10, ("__kmp_fork_barrier: T#%d is leaving early\n", gtid))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_fork_barrier: T#%d is leaving early\n" , gtid); }; | ||||
| 2542 | return; | ||||
| 2543 | } | ||||
| 2544 | |||||
| 2545 | /* We can now assume that a valid team structure has been allocated by the | ||||
| 2546 | primary thread and propagated to all worker threads. The current thread, | ||||
| 2547 | however, may not be part of the team, so we can't blindly assume that the | ||||
| 2548 | team pointer is non-null. */ | ||||
| 2549 | team = (kmp_team_t *)TCR_PTR(this_thr->th.th_team)((void *)(this_thr->th.th_team)); | ||||
| 2550 | KMP_DEBUG_ASSERT(team != NULL)if (!(team != __null)) { __kmp_debug_assert("team != __null", "openmp/runtime/src/kmp_barrier.cpp", 2550); }; | ||||
| 2551 | tid = __kmp_tid_from_gtid(gtid); | ||||
| 2552 | |||||
| 2553 | #if KMP_BARRIER_ICV_PULL | ||||
| 2554 | /* Primary thread's copy of the ICVs was set up on the implicit taskdata in | ||||
| 2555 | __kmp_reinitialize_team. __kmp_fork_call() assumes the primary thread's | ||||
| 2556 | implicit task has this data before this function is called. We cannot | ||||
| 2557 | modify __kmp_fork_call() to look at the fixed ICVs in the primary thread's | ||||
| 2558 | thread struct, because it is not always the case that the threads arrays | ||||
| 2559 | have been allocated when __kmp_fork_call() is executed. */ | ||||
| 2560 | { | ||||
| 2561 | KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy)((void)0); | ||||
| 2562 | if (!KMP_MASTER_TID(tid)(0 == (tid))) { // primary thread already has ICVs | ||||
| 2563 | // Copy the initial ICVs from the primary thread's thread struct to the | ||||
| 2564 | // implicit task for this tid. | ||||
| 2565 | KA_TRACE(10,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_fork_barrier: T#%d(%d) is PULLing ICVs\n" , gtid, tid); } | ||||
| 2566 | ("__kmp_fork_barrier: T#%d(%d) is PULLing ICVs\n", gtid, tid))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_fork_barrier: T#%d(%d) is PULLing ICVs\n" , gtid, tid); }; | ||||
| 2567 | __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, | ||||
| 2568 | tid, FALSE0); | ||||
| 2569 | copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, | ||||
| 2570 | &team->t.t_threads[0] | ||||
| 2571 | ->th.th_bar[bs_forkjoin_barrier] | ||||
| 2572 | .bb.th_fixed_icvs); | ||||
| 2573 | } | ||||
| 2574 | } | ||||
| 2575 | #endif // KMP_BARRIER_ICV_PULL | ||||
| 2576 | |||||
| 2577 | if (__kmp_tasking_mode != tskm_immediate_exec) { | ||||
| 2578 | __kmp_task_team_sync(this_thr, team); | ||||
| 2579 | } | ||||
| 2580 | |||||
| 2581 | #if KMP_AFFINITY_SUPPORTED1 | ||||
| 2582 | kmp_proc_bind_t proc_bind = team->t.t_proc_bind; | ||||
| 2583 | if (proc_bind == proc_bind_intel) { | ||||
| 2584 | // Call dynamic affinity settings | ||||
| 2585 | if (__kmp_affinity.type == affinity_balanced && team->t.t_size_changed) { | ||||
| 2586 | __kmp_balanced_affinity(this_thr, team->t.t_nproc); | ||||
| 2587 | } | ||||
| 2588 | } else if (proc_bind != proc_bind_false) { | ||||
| 2589 | if (this_thr->th.th_new_place == this_thr->th.th_current_place) { | ||||
| 2590 | KA_TRACE(100, ("__kmp_fork_barrier: T#%d already in correct place %d\n",if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_fork_barrier: T#%d already in correct place %d\n" , __kmp_gtid_from_thread(this_thr), this_thr->th.th_current_place ); } | ||||
| 2591 | __kmp_gtid_from_thread(this_thr),if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_fork_barrier: T#%d already in correct place %d\n" , __kmp_gtid_from_thread(this_thr), this_thr->th.th_current_place ); } | ||||
| 2592 | this_thr->th.th_current_place))if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_fork_barrier: T#%d already in correct place %d\n" , __kmp_gtid_from_thread(this_thr), this_thr->th.th_current_place ); }; | ||||
| 2593 | } else { | ||||
| 2594 | __kmp_affinity_set_place(gtid); | ||||
| 2595 | } | ||||
| 2596 | } | ||||
| 2597 | #endif // KMP_AFFINITY_SUPPORTED | ||||
| 2598 | // Perform the display affinity functionality | ||||
| 2599 | if (__kmp_display_affinity) { | ||||
| 2600 | if (team->t.t_display_affinity | ||||
| 2601 | #if KMP_AFFINITY_SUPPORTED1 | ||||
| 2602 | || (__kmp_affinity.type == affinity_balanced && team->t.t_size_changed) | ||||
| 2603 | #endif | ||||
| 2604 | ) { | ||||
| 2605 | // NULL means use the affinity-format-var ICV | ||||
| 2606 | __kmp_aux_display_affinity(gtid, NULL__null); | ||||
| 2607 | this_thr->th.th_prev_num_threads = team->t.t_nproc; | ||||
| 2608 | this_thr->th.th_prev_level = team->t.t_level; | ||||
| 2609 | } | ||||
| 2610 | } | ||||
| 2611 | if (!KMP_MASTER_TID(tid)(0 == (tid))) | ||||
| 2612 | KMP_CHECK_UPDATE(this_thr->th.th_def_allocator, team->t.t_def_allocator)if ((this_thr->th.th_def_allocator) != (team->t.t_def_allocator )) (this_thr->th.th_def_allocator) = (team->t.t_def_allocator ); | ||||
| 2613 | |||||
| 2614 | #if USE_ITT_BUILD1 && USE_ITT_NOTIFY1 | ||||
| 2615 | if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0) { | ||||
| 2616 | if (!KMP_MASTER_TID(tid)(0 == (tid))) { | ||||
| 2617 | // Get correct barrier object | ||||
| 2618 | itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier); | ||||
| 2619 | __kmp_itt_barrier_finished(gtid, itt_sync_obj); // Workers call acquired | ||||
| 2620 | } // (prepare called inside barrier_release) | ||||
| 2621 | } | ||||
| 2622 | #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */ | ||||
| 2623 | KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d:%d) is leaving\n", gtid,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_fork_barrier: T#%d(%d:%d) is leaving\n" , gtid, team->t.t_id, tid); } | ||||
| 2624 | team->t.t_id, tid))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_fork_barrier: T#%d(%d:%d) is leaving\n" , gtid, team->t.t_id, tid); }; | ||||
| 2625 | } | ||||
| 2626 | |||||
| 2627 | void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc, | ||||
| 2628 | kmp_internal_control_t *new_icvs, ident_t *loc) { | ||||
| 2629 | KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_setup_icv_copy)((void)0); | ||||
| 2630 | |||||
| 2631 | KMP_DEBUG_ASSERT(team && new_nproc && new_icvs)if (!(team && new_nproc && new_icvs)) { __kmp_debug_assert ("team && new_nproc && new_icvs", "openmp/runtime/src/kmp_barrier.cpp" , 2631); }; | ||||
| 2632 | KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc)if (!((!(__kmp_init_parallel)) || new_icvs->nproc)) { __kmp_debug_assert ("(!(__kmp_init_parallel)) || new_icvs->nproc", "openmp/runtime/src/kmp_barrier.cpp" , 2632); }; | ||||
| 2633 | |||||
| 2634 | /* Primary thread's copy of the ICVs was set up on the implicit taskdata in | ||||
| 2635 | __kmp_reinitialize_team. __kmp_fork_call() assumes the primary thread's | ||||
| 2636 | implicit task has this data before this function is called. */ | ||||
| 2637 | #if KMP_BARRIER_ICV_PULL | ||||
| 2638 | /* Copy ICVs to primary thread's thread structure into th_fixed_icvs (which | ||||
| 2639 | remains untouched), where all of the worker threads can access them and | ||||
| 2640 | make their own copies after the barrier. */ | ||||
| 2641 | KMP_DEBUG_ASSERT(team->t.t_threads[0])if (!(team->t.t_threads[0])) { __kmp_debug_assert("team->t.t_threads[0]" , "openmp/runtime/src/kmp_barrier.cpp", 2641); }; // The threads arrays should be | ||||
| 2642 | // allocated at this point | ||||
| 2643 | copy_icvs( | ||||
| 2644 | &team->t.t_threads[0]->th.th_bar[bs_forkjoin_barrier].bb.th_fixed_icvs, | ||||
| 2645 | new_icvs); | ||||
| 2646 | KF_TRACE(10, ("__kmp_setup_icv_copy: PULL: T#%d this_thread=%p team=%p\n", 0,if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_setup_icv_copy: PULL: T#%d this_thread=%p team=%p\n" , 0, team->t.t_threads[0], team); } | ||||
| 2647 | team->t.t_threads[0], team))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_setup_icv_copy: PULL: T#%d this_thread=%p team=%p\n" , 0, team->t.t_threads[0], team); }; | ||||
| 2648 | #elif KMP_BARRIER_ICV_PUSH1 | ||||
| 2649 | // The ICVs will be propagated in the fork barrier, so nothing needs to be | ||||
| 2650 | // done here. | ||||
| 2651 | KF_TRACE(10, ("__kmp_setup_icv_copy: PUSH: T#%d this_thread=%p team=%p\n", 0,if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_setup_icv_copy: PUSH: T#%d this_thread=%p team=%p\n" , 0, team->t.t_threads[0], team); } | ||||
| 2652 | team->t.t_threads[0], team))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_setup_icv_copy: PUSH: T#%d this_thread=%p team=%p\n" , 0, team->t.t_threads[0], team); }; | ||||
| 2653 | #else | ||||
| 2654 | // Copy the ICVs to each of the non-primary threads. This takes O(nthreads) | ||||
| 2655 | // time. | ||||
| 2656 | ngo_load(new_icvs)((void)0); | ||||
| 2657 | KMP_DEBUG_ASSERT(team->t.t_threads[0])if (!(team->t.t_threads[0])) { __kmp_debug_assert("team->t.t_threads[0]" , "openmp/runtime/src/kmp_barrier.cpp", 2657); }; // The threads arrays should be | ||||
| 2658 | // allocated at this point | ||||
| 2659 | for (int f = 1; f < new_nproc; ++f) { // Skip the primary thread | ||||
| 2660 | // TODO: GEH - pass in better source location info since usually NULL here | ||||
| 2661 | KF_TRACE(10, ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n",if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n" , f, team->t.t_threads[f], team); } | ||||
| 2662 | f, team->t.t_threads[f], team))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n" , f, team->t.t_threads[f], team); }; | ||||
| 2663 | __kmp_init_implicit_task(loc, team->t.t_threads[f], team, f, FALSE0); | ||||
| 2664 | ngo_store_icvs(&team->t.t_implicit_task_taskdata[f].td_icvs, new_icvs)copy_icvs((&team->t.t_implicit_task_taskdata[f].td_icvs ), (new_icvs)); | ||||
| 2665 | KF_TRACE(10, ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n",if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n" , f, team->t.t_threads[f], team); } | ||||
| 2666 | f, team->t.t_threads[f], team))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n" , f, team->t.t_threads[f], team); }; | ||||
| 2667 | } | ||||
| 2668 | ngo_sync()((void)0); | ||||
| 2669 | #endif // KMP_BARRIER_ICV_PULL | ||||
| 2670 | } |
| 1 | /* | ||||
| 2 | * kmp_wait_release.h -- Wait/Release implementation | ||||
| 3 | */ | ||||
| 4 | |||||
| 5 | //===----------------------------------------------------------------------===// | ||||
| 6 | // | ||||
| 7 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||||
| 8 | // See https://llvm.org/LICENSE.txt for license information. | ||||
| 9 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||||
| 10 | // | ||||
| 11 | //===----------------------------------------------------------------------===// | ||||
| 12 | |||||
| 13 | #ifndef KMP_WAIT_RELEASE_H | ||||
| 14 | #define KMP_WAIT_RELEASE_H | ||||
| 15 | |||||
| 16 | #include "kmp.h" | ||||
| 17 | #include "kmp_itt.h" | ||||
| 18 | #include "kmp_stats.h" | ||||
| 19 | #if OMPT_SUPPORT1 | ||||
| 20 | #include "ompt-specific.h" | ||||
| 21 | #endif | ||||
| 22 | |||||
| 23 | /*! | ||||
| 24 | @defgroup WAIT_RELEASE Wait/Release operations | ||||
| 25 | |||||
| 26 | The definitions and functions here implement the lowest level thread | ||||
| 27 | synchronizations of suspending a thread and awaking it. They are used to build | ||||
| 28 | higher level operations such as barriers and fork/join. | ||||
| 29 | */ | ||||
| 30 | |||||
| 31 | /*! | ||||
| 32 | @ingroup WAIT_RELEASE | ||||
| 33 | @{ | ||||
| 34 | */ | ||||
| 35 | |||||
/*! Two 16-bit fields describing a flag: its type tag and a reserved field. */
struct flag_properties {
  unsigned type : 16; /**< Flag type tag; kmp_flag stores FlagType here. */
  unsigned reserved : 16; /**< Reserved; kmp_flag initialises it to 0. */
};
| 40 | |||||
| 41 | template <enum flag_type FlagType> struct flag_traits {}; | ||||
| 42 | |||||
| 43 | template <> struct flag_traits<flag32> { | ||||
| 44 | typedef kmp_uint32 flag_t; | ||||
| 45 | static const flag_type t = flag32; | ||||
| 46 | static inline flag_t tcr(flag_t f) { return TCR_4(f)(f); } | ||||
| 47 | static inline flag_t test_then_add4(volatile flag_t *f) { | ||||
| 48 | return KMP_TEST_THEN_ADD4_32(RCAST(volatile kmp_int32 *, f))__sync_fetch_and_add((volatile kmp_int32 *)(reinterpret_cast< volatile kmp_int32 *>(f)), 4); | ||||
| 49 | } | ||||
| 50 | static inline flag_t test_then_or(volatile flag_t *f, flag_t v) { | ||||
| 51 | return KMP_TEST_THEN_OR32(f, v)__sync_fetch_and_or((volatile kmp_uint32 *)(f), (kmp_uint32)( v)); | ||||
| 52 | } | ||||
| 53 | static inline flag_t test_then_and(volatile flag_t *f, flag_t v) { | ||||
| 54 | return KMP_TEST_THEN_AND32(f, v)__sync_fetch_and_and((volatile kmp_uint32 *)(f), (kmp_uint32) (v)); | ||||
| 55 | } | ||||
| 56 | }; | ||||
| 57 | |||||
| 58 | template <> struct flag_traits<atomic_flag64> { | ||||
| 59 | typedef kmp_uint64 flag_t; | ||||
| 60 | static const flag_type t = atomic_flag64; | ||||
| 61 | static inline flag_t tcr(flag_t f) { return TCR_8(f)(f); } | ||||
| 62 | static inline flag_t test_then_add4(volatile flag_t *f) { | ||||
| 63 | return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f))__sync_fetch_and_add((volatile kmp_int64 *)(reinterpret_cast< volatile kmp_int64 *>(f)), 4LL); | ||||
| 64 | } | ||||
| 65 | static inline flag_t test_then_or(volatile flag_t *f, flag_t v) { | ||||
| 66 | return KMP_TEST_THEN_OR64(f, v)__sync_fetch_and_or((volatile kmp_uint64 *)(f), (kmp_uint64)( v)); | ||||
| 67 | } | ||||
| 68 | static inline flag_t test_then_and(volatile flag_t *f, flag_t v) { | ||||
| 69 | return KMP_TEST_THEN_AND64(f, v)__sync_fetch_and_and((volatile kmp_uint64 *)(f), (kmp_uint64) (v)); | ||||
| 70 | } | ||||
| 71 | }; | ||||
| 72 | |||||
| 73 | template <> struct flag_traits<flag64> { | ||||
| 74 | typedef kmp_uint64 flag_t; | ||||
| 75 | static const flag_type t = flag64; | ||||
| 76 | static inline flag_t tcr(flag_t f) { return TCR_8(f)(f); } | ||||
| 77 | static inline flag_t test_then_add4(volatile flag_t *f) { | ||||
| 78 | return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f))__sync_fetch_and_add((volatile kmp_int64 *)(reinterpret_cast< volatile kmp_int64 *>(f)), 4LL); | ||||
| 79 | } | ||||
| 80 | static inline flag_t test_then_or(volatile flag_t *f, flag_t v) { | ||||
| 81 | return KMP_TEST_THEN_OR64(f, v)__sync_fetch_and_or((volatile kmp_uint64 *)(f), (kmp_uint64)( v)); | ||||
| 82 | } | ||||
| 83 | static inline flag_t test_then_and(volatile flag_t *f, flag_t v) { | ||||
| 84 | return KMP_TEST_THEN_AND64(f, v)__sync_fetch_and_and((volatile kmp_uint64 *)(f), (kmp_uint64) (v)); | ||||
| 85 | } | ||||
| 86 | }; | ||||
| 87 | |||||
| 88 | template <> struct flag_traits<flag_oncore> { | ||||
| 89 | typedef kmp_uint64 flag_t; | ||||
| 90 | static const flag_type t = flag_oncore; | ||||
| 91 | static inline flag_t tcr(flag_t f) { return TCR_8(f)(f); } | ||||
| 92 | static inline flag_t test_then_add4(volatile flag_t *f) { | ||||
| 93 | return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f))__sync_fetch_and_add((volatile kmp_int64 *)(reinterpret_cast< volatile kmp_int64 *>(f)), 4LL); | ||||
| 94 | } | ||||
| 95 | static inline flag_t test_then_or(volatile flag_t *f, flag_t v) { | ||||
| 96 | return KMP_TEST_THEN_OR64(f, v)__sync_fetch_and_or((volatile kmp_uint64 *)(f), (kmp_uint64)( v)); | ||||
| 97 | } | ||||
| 98 | static inline flag_t test_then_and(volatile flag_t *f, flag_t v) { | ||||
| 99 | return KMP_TEST_THEN_AND64(f, v)__sync_fetch_and_and((volatile kmp_uint64 *)(f), (kmp_uint64) (v)); | ||||
| 100 | } | ||||
| 101 | }; | ||||
| 102 | |||||
| 103 | /*! Base class for all flags */ | ||||
| 104 | template <flag_type FlagType> class kmp_flag { | ||||
| 105 | protected: | ||||
| 106 | flag_properties t; /**< "Type" of the flag in loc */ | ||||
| 107 | kmp_info_t *waiting_threads[1]; /**< Threads sleeping on this thread. */ | ||||
| 108 | kmp_uint32 num_waiting_threads; /**< Num threads sleeping on this thread. */ | ||||
| 109 | std::atomic<bool> *sleepLoc; | ||||
| 110 | |||||
| 111 | public: | ||||
| 112 | typedef flag_traits<FlagType> traits_type; | ||||
| 113 | kmp_flag() : t({FlagType, 0U}), num_waiting_threads(0), sleepLoc(nullptr) {} | ||||
| 114 | kmp_flag(int nwaiters) | ||||
| 115 | : t({FlagType, 0U}), num_waiting_threads(nwaiters), sleepLoc(nullptr) {} | ||||
| 116 | kmp_flag(std::atomic<bool> *sloc) | ||||
| 117 | : t({FlagType, 0U}), num_waiting_threads(0), sleepLoc(sloc) {} | ||||
| 118 | /*! @result the flag_type */ | ||||
| 119 | flag_type get_type() { return (flag_type)(t.type); } | ||||
| 120 | |||||
| 121 | /*! param i in index into waiting_threads | ||||
| 122 | * @result the thread that is waiting at index i */ | ||||
| 123 | kmp_info_t *get_waiter(kmp_uint32 i) { | ||||
| 124 | KMP_DEBUG_ASSERT(i < num_waiting_threads)if (!(i < num_waiting_threads)) { __kmp_debug_assert("i < num_waiting_threads" , "openmp/runtime/src/kmp_wait_release.h", 124); }; | ||||
| 125 | return waiting_threads[i]; | ||||
| 126 | } | ||||
| 127 | /*! @result num_waiting_threads */ | ||||
| 128 | kmp_uint32 get_num_waiters() { return num_waiting_threads; } | ||||
| 129 | /*! @param thr in the thread which is now waiting | ||||
| 130 | * Insert a waiting thread at index 0. */ | ||||
| 131 | void set_waiter(kmp_info_t *thr) { | ||||
| 132 | waiting_threads[0] = thr; | ||||
| 133 | num_waiting_threads = 1; | ||||
| 134 | } | ||||
| 135 | enum barrier_type get_bt() { return bs_last_barrier; } | ||||
| 136 | }; | ||||
| 137 | |||||
| 138 | /*! Base class for wait/release volatile flag */ | ||||
| 139 | template <typename PtrType, flag_type FlagType, bool Sleepable> | ||||
| 140 | class kmp_flag_native : public kmp_flag<FlagType> { | ||||
| 141 | protected: | ||||
| 142 | volatile PtrType *loc; | ||||
| 143 | PtrType checker; /**< When flag==checker, it has been released. */ | ||||
| 144 | typedef flag_traits<FlagType> traits_type; | ||||
| 145 | |||||
| 146 | public: | ||||
| 147 | typedef PtrType flag_t; | ||||
| 148 | kmp_flag_native(volatile PtrType *p) : kmp_flag<FlagType>(), loc(p) {} | ||||
| 149 | kmp_flag_native(volatile PtrType *p, kmp_info_t *thr) | ||||
| 150 | : kmp_flag<FlagType>(1), loc(p) { | ||||
| 151 | this->waiting_threads[0] = thr; | ||||
| 152 | } | ||||
| 153 | kmp_flag_native(volatile PtrType *p, PtrType c) | ||||
| 154 | : kmp_flag<FlagType>(), loc(p), checker(c) {} | ||||
| 155 | kmp_flag_native(volatile PtrType *p, PtrType c, std::atomic<bool> *sloc) | ||||
| 156 | : kmp_flag<FlagType>(sloc), loc(p), checker(c) {} | ||||
| 157 | virtual ~kmp_flag_native() {} | ||||
| 158 | void *operator new(size_t size) { return __kmp_allocate(size)___kmp_allocate((size), "openmp/runtime/src/kmp_wait_release.h" , 158); } | ||||
| 159 | void operator delete(void *p) { __kmp_free(p)___kmp_free((p), "openmp/runtime/src/kmp_wait_release.h", 159 ); } | ||||
| 160 | volatile PtrType *get() { return loc; } | ||||
| 161 | void *get_void_p() { return RCAST(void *, CCAST(PtrType *, loc))reinterpret_cast<void *>(const_cast<PtrType *>(loc )); } | ||||
| 162 | void set(volatile PtrType *new_loc) { loc = new_loc; } | ||||
| 163 | PtrType load() { return *loc; } | ||||
| 164 | void store(PtrType val) { *loc = val; } | ||||
| 165 | /*! @result true if the flag object has been released. */ | ||||
| 166 | virtual bool done_check() { | ||||
| 167 | if (Sleepable && !(this->sleepLoc)) | ||||
| 168 | return (traits_type::tcr(*(this->get())) & ~KMP_BARRIER_SLEEP_STATE(1 << 0)) == | ||||
| 169 | checker; | ||||
| 170 | else | ||||
| 171 | return traits_type::tcr(*(this->get())) == checker; | ||||
| 172 | } | ||||
| 173 | /*! @param old_loc in old value of flag | ||||
| 174 | * @result true if the flag's old value indicates it was released. */ | ||||
| 175 | virtual bool done_check_val(PtrType old_loc) { return old_loc == checker; } | ||||
| 176 | /*! @result true if the flag object is not yet released. | ||||
| 177 | * Used in __kmp_wait_template like: | ||||
| 178 | * @code | ||||
| 179 | * while (flag.notdone_check()) { pause(); } | ||||
| 180 | * @endcode */ | ||||
| 181 | virtual bool notdone_check() { | ||||
| 182 | return traits_type::tcr(*(this->get())) != checker; | ||||
| 183 | } | ||||
| 184 | /*! @result Actual flag value before release was applied. | ||||
| 185 | * Trigger all waiting threads to run by modifying flag to release state. */ | ||||
| 186 | void internal_release() { | ||||
| 187 | (void)traits_type::test_then_add4((volatile PtrType *)this->get()); | ||||
| 188 | } | ||||
| 189 | /*! @result Actual flag value before sleep bit(s) set. | ||||
| 190 | * Notes that there is at least one thread sleeping on the flag by setting | ||||
| 191 | * sleep bit(s). */ | ||||
| 192 | PtrType set_sleeping() { | ||||
| 193 | if (this->sleepLoc) { | ||||
| 194 | this->sleepLoc->store(true); | ||||
| 195 | return *(this->get()); | ||||
| 196 | } | ||||
| 197 | return traits_type::test_then_or((volatile PtrType *)this->get(), | ||||
| 198 | KMP_BARRIER_SLEEP_STATE(1 << 0)); | ||||
| 199 | } | ||||
| 200 | /*! Returns nothing. | ||||
| 201 | * Notes that there are no longer threads sleeping on the flag by clearing | ||||
| 202 | * sleep bit(s): stores false into sleepLoc when one is attached, otherwise clears KMP_BARRIER_SLEEP_STATE in the flag word through traits_type::test_then_and. */ | ||||
| 203 | void unset_sleeping() { | ||||
| 204 | if (this->sleepLoc) { | ||||
| 205 | this->sleepLoc->store(false); | ||||
| 206 | return; | ||||
| 207 | } | ||||
| 208 | traits_type::test_then_and((volatile PtrType *)this->get(), | ||||
| 209 | ~KMP_BARRIER_SLEEP_STATE(1 << 0)); | ||||
| 210 | } | ||||
| 211 | /*! @param old_loc in old value of flag (ignored when a separate sleepLoc is attached) | ||||
| 212 | * Test if there are threads sleeping on the flag's old value in old_loc. With a sleepLoc the current sleepLoc value is returned instead; otherwise the KMP_BARRIER_SLEEP_STATE bit of old_loc is tested. */ | ||||
| 213 | bool is_sleeping_val(PtrType old_loc) { | ||||
| 214 | if (this->sleepLoc) | ||||
| 215 | return this->sleepLoc->load(); | ||||
| 216 | return old_loc & KMP_BARRIER_SLEEP_STATE(1 << 0); | ||||
| 217 | } | ||||
| 218 | /*! Test whether there are threads sleeping on the flag. Reads sleepLoc when one is attached, otherwise tests the sleep bit of the current flag value via is_sleeping_val(). */ | ||||
| 219 | bool is_sleeping() { | ||||
| 220 | if (this->sleepLoc) | ||||
| 221 | return this->sleepLoc->load(); | ||||
| 222 | return is_sleeping_val(*(this->get())); | ||||
| 223 | } | ||||
| 224 | bool is_any_sleeping() { /* performs exactly the same check as is_sleeping() in this class */ | ||||
| 225 | if (this->sleepLoc) | ||||
| 226 | return this->sleepLoc->load(); | ||||
| 227 | return is_sleeping_val(*(this->get())); | ||||
| 228 | } | ||||
| 229 | kmp_uint8 *get_stolen() { return NULL__null; } /* always returns NULL for this flag class */ | ||||
| 230 | }; | ||||
| 231 | |||||
| 232 | /*! Base class for wait/release atomic flag. The flag word is a std::atomic<PtrType>; load() reads it with acquire ordering and store() writes it with release ordering. */ | ||||
| 233 | template <typename PtrType, flag_type FlagType, bool Sleepable> | ||||
| 234 | class kmp_flag_atomic : public kmp_flag<FlagType> { | ||||
| 235 | protected: | ||||
| 236 | std::atomic<PtrType> *loc; /**< Pointer to flag location to wait on */ | ||||
| 237 | PtrType checker; /**< Flag == checker means it has been released. NOTE(review): the two constructors without a c argument leave this member uninitialized -- confirm those objects never call a *_check method. */ | ||||
| 238 | public: | ||||
| 239 | typedef flag_traits<FlagType> traits_type; | ||||
| 240 | typedef PtrType flag_t; | ||||
| 241 | kmp_flag_atomic(std::atomic<PtrType> *p) : kmp_flag<FlagType>(), loc(p) {} | ||||
| 242 | kmp_flag_atomic(std::atomic<PtrType> *p, kmp_info_t *thr) | ||||
| 243 | : kmp_flag<FlagType>(1), loc(p) { | ||||
| 244 | this->waiting_threads[0] = thr; | ||||
| 245 | } | ||||
| 246 | kmp_flag_atomic(std::atomic<PtrType> *p, PtrType c) | ||||
| 247 | : kmp_flag<FlagType>(), loc(p), checker(c) {} | ||||
| 248 | kmp_flag_atomic(std::atomic<PtrType> *p, PtrType c, std::atomic<bool> *sloc) | ||||
| 249 | : kmp_flag<FlagType>(sloc), loc(p), checker(c) {} | ||||
| 250 | /*! @result the pointer to the actual flag */ | ||||
| 251 | std::atomic<PtrType> *get() { return loc; } | ||||
| 252 | /*! @result void* pointer to the actual flag */ | ||||
| 253 | void *get_void_p() { return RCAST(void *, loc)reinterpret_cast<void *>(loc); } | ||||
| 254 | /*! @param new_loc in set loc to point at new_loc */ | ||||
| 255 | void set(std::atomic<PtrType> *new_loc) { loc = new_loc; } | ||||
| 256 | /*! @result flag value, read with std::memory_order_acquire */ | ||||
| 257 | PtrType load() { return loc->load(std::memory_order_acquire); } | ||||
| 258 | /*! @param val the new flag value to be stored (written with std::memory_order_release) */ | ||||
| 259 | void store(PtrType val) { loc->store(val, std::memory_order_release); } | ||||
| 260 | /*! @result true if the flag object has been released. When Sleepable is set and no separate sleepLoc is attached, the KMP_BARRIER_SLEEP_STATE bit is masked off before comparing with checker. */ | ||||
| 261 | bool done_check() { | ||||
| 262 | if (Sleepable && !(this->sleepLoc)) | ||||
| 263 | return (this->load() & ~KMP_BARRIER_SLEEP_STATE(1 << 0)) == checker; | ||||
| 264 | else | ||||
| 265 | return this->load() == checker; | ||||
| 266 | } | ||||
| 267 | /*! @param old_loc in old value of flag | ||||
| 268 | * @result true if the flag's old value indicates it was released (exact comparison with checker, no masking). */ | ||||
| 269 | bool done_check_val(PtrType old_loc) { return old_loc == checker; } | ||||
| 270 | /*! @result true if the flag object is not yet released. | ||||
| 271 | * Used in __kmp_wait_template like: | ||||
| 272 | * @code | ||||
| 273 | * while (flag.notdone_check()) { pause(); } | ||||
| 274 | * @endcode */ | ||||
| 275 | bool notdone_check() { return this->load() != checker; } | ||||
| 276 | /*! Returns nothing; the value produced by the fetch_add is not used. | ||||
| 277 | * Trigger all waiting threads to run by modifying flag to release state (atomically adds 4 to the flag). */ | ||||
| 278 | void internal_release() { KMP_ATOMIC_ADD(this->get(), 4)(this->get())->fetch_add(4, std::memory_order_acq_rel); } | ||||
| 279 | /*! @result With a separate sleepLoc: the flag value read after sleepLoc has been set to true. Otherwise: the flag value before the sleep bit was set (fetch_or result). | ||||
| 280 | * Notes that there is at least one thread sleeping on the flag by setting | ||||
| 281 | * sleep bit(s). */ | ||||
| 282 | PtrType set_sleeping() { | ||||
| 283 | if (this->sleepLoc) { | ||||
| 284 | this->sleepLoc->store(true); | ||||
| 285 | return *(this->get()); | ||||
| 286 | } | ||||
| 287 | return KMP_ATOMIC_OR(this->get(), KMP_BARRIER_SLEEP_STATE)(this->get())->fetch_or((1 << 0), std::memory_order_acq_rel ); | ||||
| 288 | } | ||||
| 289 | /*! Returns nothing. | ||||
| 290 | * Notes that there are no longer threads sleeping on the flag by clearing | ||||
| 291 | * sleep bit(s): stores false into sleepLoc when one is attached, otherwise clears the sleep bit in the flag word with fetch_and. */ | ||||
| 292 | void unset_sleeping() { | ||||
| 293 | if (this->sleepLoc) { | ||||
| 294 | this->sleepLoc->store(false); | ||||
| 295 | return; | ||||
| 296 | } | ||||
| 297 | KMP_ATOMIC_AND(this->get(), ~KMP_BARRIER_SLEEP_STATE)(this->get())->fetch_and(~(1 << 0), std::memory_order_acq_rel ); | ||||
| 298 | } | ||||
| 299 | /*! @param old_loc in old value of flag (ignored when a separate sleepLoc is attached) | ||||
| 300 | * Test whether there are threads sleeping on flag's old value in old_loc. */ | ||||
| 301 | bool is_sleeping_val(PtrType old_loc) { | ||||
| 302 | if (this->sleepLoc) | ||||
| 303 | return this->sleepLoc->load(); | ||||
| 304 | return old_loc & KMP_BARRIER_SLEEP_STATE(1 << 0); | ||||
| 305 | } | ||||
| 306 | /*! Test whether there are threads sleeping on the flag. */ | ||||
| 307 | bool is_sleeping() { | ||||
| 308 | if (this->sleepLoc) | ||||
| 309 | return this->sleepLoc->load(); | ||||
| 310 | return is_sleeping_val(this->load()); | ||||
| 311 | } | ||||
| 312 | bool is_any_sleeping() { /* performs exactly the same check as is_sleeping() in this class */ | ||||
| 313 | if (this->sleepLoc) | ||||
| 314 | return this->sleepLoc->load(); | ||||
| 315 | return is_sleeping_val(this->load()); | ||||
| 316 | } | ||||
| 317 | kmp_uint8 *get_stolen() { return NULL__null; } /* always returns NULL for this flag class */ | ||||
| 318 | }; | ||||
| 319 | |||||
| 320 | #if OMPT_SUPPORT1 | ||||
| 321 | /* Emits the OMPT end-of-implicit-barrier events for this_thr. Does nothing unless ompt_state is ompt_state_wait_barrier_implicit. Otherwise it fires the sync_region_wait and sync_region scope-end callbacks (when enabled), then for a thread whose ds_tid is non-zero fires the implicit_task scope-end callback (when enabled) and sets the thread state to ompt_state_idle; the thread with ds_tid 0 is left in ompt_state_overhead. tId is passed to every callback as the task data. */ OMPT_NOINLINE__attribute__((noinline)) | ||||
| 322 | static void __ompt_implicit_task_end(kmp_info_t *this_thr, | ||||
| 323 | ompt_state_t ompt_state, | ||||
| 324 | ompt_data_t *tId) { | ||||
| 325 | int ds_tid = this_thr->th.th_info.ds.ds_tid; | ||||
| 326 | if (ompt_state == ompt_state_wait_barrier_implicit) { | ||||
| 327 | this_thr->th.ompt_thread_info.state = ompt_state_overhead; | ||||
| 328 | #if OMPT_OPTIONAL1 | ||||
| 329 | void *codeptr = NULL__null; | ||||
| 330 | if (ompt_enabled.ompt_callback_sync_region_wait) { /* NOTE(review): the callback pointer itself is not NULL-checked, only the enabled bit -- presumably the two are kept in sync elsewhere; confirm */ | ||||
| 331 | ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)ompt_callback_sync_region_wait_callback( | ||||
| 332 | ompt_sync_region_barrier_implicit, ompt_scope_end, NULL__null, tId, | ||||
| 333 | codeptr); | ||||
| 334 | } | ||||
| 335 | if (ompt_enabled.ompt_callback_sync_region) { | ||||
| 336 | ompt_callbacks.ompt_callback(ompt_callback_sync_region)ompt_callback_sync_region_callback( | ||||
| 337 | ompt_sync_region_barrier_implicit, ompt_scope_end, NULL__null, tId, | ||||
| 338 | codeptr); | ||||
| 339 | } | ||||
| 340 | #endif | ||||
| 341 | if (!KMP_MASTER_TID(ds_tid)(0 == (ds_tid))) { | ||||
| 342 | if (ompt_enabled.ompt_callback_implicit_task) { | ||||
| 343 | int flags = this_thr->th.ompt_thread_info.parallel_flags; | ||||
| 344 | flags = (flags & ompt_parallel_league) ? ompt_task_initial | ||||
| 345 | : ompt_task_implicit; | ||||
| 346 | ompt_callbacks.ompt_callback(ompt_callback_implicit_task)ompt_callback_implicit_task_callback( | ||||
| 347 | ompt_scope_end, NULL__null, tId, 0, ds_tid, flags); | ||||
| 348 | } | ||||
| 349 | // return to idle state | ||||
| 350 | this_thr->th.ompt_thread_info.state = ompt_state_idle; | ||||
| 351 | } else { | ||||
| 352 | this_thr->th.ompt_thread_info.state = ompt_state_overhead; | ||||
| 353 | } | ||||
| 354 | } | ||||
| 355 | } | ||||
| 356 | #endif | ||||
| 357 | |||||
| 358 | /* Spin wait loop that first does pause/yield, then sleep. A thread that calls | ||||
| 359 | __kmp_wait_* must make certain that another thread calls __kmp_release | ||||
| 360 | to wake it back up to prevent deadlocks! | ||||
| 361 | this_thr is the waiting thread, flag is the object polled through done_check()/notdone_check(), and itt_sync_obj is forwarded to flag->execute_tasks(). final_spin gates the th_blocking updates (on KMP_OS_UNIX) and the OMPT implicit-task-end handling; Sleepable=false keeps the loop from ever reaching suspend()/mwait(). Returns true only when Cancellable is set and the team's t_cancel_request equals cancel_parallel; in every other case returns false. | ||||
| 362 | NOTE: We may not belong to a team at this point. */ | ||||
| 363 | template <class C, bool final_spin, bool Cancellable = false, | ||||
| 364 | bool Sleepable = true> | ||||
| 365 | static inline bool | ||||
| 366 | __kmp_wait_template(kmp_info_t *this_thr, | ||||
| 367 | C *flag USE_ITT_BUILD_ARG(void *itt_sync_obj), void *itt_sync_obj) { | ||||
| 368 | #if USE_ITT_BUILD1 && USE_ITT_NOTIFY1 | ||||
| 369 | volatile void *spin = flag->get(); | ||||
| 370 | #endif | ||||
| 371 | kmp_uint32 spins; | ||||
| 372 | int th_gtid; | ||||
| 373 | int tasks_completed = FALSE0; | ||||
| 374 | #if !KMP_USE_MONITOR | ||||
| 375 | kmp_uint64 poll_count; | ||||
| 376 | kmp_uint64 hibernate_goal; | ||||
| 377 | #else | ||||
| 378 | kmp_uint32 hibernate; | ||||
| 379 | #endif | ||||
| 380 | kmp_uint64 time; | ||||
| 381 | |||||
| 382 | KMP_FSYNC_SPIN_INIT(spin, NULL)int sync_iters = 0; if (__kmp_itt_fsync_prepare_ptr__3_0) { if (spin == __null) { spin = __null; } } __asm__ __volatile__("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 " ::"i"(0x4376) : "%ebx"); | ||||
| 383 | if (flag->done_check()) { | ||||
| 384 | KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin))do { __asm__ __volatile__("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 " ::"i"(0x4377) : "%ebx"); if (sync_iters >= __kmp_itt_prepare_delay ) { (!__kmp_itt_fsync_acquired_ptr__3_0) ? (void)0 : __kmp_itt_fsync_acquired_ptr__3_0 ((void *)((void *)const_cast<void *>(spin))); } } while (0); | ||||
| 385 | return false; | ||||
| 386 | } | ||||
| 387 | th_gtid = this_thr->th.th_info.ds.ds_gtid; | ||||
| 388 | if (Cancellable) { | ||||
| 389 | kmp_team_t *team = this_thr->th.th_team; | ||||
| 390 | if (team && team->t.t_cancel_request == cancel_parallel) | ||||
| 391 | return true; | ||||
| 392 | } | ||||
| 393 | #if KMP_OS_UNIX1 | ||||
| 394 | if (final_spin) | ||||
| 395 | KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true)(&this_thr->th.th_blocking)->store(true, std::memory_order_release ); | ||||
| 396 | #endif | ||||
| 397 | KA_TRACE(20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n" , th_gtid, flag); } | ||||
| 398 | ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n" , th_gtid, flag); }; | ||||
| 399 | #if KMP_STATS_ENABLED0 | ||||
| 400 | stats_state_e thread_state = KMP_GET_THREAD_STATE()((void)0); | ||||
| 401 | #endif | ||||
| 402 | |||||
| 403 | /* OMPT Behavior: | ||||
| 404 | THIS function is called from | ||||
| 405 | __kmp_barrier (2 times) (implicit or explicit barrier in parallel regions) | ||||
| 406 | these have join / fork behavior | ||||
| 407 | |||||
| 408 | In these cases, we don't change the state or trigger events in THIS | ||||
| 409 | function. | ||||
| 410 | Events are triggered in the calling code (__kmp_barrier): | ||||
| 411 | |||||
| 412 | state := ompt_state_overhead | ||||
| 413 | barrier-begin | ||||
| 414 | barrier-wait-begin | ||||
| 415 | state := ompt_state_wait_barrier | ||||
| 416 | call join-barrier-implementation (finally arrive here) | ||||
| 417 | {} | ||||
| 418 | call fork-barrier-implementation (finally arrive here) | ||||
| 419 | {} | ||||
| 420 | state := ompt_state_overhead | ||||
| 421 | barrier-wait-end | ||||
| 422 | barrier-end | ||||
| 423 | state := ompt_state_work_parallel | ||||
| 424 | |||||
| 425 | |||||
| 426 | __kmp_fork_barrier (after thread creation, before executing implicit task) | ||||
| 427 | call fork-barrier-implementation (finally arrive here) | ||||
| 428 | {} // worker arrive here with state = ompt_state_idle | ||||
| 429 | |||||
| 430 | |||||
| 431 | __kmp_join_barrier (implicit barrier at end of parallel region) | ||||
| 432 | state := ompt_state_barrier_implicit | ||||
| 433 | barrier-begin | ||||
| 434 | barrier-wait-begin | ||||
| 435 | call join-barrier-implementation (finally arrive here | ||||
| 436 | final_spin=FALSE) | ||||
| 437 | { | ||||
| 438 | } | ||||
| 439 | __kmp_fork_barrier (implicit barrier at end of parallel region) | ||||
| 440 | call fork-barrier-implementation (finally arrive here final_spin=TRUE) | ||||
| 441 | |||||
| 442 | Worker after task-team is finished: | ||||
| 443 | barrier-wait-end | ||||
| 444 | barrier-end | ||||
| 445 | implicit-task-end | ||||
| 446 | idle-begin | ||||
| 447 | state := ompt_state_idle | ||||
| 448 | |||||
| 449 | Before leaving, if state = ompt_state_idle | ||||
| 450 | idle-end | ||||
| 451 | state := ompt_state_overhead | ||||
| 452 | */ | ||||
| 453 | #if OMPT_SUPPORT1 | ||||
| 454 | ompt_state_t ompt_entry_state; /* NOTE(review): this and tId are only assigned inside the ompt_enabled.enabled block below, yet are read again later under separate ompt_enabled.enabled checks -- confirm the enabled flag cannot change in between */ | ||||
| 455 | ompt_data_t *tId; | ||||
| 456 | if (ompt_enabled.enabled) { | ||||
| 457 | ompt_entry_state = this_thr->th.ompt_thread_info.state; | ||||
| 458 | if (!final_spin || ompt_entry_state != ompt_state_wait_barrier_implicit || | ||||
| 459 | KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)(0 == (this_thr->th.th_info.ds.ds_tid))) { | ||||
| 460 | ompt_lw_taskteam_t *team = NULL__null; | ||||
| 461 | if (this_thr->th.th_team) | ||||
| 462 | team = this_thr->th.th_team->t.ompt_serialized_team_info; | ||||
| 463 | if (team) { | ||||
| 464 | tId = &(team->ompt_task_info.task_data); | ||||
| 465 | } else { | ||||
| 466 | tId = OMPT_CUR_TASK_DATA(this_thr)(&(this_thr->th.th_current_task->ompt_task_info.task_data )); | ||||
| 467 | } | ||||
| 468 | } else { | ||||
| 469 | tId = &(this_thr->th.ompt_thread_info.task_data); | ||||
| 470 | } | ||||
| 471 | if (final_spin && (__kmp_tasking_mode == tskm_immediate_exec || | ||||
| 472 | this_thr->th.th_task_team == NULL__null)) { | ||||
| 473 | // implicit task is done. Either no taskqueue, or task-team finished | ||||
| 474 | __ompt_implicit_task_end(this_thr, ompt_entry_state, tId); | ||||
| 475 | } | ||||
| 476 | } | ||||
| 477 | #endif | ||||
| 478 | |||||
| 479 | KMP_INIT_YIELD(spins){ (spins) = __kmp_yield_init; }; // Setup for waiting | ||||
| 480 | KMP_INIT_BACKOFF(time){ (time) = __kmp_pause_init; }; | ||||
| 481 | |||||
| 482 | if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME(2147483647) || | ||||
| 483 | __kmp_pause_status == kmp_soft_paused) { | ||||
| 484 | #if KMP_USE_MONITOR | ||||
| 485 | // The worker threads cannot rely on the team struct existing at this point. | ||||
| 486 | // Use the bt values cached in the thread struct instead. | ||||
| 487 | #ifdef KMP_ADJUST_BLOCKTIME1 | ||||
| 488 | if (__kmp_pause_status == kmp_soft_paused || | ||||
| 489 | (__kmp_zero_bt && !this_thr->th.th_team_bt_set)) | ||||
| 490 | // Force immediate suspend if not set by user and more threads than | ||||
| 491 | // available procs | ||||
| 492 | hibernate = 0; | ||||
| 493 | else | ||||
| 494 | hibernate = this_thr->th.th_team_bt_intervals; | ||||
| 495 | #else | ||||
| 496 | hibernate = this_thr->th.th_team_bt_intervals; | ||||
| 497 | #endif /* KMP_ADJUST_BLOCKTIME */ | ||||
| 498 | |||||
| 499 | /* If the blocktime is nonzero, we want to make sure that we spin wait for | ||||
| 500 | the entirety of the specified #intervals, plus up to one interval more. | ||||
| 501 | This increment makes certain that this thread doesn't go to sleep too | ||||
| 502 | soon. */ | ||||
| 503 | if (hibernate != 0) | ||||
| 504 | hibernate++; | ||||
| 505 | |||||
| 506 | // Add in the current time value. | ||||
| 507 | hibernate += TCR_4(__kmp_global.g.g_time.dt.t_value)(__kmp_global.g.g_time.dt.t_value); | ||||
| 508 | KF_TRACE(20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",if (kmp_f_debug >= 20) { __kmp_debug_printf ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n" , th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate, hibernate - __kmp_global.g.g_time.dt.t_value); } | ||||
| 509 | th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate,if (kmp_f_debug >= 20) { __kmp_debug_printf ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n" , th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate, hibernate - __kmp_global.g.g_time.dt.t_value); } | ||||
| 510 | hibernate - __kmp_global.g.g_time.dt.t_value))if (kmp_f_debug >= 20) { __kmp_debug_printf ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n" , th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate, hibernate - __kmp_global.g.g_time.dt.t_value); }; | ||||
| 511 | #else | ||||
| 512 | if (__kmp_pause_status == kmp_soft_paused) { | ||||
| 513 | // Force immediate suspend | ||||
| 514 | hibernate_goal = KMP_NOW()__kmp_hardware_timestamp(); | ||||
| 515 | } else | ||||
| 516 | hibernate_goal = KMP_NOW()__kmp_hardware_timestamp() + this_thr->th.th_team_bt_intervals; | ||||
| 517 | poll_count = 0; | ||||
| 518 | (void)poll_count; | ||||
| 519 | #endif // KMP_USE_MONITOR | ||||
| 520 | } | ||||
| 521 | |||||
| 522 | KMP_MB(); | ||||
| 523 | |||||
| 524 | // Main wait spin loop | ||||
| 525 | while (flag->notdone_check()) { | ||||
| 526 | kmp_task_team_t *task_team = NULL__null; | ||||
| 527 | if (__kmp_tasking_mode != tskm_immediate_exec) { | ||||
| 528 | task_team = this_thr->th.th_task_team; | ||||
| 529 | /* If the thread's task team pointer is NULL, it means one of 3 things: | ||||
| 530 | 1) A newly-created thread is first being released by | ||||
| 531 | __kmp_fork_barrier(), and its task team has not been set up yet. | ||||
| 532 | 2) All tasks have been executed to completion. | ||||
| 533 | 3) Tasking is off for this region. This could be because we are in a | ||||
| 534 | serialized region (perhaps the outer one), or else tasking was manually | ||||
| 535 | disabled (KMP_TASKING=0). */ | ||||
| 536 | if (task_team != NULL__null) { | ||||
| 537 | if (TCR_SYNC_4(task_team->tt.tt_active)(task_team->tt.tt_active)) { | ||||
| 538 | if (KMP_TASKING_ENABLED(task_team)((!0) == ((task_team)->tt.tt_found_tasks))) { | ||||
| 539 | flag->execute_tasks( | ||||
| 540 | this_thr, th_gtid, final_spin, | ||||
| 541 | &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj, 0); | ||||
| 542 | } else | ||||
| 543 | this_thr->th.th_reap_state = KMP_SAFE_TO_REAP1; | ||||
| 544 | } else { | ||||
| 545 | KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid))if (!(!(0 == (this_thr->th.th_info.ds.ds_tid)))) { __kmp_debug_assert ("!(0 == (this_thr->th.th_info.ds.ds_tid))", "openmp/runtime/src/kmp_wait_release.h" , 545); }; | ||||
| 546 | #if OMPT_SUPPORT1 | ||||
| 547 | // task-team is done now, other cases should have been caught above | ||||
| 548 | if (final_spin && ompt_enabled.enabled) | ||||
| 549 | __ompt_implicit_task_end(this_thr, ompt_entry_state, tId); | ||||
| 550 | #endif | ||||
| 551 | this_thr->th.th_task_team = NULL__null; | ||||
| 552 | this_thr->th.th_reap_state = KMP_SAFE_TO_REAP1; | ||||
| 553 | } | ||||
| 554 | } else { | ||||
| 555 | this_thr->th.th_reap_state = KMP_SAFE_TO_REAP1; | ||||
| 556 | } // if | ||||
| 557 | } // if | ||||
| 558 | |||||
| 559 | KMP_FSYNC_SPIN_PREPARE(CCAST(void *, spin))do { if (__kmp_itt_fsync_prepare_ptr__3_0 && sync_iters < __kmp_itt_prepare_delay) { ++sync_iters; if (sync_iters >= __kmp_itt_prepare_delay) { (!__kmp_itt_fsync_prepare_ptr__3_0 ) ? (void)0 : __kmp_itt_fsync_prepare_ptr__3_0((void *)((void *)const_cast<void *>(spin))); } } } while (0); | ||||
| 560 | if (TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done)) { | ||||
| 561 | if (__kmp_global.g.g_abort) | ||||
| 562 | __kmp_abort_thread(); | ||||
| 563 | break; | ||||
| 564 | } | ||||
| 565 | |||||
| 566 | // If we are oversubscribed, or have waited a bit (and | ||||
| 567 | // KMP_LIBRARY=throughput), then yield | ||||
| 568 | KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time){ if (__kmp_tpause_enabled) { if (((__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc))) { __kmp_tpause(0, (time) ); } else { __kmp_tpause(__kmp_tpause_hint, (time)); } (time) = (time << 1 | 1) & ((kmp_uint64)0xFFFF); } else { __kmp_x86_pause(); if ((((__kmp_use_yield == 1 || __kmp_use_yield == 2) && (((__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)))))) { __kmp_yield(); } else if (__kmp_use_yield == 1) { (spins) -= 2; if (!(spins)) { __kmp_yield(); (spins) = __kmp_yield_next; } } } }; | ||||
| 569 | |||||
| 570 | #if KMP_STATS_ENABLED0 | ||||
| 571 | // Check if thread has been signalled to idle state | ||||
| 572 | // This indicates that the logical "join-barrier" has finished | ||||
| 573 | if (this_thr->th.th_stats->isIdle() && | ||||
| 574 | KMP_GET_THREAD_STATE()((void)0) == FORK_JOIN_BARRIER) { | ||||
| 575 | KMP_SET_THREAD_STATE(IDLE)((void)0); | ||||
| 576 | KMP_PUSH_PARTITIONED_TIMER(OMP_idle)((void)0); | ||||
| 577 | } | ||||
| 578 | #endif | ||||
| 579 | // Check if the barrier surrounding this wait loop has been cancelled | ||||
| 580 | if (Cancellable) { | ||||
| 581 | kmp_team_t *team = this_thr->th.th_team; | ||||
| 582 | if (team && team->t.t_cancel_request == cancel_parallel) | ||||
| 583 | break; | ||||
| 584 | } | ||||
| 585 | |||||
| 586 | // For hidden helper thread, if task_team is nullptr, it means the main | ||||
| 587 | // thread has not released the barrier. We cannot wait here because once the | ||||
| 588 | // main thread releases all children barriers, all hidden helper threads are | ||||
| 589 | // still sleeping. This leads to a problem that following configuration, | ||||
| 590 | // such as task team sync, will not be performed such that this thread does | ||||
| 591 | // not have task team. Usually it is not bad. However, a corner case is, | ||||
| 592 | // when the first task encountered is an untied task, the check in | ||||
| 593 | // __kmp_task_alloc will crash because it uses the task team pointer without | ||||
| 594 | // checking whether it is nullptr. It is probably under some kind of | ||||
| 595 | // assumption. | ||||
| 596 | if (task_team && KMP_HIDDEN_HELPER_WORKER_THREAD(th_gtid)((th_gtid) > 1 && (th_gtid) <= __kmp_hidden_helper_threads_num ) && | ||||
| 597 | !TCR_4(__kmp_hidden_helper_team_done)(__kmp_hidden_helper_team_done)) { | ||||
| 598 | // If there is still hidden helper tasks to be executed, the hidden helper | ||||
| 599 | // thread will not enter a waiting status. | ||||
| 600 | if (KMP_ATOMIC_LD_ACQ(&__kmp_unexecuted_hidden_helper_tasks)(&__kmp_unexecuted_hidden_helper_tasks)->load(std::memory_order_acquire ) == 0) { | ||||
| 601 | __kmp_hidden_helper_worker_thread_wait(); | ||||
| 602 | } | ||||
| 603 | continue; | ||||
| 604 | } | ||||
| 605 | |||||
| 606 | // Don't suspend if KMP_BLOCKTIME is set to "infinite" | ||||
| 607 | if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME(2147483647) && | ||||
| 608 | __kmp_pause_status != kmp_soft_paused) | ||||
| 609 | continue; | ||||
| 610 | |||||
| 611 | // Don't suspend if there is a likelihood of new tasks being spawned. | ||||
| 612 | if (task_team != NULL__null && TCR_4(task_team->tt.tt_found_tasks)(task_team->tt.tt_found_tasks) && | ||||
| 613 | !__kmp_wpolicy_passive) | ||||
| 614 | continue; | ||||
| 615 | |||||
| 616 | #if KMP_USE_MONITOR | ||||
| 617 | // If we have waited a bit more, fall asleep | ||||
| 618 | if (TCR_4(__kmp_global.g.g_time.dt.t_value)(__kmp_global.g.g_time.dt.t_value) < hibernate) | ||||
| 619 | continue; | ||||
| 620 | #else | ||||
| 621 | if (KMP_BLOCKING(hibernate_goal, poll_count++)((hibernate_goal) > __kmp_hardware_timestamp())) | ||||
| 622 | continue; | ||||
| 623 | #endif | ||||
| 624 | // Don't suspend if wait loop designated non-sleepable | ||||
| 625 | // in template parameters | ||||
| 626 | if (!Sleepable) | ||||
| 627 | continue; | ||||
| 628 | |||||
| 629 | #if KMP_HAVE_MWAIT((0 || 1) && (1 || 0) && !0) || KMP_HAVE_UMWAIT((0 || 1) && (1 || 0) && !0) | ||||
| 630 | if (__kmp_mwait_enabled || __kmp_umwait_enabled) { | ||||
| 631 | KF_TRACE(50, ("__kmp_wait_sleep: T#%d using monitor/mwait\n", th_gtid))if (kmp_f_debug >= 50) { __kmp_debug_printf ("__kmp_wait_sleep: T#%d using monitor/mwait\n" , th_gtid); }; | ||||
| 632 | flag->mwait(th_gtid); | ||||
| 633 | } else { | ||||
| 634 | #endif | ||||
| 635 | KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid))if (kmp_f_debug >= 50) { __kmp_debug_printf ("__kmp_wait_sleep: T#%d suspend time reached\n" , th_gtid); }; | ||||
| 636 | #if KMP_OS_UNIX1 | ||||
| 637 | if (final_spin) | ||||
| 638 | KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false)(&this_thr->th.th_blocking)->store(false, std::memory_order_release ); | ||||
| 639 | #endif | ||||
| 640 | flag->suspend(th_gtid); | ||||
| 641 | #if KMP_OS_UNIX1 | ||||
| 642 | if (final_spin) | ||||
| 643 | KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true)(&this_thr->th.th_blocking)->store(true, std::memory_order_release ); | ||||
| 644 | #endif | ||||
| 645 | #if KMP_HAVE_MWAIT((0 || 1) && (1 || 0) && !0) || KMP_HAVE_UMWAIT((0 || 1) && (1 || 0) && !0) | ||||
| 646 | } | ||||
| 647 | #endif | ||||
| 648 | |||||
| 649 | if (TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done)) { | ||||
| 650 | if (__kmp_global.g.g_abort) | ||||
| 651 | __kmp_abort_thread(); | ||||
| 652 | break; | ||||
| 653 | } else if (__kmp_tasking_mode != tskm_immediate_exec && | ||||
| 654 | this_thr->th.th_reap_state == KMP_SAFE_TO_REAP1) { | ||||
| 655 | this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP0; | ||||
| 656 | } | ||||
| 657 | // TODO: If thread is done with work and times out, disband/free | ||||
| 658 | } | ||||
| 659 | |||||
| 660 | #if OMPT_SUPPORT1 | ||||
| 661 | ompt_state_t ompt_exit_state = this_thr->th.ompt_thread_info.state; | ||||
| 662 | if (ompt_enabled.enabled && ompt_exit_state != ompt_state_undefined) { | ||||
| 663 | #if OMPT_OPTIONAL1 | ||||
| 664 | if (final_spin) { | ||||
| 665 | __ompt_implicit_task_end(this_thr, ompt_exit_state, tId); | ||||
| 666 | ompt_exit_state = this_thr->th.ompt_thread_info.state; | ||||
| 667 | } | ||||
| 668 | #endif | ||||
| 669 | if (ompt_exit_state == ompt_state_idle) { | ||||
| 670 | this_thr->th.ompt_thread_info.state = ompt_state_overhead; | ||||
| 671 | } | ||||
| 672 | } | ||||
| 673 | #endif | ||||
| 674 | #if KMP_STATS_ENABLED0 | ||||
| 675 | // If we were put into idle state, pop that off the state stack | ||||
| 676 | if (KMP_GET_THREAD_STATE()((void)0) == IDLE) { | ||||
| 677 | KMP_POP_PARTITIONED_TIMER()((void)0); | ||||
| 678 | KMP_SET_THREAD_STATE(thread_state)((void)0); | ||||
| 679 | this_thr->th.th_stats->resetIdleFlag(); | ||||
| 680 | } | ||||
| 681 | #endif | ||||
| 682 | |||||
| 683 | #if KMP_OS_UNIX1 | ||||
| 684 | if (final_spin) | ||||
| 685 | KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false)(&this_thr->th.th_blocking)->store(false, std::memory_order_release ); | ||||
| 686 | #endif | ||||
| 687 | KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin))do { __asm__ __volatile__("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 " ::"i"(0x4377) : "%ebx"); if (sync_iters >= __kmp_itt_prepare_delay ) { (!__kmp_itt_fsync_acquired_ptr__3_0) ? (void)0 : __kmp_itt_fsync_acquired_ptr__3_0 ((void *)((void *)const_cast<void *>(spin))); } } while (0); | ||||
| 688 | if (Cancellable) { | ||||
| 689 | kmp_team_t *team = this_thr->th.th_team; | ||||
| 690 | if (team && team->t.t_cancel_request == cancel_parallel) { | ||||
| 691 | if (tasks_completed) { | ||||
| 692 | // undo the previous decrement of unfinished_threads so that the | ||||
| 693 | // thread can decrement at the join barrier with no problem | ||||
| 694 | kmp_task_team_t *task_team = this_thr->th.th_task_team; /* NOTE(review): dereferenced below without a NULL check -- confirm th_task_team cannot be NULL when tasks_completed is set */ | ||||
| 695 | std::atomic<kmp_int32> *unfinished_threads = | ||||
| 696 | &(task_team->tt.tt_unfinished_threads); | ||||
| 697 | KMP_ATOMIC_INC(unfinished_threads)(unfinished_threads)->fetch_add(1, std::memory_order_acq_rel ); | ||||
| 698 | } | ||||
| 699 | return true; | ||||
| 700 | } | ||||
| 701 | } | ||||
| 702 | return false; | ||||
| 703 | } | ||||
| 704 | |||||
| 705 | #if KMP_HAVE_MWAIT((0 || 1) && (1 || 0) && !0) || KMP_HAVE_UMWAIT((0 || 1) && (1 || 0) && !0) | ||||
| 706 | // Set up a monitor on the flag variable causing the calling thread to wait in | ||||
| 707 | // a less active state until the flag variable is modified. th_gtid indexes __kmp_threads to find the waiting thread; flag is the flag object whose cache line is monitored. The thread's suspend mutex is held throughout except around the actual (u)mwait call. NOTE(review): the monitor/mwait calls are selected with #if KMP_HAVE_UMWAIT / #elif KMP_HAVE_MWAIT, so when UMWAIT support is compiled in the __kmp_mm_monitor/__kmp_mm_mwait branches are not compiled at all -- confirm this is intended when only __kmp_mwait_enabled is set at run time. | ||||
| 708 | template <class C> | ||||
| 709 | static inline void __kmp_mwait_template(int th_gtid, C *flag) { | ||||
| 710 | KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_mwait)((void)0); | ||||
| 711 | kmp_info_t *th = __kmp_threads[th_gtid]; | ||||
| 712 | |||||
| 713 | KF_TRACE(30, ("__kmp_mwait_template: T#%d enter for flag = %p\n", th_gtid,if (kmp_f_debug >= 30) { __kmp_debug_printf ("__kmp_mwait_template: T#%d enter for flag = %p\n" , th_gtid, flag->get()); } | ||||
| 714 | flag->get()))if (kmp_f_debug >= 30) { __kmp_debug_printf ("__kmp_mwait_template: T#%d enter for flag = %p\n" , th_gtid, flag->get()); }; | ||||
| 715 | |||||
| 716 | // User-level mwait is available | ||||
| 717 | KMP_DEBUG_ASSERT(__kmp_mwait_enabled || __kmp_umwait_enabled)if (!(__kmp_mwait_enabled || __kmp_umwait_enabled)) { __kmp_debug_assert ("__kmp_mwait_enabled || __kmp_umwait_enabled", "openmp/runtime/src/kmp_wait_release.h" , 717); }; | ||||
| 718 | |||||
| 719 | __kmp_suspend_initialize_thread(th); | ||||
| 720 | __kmp_lock_suspend_mx(th); | ||||
| 721 | |||||
| 722 | volatile void *spin = flag->get(); | ||||
| 723 | void *cacheline = (void *)(kmp_uintptr_t(spin) & ~(CACHE_LINE64 - 1)); /* round the flag address down to the start of its cache line */ | ||||
| 724 | |||||
| 725 | if (!flag->done_check()) { | ||||
| 726 | // Mark thread as no longer active | ||||
| 727 | th->th.th_active = FALSE0; | ||||
| 728 | if (th->th.th_active_in_pool) { | ||||
| 729 | th->th.th_active_in_pool = FALSE0; | ||||
| 730 | KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth)(&__kmp_thread_pool_active_nth)->fetch_sub(1, std::memory_order_acq_rel ); | ||||
| 731 | KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0)if (!((__kmp_thread_pool_active_nth) >= 0)) { __kmp_debug_assert ("(__kmp_thread_pool_active_nth) >= 0", "openmp/runtime/src/kmp_wait_release.h" , 731); }; | ||||
| 732 | } | ||||
| 733 | flag->set_sleeping(); | ||||
| 734 | KF_TRACE(50, ("__kmp_mwait_template: T#%d calling monitor\n", th_gtid))if (kmp_f_debug >= 50) { __kmp_debug_printf ("__kmp_mwait_template: T#%d calling monitor\n" , th_gtid); }; | ||||
| 735 | #if KMP_HAVE_UMWAIT((0 || 1) && (1 || 0) && !0) | ||||
| 736 | if (__kmp_umwait_enabled) { | ||||
| 737 | __kmp_umonitor(cacheline); | ||||
| 738 | } | ||||
| 739 | #elif KMP_HAVE_MWAIT((0 || 1) && (1 || 0) && !0) | ||||
| 740 | if (__kmp_mwait_enabled) { | ||||
| 741 | __kmp_mm_monitor(cacheline, 0, 0); | ||||
| 742 | } | ||||
| 743 | #endif | ||||
| 744 | // To avoid a race, check flag between 'monitor' and 'mwait'. A write to | ||||
| 745 | // the address could happen after the last time we checked and before | ||||
| 746 | // monitoring started, in which case monitor can't detect the change. | ||||
| 747 | if (flag->done_check()) | ||||
| 748 | flag->unset_sleeping(); | ||||
| 749 | else { | ||||
| 750 | // if flag changes here, wake-up happens immediately | ||||
| 751 | TCW_PTR(th->th.th_sleep_loc, (void *)flag)((th->th.th_sleep_loc)) = (((void *)flag)); | ||||
| 752 | th->th.th_sleep_loc_type = flag->get_type(); | ||||
| 753 | __kmp_unlock_suspend_mx(th); | ||||
| 754 | KF_TRACE(50, ("__kmp_mwait_template: T#%d calling mwait\n", th_gtid))if (kmp_f_debug >= 50) { __kmp_debug_printf ("__kmp_mwait_template: T#%d calling mwait\n" , th_gtid); }; | ||||
| 755 | #if KMP_HAVE_UMWAIT((0 || 1) && (1 || 0) && !0) | ||||
| 756 | if (__kmp_umwait_enabled) { | ||||
| 757 | __kmp_umwait(1, 100); // to do: enable ctrl via hints, backoff counter | ||||
| 758 | } | ||||
| 759 | #elif KMP_HAVE_MWAIT((0 || 1) && (1 || 0) && !0) | ||||
| 760 | if (__kmp_mwait_enabled) { | ||||
| 761 | __kmp_mm_mwait(0, __kmp_mwait_hints); | ||||
| 762 | } | ||||
| 763 | #endif | ||||
| 764 | KF_TRACE(50, ("__kmp_mwait_template: T#%d mwait done\n", th_gtid))if (kmp_f_debug >= 50) { __kmp_debug_printf ("__kmp_mwait_template: T#%d mwait done\n" , th_gtid); }; | ||||
| 765 | __kmp_lock_suspend_mx(th); | ||||
| 766 | // Clean up sleep info; doesn't matter how/why this thread stopped waiting | ||||
| 767 | if (flag->is_sleeping()) | ||||
| 768 | flag->unset_sleeping(); | ||||
| 769 | TCW_PTR(th->th.th_sleep_loc, NULL)((th->th.th_sleep_loc)) = ((__null)); | ||||
| 770 | th->th.th_sleep_loc_type = flag_unset; | ||||
| 771 | } | ||||
| 772 | // Mark thread as active again | ||||
| 773 | th->th.th_active = TRUE(!0); | ||||
| 774 | if (TCR_4(th->th.th_in_pool)(th->th.th_in_pool)) { | ||||
| 775 | KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth)(&__kmp_thread_pool_active_nth)->fetch_add(1, std::memory_order_acq_rel ); | ||||
| 776 | th->th.th_active_in_pool = TRUE(!0); | ||||
| 777 | } | ||||
| 778 | } // Drop out to main wait loop to check flag, handle tasks, etc. | ||||
| 779 | __kmp_unlock_suspend_mx(th); | ||||
| 780 | KF_TRACE(30, ("__kmp_mwait_template: T#%d exit\n", th_gtid))if (kmp_f_debug >= 30) { __kmp_debug_printf ("__kmp_mwait_template: T#%d exit\n" , th_gtid); }; | ||||
| 781 | } | ||||
| 782 | #endif // KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT | ||||
| 783 | |||||
| 784 | /* Release any threads specified as waiting on the flag by releasing the flag | ||||
| 785 | and resume the waiting thread if indicated by the sleep bit(s). A thread that | ||||
| 786 | calls __kmp_wait_template must call this function to wake up the potentially | ||||
| 787 | sleeping thread and prevent deadlocks! */ | ||||
| 788 | template <class C> static inline void __kmp_release_template(C *flag) { | ||||
| 789 | #ifdef KMP_DEBUG1 | ||||
| 790 | int gtid = TCR_4(__kmp_init_gtid)(__kmp_init_gtid) ? __kmp_get_gtid()__kmp_get_global_thread_id() : -1; | ||||
| 791 | #endif | ||||
| 792 | KF_TRACE(20, ("__kmp_release: T#%d releasing flag(%x)\n", gtid, flag->get()))if (kmp_f_debug >= 20) { __kmp_debug_printf ("__kmp_release: T#%d releasing flag(%x)\n" , gtid, flag->get()); }; | ||||
| 793 | KMP_DEBUG_ASSERT(flag->get())if (!(flag->get())) { __kmp_debug_assert("flag->get()", "openmp/runtime/src/kmp_wait_release.h", 793); }; | ||||
| 794 | KMP_FSYNC_RELEASING(flag->get_void_p())(!__kmp_itt_fsync_releasing_ptr__3_0) ? (void)0 : __kmp_itt_fsync_releasing_ptr__3_0 ((void *)(flag->get_void_p())); | ||||
| 795 | |||||
| 796 | flag->internal_release(); | ||||
| 797 | |||||
| 798 | KF_TRACE(100, ("__kmp_release: T#%d set new spin=%d\n", gtid, flag->get(),if (kmp_f_debug >= 100) { __kmp_debug_printf ("__kmp_release: T#%d set new spin=%d\n" , gtid, flag->get(), flag->load()); } | ||||
| 799 | flag->load()))if (kmp_f_debug >= 100) { __kmp_debug_printf ("__kmp_release: T#%d set new spin=%d\n" , gtid, flag->get(), flag->load()); }; | ||||
| 800 | |||||
| 801 | if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME(2147483647)) { | ||||
| 802 | // Only need to check sleep stuff if infinite block time not set. | ||||
| 803 | // Are *any* threads waiting on flag sleeping? | ||||
| 804 | if (flag->is_any_sleeping()) { | ||||
| 805 | for (unsigned int i = 0; i < flag->get_num_waiters(); ++i) { | ||||
| 806 | // if sleeping waiter exists at i, sets current_waiter to i inside flag | ||||
| 807 | kmp_info_t *waiter = flag->get_waiter(i); | ||||
| 808 | if (waiter) { | ||||
| 809 | int wait_gtid = waiter->th.th_info.ds.ds_gtid; | ||||
| 810 | // Wake up thread if needed | ||||
| 811 | KF_TRACE(50, ("__kmp_release: T#%d waking up thread T#%d since sleep "if (kmp_f_debug >= 50) { __kmp_debug_printf ("__kmp_release: T#%d waking up thread T#%d since sleep " "flag(%p) set\n", gtid, wait_gtid, flag->get()); } | ||||
| 812 | "flag(%p) set\n",if (kmp_f_debug >= 50) { __kmp_debug_printf ("__kmp_release: T#%d waking up thread T#%d since sleep " "flag(%p) set\n", gtid, wait_gtid, flag->get()); } | ||||
| 813 | gtid, wait_gtid, flag->get()))if (kmp_f_debug >= 50) { __kmp_debug_printf ("__kmp_release: T#%d waking up thread T#%d since sleep " "flag(%p) set\n", gtid, wait_gtid, flag->get()); }; | ||||
| 814 | flag->resume(wait_gtid); // unsets flag's current_waiter when done | ||||
| 815 | } | ||||
| 816 | } | ||||
| 817 | } | ||||
| 818 | } | ||||
| 819 | } | ||||
| 820 | |||||
| 821 | template <bool Cancellable, bool Sleepable> | ||||
| 822 | class kmp_flag_32 : public kmp_flag_atomic<kmp_uint32, flag32, Sleepable> { | ||||
| 823 | public: | ||||
| 824 | kmp_flag_32(std::atomic<kmp_uint32> *p) | ||||
| 825 | : kmp_flag_atomic<kmp_uint32, flag32, Sleepable>(p) {} | ||||
| 826 | kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_info_t *thr) | ||||
| 827 | : kmp_flag_atomic<kmp_uint32, flag32, Sleepable>(p, thr) {} | ||||
| 828 | kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_uint32 c) | ||||
| 829 | : kmp_flag_atomic<kmp_uint32, flag32, Sleepable>(p, c) {} | ||||
| 830 | void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); } | ||||
| 831 | #if KMP_HAVE_MWAIT((0 || 1) && (1 || 0) && !0) || KMP_HAVE_UMWAIT((0 || 1) && (1 || 0) && !0) | ||||
| 832 | void mwait(int th_gtid) { __kmp_mwait_32(th_gtid, this); } | ||||
| 833 | #endif | ||||
| 834 | void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); } | ||||
| 835 | int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, | ||||
| 836 | int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj), void *itt_sync_obj, | ||||
| 837 | kmp_int32 is_constrained) { | ||||
| 838 | return __kmp_execute_tasks_32( | ||||
| 839 | this_thr, gtid, this, final_spin, | ||||
| 840 | thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj, is_constrained); | ||||
| 841 | } | ||||
| 842 | bool wait(kmp_info_t *this_thr, | ||||
| 843 | int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj), void *itt_sync_obj) { | ||||
| 844 | if (final_spin) | ||||
| 845 | return __kmp_wait_template<kmp_flag_32, TRUE(!0), Cancellable, Sleepable>( | ||||
| 846 | this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | ||||
| 847 | else | ||||
| 848 | return __kmp_wait_template<kmp_flag_32, FALSE0, Cancellable, Sleepable>( | ||||
| 849 | this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | ||||
| 850 | } | ||||
| 851 | void release() { __kmp_release_template(this); } | ||||
| 852 | flag_type get_ptr_type() { return flag32; } | ||||
| 853 | }; | ||||
| 854 | |||||
| 855 | template <bool Cancellable, bool Sleepable> | ||||
| 856 | class kmp_flag_64 : public kmp_flag_native<kmp_uint64, flag64, Sleepable> { | ||||
| 857 | public: | ||||
| 858 | kmp_flag_64(volatile kmp_uint64 *p) | ||||
| 859 | : kmp_flag_native<kmp_uint64, flag64, Sleepable>(p) {} | ||||
| 860 | kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr) | ||||
| 861 | : kmp_flag_native<kmp_uint64, flag64, Sleepable>(p, thr) {} | ||||
| 862 | kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c) | ||||
| 863 | : kmp_flag_native<kmp_uint64, flag64, Sleepable>(p, c) {} | ||||
| 864 | kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c, std::atomic<bool> *loc) | ||||
| 865 | : kmp_flag_native<kmp_uint64, flag64, Sleepable>(p, c, loc) {} | ||||
| 866 | void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); } | ||||
| 867 | #if KMP_HAVE_MWAIT((0 || 1) && (1 || 0) && !0) || KMP_HAVE_UMWAIT((0 || 1) && (1 || 0) && !0) | ||||
| 868 | void mwait(int th_gtid) { __kmp_mwait_64(th_gtid, this); } | ||||
| 869 | #endif | ||||
| 870 | void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); } | ||||
| 871 | int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, | ||||
| 872 | int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj), void *itt_sync_obj, | ||||
| 873 | kmp_int32 is_constrained) { | ||||
| 874 | return __kmp_execute_tasks_64( | ||||
| 875 | this_thr, gtid, this, final_spin, | ||||
| 876 | thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj, is_constrained); | ||||
| 877 | } | ||||
| 878 | bool wait(kmp_info_t *this_thr, | ||||
| 879 | int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj), void *itt_sync_obj) { | ||||
| 880 | if (final_spin) | ||||
| 881 | return __kmp_wait_template<kmp_flag_64, TRUE(!0), Cancellable, Sleepable>( | ||||
| 882 | this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | ||||
| 883 | else | ||||
| 884 | return __kmp_wait_template<kmp_flag_64, FALSE0, Cancellable, Sleepable>( | ||||
| 885 | this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | ||||
| 886 | } | ||||
| 887 | void release() { __kmp_release_template(this); } | ||||
| 888 | flag_type get_ptr_type() { return flag64; } | ||||
| 889 | }; | ||||
| 890 | |||||
| 891 | template <bool Cancellable, bool Sleepable> | ||||
| 892 | class kmp_atomic_flag_64 | ||||
| 893 | : public kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable> { | ||||
| 894 | public: | ||||
| 895 | kmp_atomic_flag_64(std::atomic<kmp_uint64> *p) | ||||
| 896 | : kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p) {} | ||||
| 897 | kmp_atomic_flag_64(std::atomic<kmp_uint64> *p, kmp_info_t *thr) | ||||
| 898 | : kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p, thr) {} | ||||
| 899 | kmp_atomic_flag_64(std::atomic<kmp_uint64> *p, kmp_uint64 c) | ||||
| 900 | : kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p, c) {} | ||||
| 901 | kmp_atomic_flag_64(std::atomic<kmp_uint64> *p, kmp_uint64 c, | ||||
| 902 | std::atomic<bool> *loc) | ||||
| 903 | : kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p, c, loc) {} | ||||
| 904 | void suspend(int th_gtid) { __kmp_atomic_suspend_64(th_gtid, this); } | ||||
| 905 | void mwait(int th_gtid) { __kmp_atomic_mwait_64(th_gtid, this); } | ||||
| 906 | void resume(int th_gtid) { __kmp_atomic_resume_64(th_gtid, this); } | ||||
| 907 | int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, | ||||
| 908 | int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj), void *itt_sync_obj, | ||||
| 909 | kmp_int32 is_constrained) { | ||||
| 910 | return __kmp_atomic_execute_tasks_64( | ||||
| 911 | this_thr, gtid, this, final_spin, | ||||
| 912 | thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj, is_constrained); | ||||
| 913 | } | ||||
| 914 | bool wait(kmp_info_t *this_thr, | ||||
| 915 | int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj), void *itt_sync_obj) { | ||||
| 916 | if (final_spin) | ||||
| 917 | return __kmp_wait_template<kmp_atomic_flag_64, TRUE(!0), Cancellable, | ||||
| 918 | Sleepable>( | ||||
| 919 | this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | ||||
| 920 | else | ||||
| 921 | return __kmp_wait_template<kmp_atomic_flag_64, FALSE0, Cancellable, | ||||
| 922 | Sleepable>( | ||||
| 923 | this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | ||||
| 924 | } | ||||
| 925 | void release() { __kmp_release_template(this); } | ||||
| 926 | flag_type get_ptr_type() { return atomic_flag64; } | ||||
| 927 | }; | ||||
| 928 | |||||
| 929 | // Hierarchical 64-bit on-core barrier instantiation | ||||
| 930 | class kmp_flag_oncore : public kmp_flag_native<kmp_uint64, flag_oncore, false> { | ||||
| 931 | kmp_uint32 offset; /**< Portion of flag of interest for an operation. */ | ||||
| 932 | bool flag_switch; /**< Indicates a switch in flag location. */ | ||||
| 933 | enum barrier_type bt; /**< Barrier type. */ | ||||
| 934 | kmp_info_t *this_thr; /**< Thread to redirect to different flag location. */ | ||||
| 935 | #if USE_ITT_BUILD1 | ||||
| 936 | void *itt_sync_obj; /**< ITT object to pass to new flag location. */ | ||||
| 937 | #endif | ||||
| 938 | unsigned char &byteref(volatile kmp_uint64 *loc, size_t offset) { | ||||
| 939 | return (RCAST(unsigned char *, CCAST(kmp_uint64 *, loc))reinterpret_cast<unsigned char *>(const_cast<kmp_uint64 *>(loc)))[offset]; | ||||
| 940 | } | ||||
| 941 | |||||
| 942 | public: | ||||
| 943 | kmp_flag_oncore(volatile kmp_uint64 *p) | ||||
| 944 | : kmp_flag_native<kmp_uint64, flag_oncore, false>(p), flag_switch(false) { | ||||
| 945 | } | ||||
| 946 | kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx) | ||||
| 947 | : kmp_flag_native<kmp_uint64, flag_oncore, false>(p), offset(idx), | ||||
| 948 | flag_switch(false), | ||||
| 949 | bt(bs_last_barrier) USE_ITT_BUILD_ARG(itt_sync_obj(nullptr)), itt_sync_obj(nullptr) {} | ||||
| 950 | kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx, | ||||
| 951 | enum barrier_type bar_t, | ||||
| 952 | kmp_info_t *thr USE_ITT_BUILD_ARG(void *itt), void *itt) | ||||
| 953 | : kmp_flag_native<kmp_uint64, flag_oncore, false>(p, c), offset(idx), | ||||
| 954 | flag_switch(false), bt(bar_t), | ||||
| 955 | this_thr(thr) USE_ITT_BUILD_ARG(itt_sync_obj(itt)), itt_sync_obj(itt) {} | ||||
| 956 | virtual ~kmp_flag_oncore() override {} | ||||
| 957 | void *operator new(size_t size) { return __kmp_allocate(size)___kmp_allocate((size), "openmp/runtime/src/kmp_wait_release.h" , 957); } | ||||
| 958 | void operator delete(void *p) { __kmp_free(p)___kmp_free((p), "openmp/runtime/src/kmp_wait_release.h", 958 ); } | ||||
| 959 | bool done_check_val(kmp_uint64 old_loc) override { | ||||
| 960 | return byteref(&old_loc, offset) == checker; | ||||
| 961 | } | ||||
| 962 | bool done_check() override { return done_check_val(*get()); } | ||||
| 963 | bool notdone_check() override { | ||||
| 964 | // Calculate flag_switch | ||||
| 965 | if (this_thr->th.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG3) | ||||
| 966 | flag_switch = true; | ||||
| 967 | if (byteref(get(), offset) != 1 && !flag_switch) | ||||
| 968 | return true; | ||||
| 969 | else if (flag_switch) { | ||||
| 970 | this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING4; | ||||
| 971 | kmp_flag_64<> flag(&this_thr->th.th_bar[bt].bb.b_go, | ||||
| 972 | (kmp_uint64)KMP_BARRIER_STATE_BUMP(1 << 2)); | ||||
| 973 | __kmp_wait_64(this_thr, &flag, TRUE(!0) USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | ||||
| 974 | } | ||||
| 975 | return false; | ||||
| 976 | } | ||||
| 977 | void internal_release() { | ||||
| 978 | // Other threads can write their own bytes simultaneously. | ||||
| 979 | if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME(2147483647)) { | ||||
| 980 | byteref(get(), offset) = 1; | ||||
| 981 | } else { | ||||
| 982 | kmp_uint64 mask = 0; | ||||
| 983 | byteref(&mask, offset) = 1; | ||||
| 984 | KMP_TEST_THEN_OR64(get(), mask)__sync_fetch_and_or((volatile kmp_uint64 *)(get()), (kmp_uint64 )(mask)); | ||||
| 985 | } | ||||
| 986 | } | ||||
| 987 | void wait(kmp_info_t *this_thr, int final_spin) { | ||||
| 988 | if (final_spin) | ||||
| 989 | __kmp_wait_template<kmp_flag_oncore, TRUE(!0)>( | ||||
| 990 | this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | ||||
| 991 | else | ||||
| 992 | __kmp_wait_template<kmp_flag_oncore, FALSE0>( | ||||
| 993 | this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | ||||
| 994 | } | ||||
| 995 | void release() { __kmp_release_template(this); } | ||||
| 996 | void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); } | ||||
| 997 | #if KMP_HAVE_MWAIT((0 || 1) && (1 || 0) && !0) || KMP_HAVE_UMWAIT((0 || 1) && (1 || 0) && !0) | ||||
| 998 | void mwait(int th_gtid) { __kmp_mwait_oncore(th_gtid, this); } | ||||
| 999 | #endif | ||||
| 1000 | void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); } | ||||
| 1001 | int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, | ||||
| 1002 | int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj), void *itt_sync_obj, | ||||
| 1003 | kmp_int32 is_constrained) { | ||||
| 1004 | #if OMPD_SUPPORT1 | ||||
| 1005 | int ret = __kmp_execute_tasks_oncore( | ||||
| 1006 | this_thr, gtid, this, final_spin, | ||||
| 1007 | thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj, is_constrained); | ||||
| 1008 | if (ompd_state & OMPD_ENABLE_BP0x1) | ||||
| 1009 | ompd_bp_task_end(); | ||||
| 1010 | return ret; | ||||
| 1011 | #else | ||||
| 1012 | return __kmp_execute_tasks_oncore( | ||||
| 1013 | this_thr, gtid, this, final_spin, | ||||
| 1014 | thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj, is_constrained); | ||||
| 1015 | #endif | ||||
| 1016 | } | ||||
| 1017 | enum barrier_type get_bt() { return bt; } | ||||
| 1018 | flag_type get_ptr_type() { return flag_oncore; } | ||||
| 1019 | }; | ||||
| 1020 | |||||
| 1021 | static inline void __kmp_null_resume_wrapper(kmp_info_t *thr) { | ||||
| 1022 | int gtid = __kmp_gtid_from_thread(thr); | ||||
| 1023 | void *flag = CCAST(void *, thr->th.th_sleep_loc)const_cast<void *>(thr->th.th_sleep_loc); | ||||
| 1024 | flag_type type = thr->th.th_sleep_loc_type; | ||||
| 1025 | if (!flag) | ||||
| 1026 | return; | ||||
| 1027 | // Attempt to wake up a thread: examine its type and call appropriate template | ||||
| 1028 | switch (type) { | ||||
| 1029 | case flag32: | ||||
| 1030 | __kmp_resume_32(gtid, RCAST(kmp_flag_32<> *, flag)reinterpret_cast<kmp_flag_32<> *>(flag)); | ||||
| 1031 | break; | ||||
| 1032 | case flag64: | ||||
| 1033 | __kmp_resume_64(gtid, RCAST(kmp_flag_64<> *, flag)reinterpret_cast<kmp_flag_64<> *>(flag)); | ||||
| 1034 | break; | ||||
| 1035 | case atomic_flag64: | ||||
| 1036 | __kmp_atomic_resume_64(gtid, RCAST(kmp_atomic_flag_64<> *, flag)reinterpret_cast<kmp_atomic_flag_64<> *>(flag)); | ||||
| 1037 | break; | ||||
| 1038 | case flag_oncore: | ||||
| 1039 | __kmp_resume_oncore(gtid, RCAST(kmp_flag_oncore *, flag)reinterpret_cast<kmp_flag_oncore *>(flag)); | ||||
| 1040 | break; | ||||
| 1041 | #ifdef KMP_DEBUG1 | ||||
| 1042 | case flag_unset: | ||||
| 1043 | KF_TRACE(100, ("__kmp_null_resume_wrapper: flag type %d is unset\n", type))if (kmp_f_debug >= 100) { __kmp_debug_printf ("__kmp_null_resume_wrapper: flag type %d is unset\n" , type); }; | ||||
| 1044 | break; | ||||
| 1045 | default: | ||||
| 1046 | KF_TRACE(100, ("__kmp_null_resume_wrapper: flag type %d does not match any "if (kmp_f_debug >= 100) { __kmp_debug_printf ("__kmp_null_resume_wrapper: flag type %d does not match any " "known flag type\n", type); } | ||||
| 1047 | "known flag type\n",if (kmp_f_debug >= 100) { __kmp_debug_printf ("__kmp_null_resume_wrapper: flag type %d does not match any " "known flag type\n", type); } | ||||
| 1048 | type))if (kmp_f_debug >= 100) { __kmp_debug_printf ("__kmp_null_resume_wrapper: flag type %d does not match any " "known flag type\n", type); }; | ||||
| 1049 | #endif | ||||
| 1050 | } | ||||
| 1051 | } | ||||
| 1052 | |||||
| 1053 | /*! | ||||
| 1054 | @} | ||||
| 1055 | */ | ||||
| 1056 | |||||
| 1057 | #endif // KMP_WAIT_RELEASE_H |