| File: | build/source/openmp/runtime/src/kmp_barrier.cpp |
| Warning: | line 2218, column 7 Called function pointer is null (null dereference) |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
| 1 | /* | |||
| 2 | * kmp_barrier.cpp | |||
| 3 | */ | |||
| 4 | ||||
| 5 | //===----------------------------------------------------------------------===// | |||
| 6 | // | |||
| 7 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
| 8 | // See https://llvm.org/LICENSE.txt for license information. | |||
| 9 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
| 10 | // | |||
| 11 | //===----------------------------------------------------------------------===// | |||
| 12 | ||||
| 13 | #include "kmp_wait_release.h" | |||
| 14 | #include "kmp_barrier.h" | |||
| 15 | #include "kmp_itt.h" | |||
| 16 | #include "kmp_os.h" | |||
| 17 | #include "kmp_stats.h" | |||
| 18 | #include "ompt-specific.h" | |||
| 19 | // for distributed barrier | |||
| 20 | #include "kmp_affinity.h" | |||
| 21 | ||||
#if KMP_MIC
#include <immintrin.h>
#define USE_NGO_STORES 1
#endif // KMP_MIC

// Helpers used when copying ICVs / go data between threads.
// With KMP_MIC && USE_NGO_STORES, ngo_load() declares a local 512-bit value
// named Vt loaded from src, and both store macros write that Vt to dst (their
// src argument is ignored), so ngo_load() must appear first in the same scope.
// Otherwise the load and sync macros are no-ops and the stores fall back to
// copy_icvs() / KMP_MEMCPY() of one CACHE_LINE.
#if KMP_MIC && USE_NGO_STORES
// ICV copying
#define ngo_load(src) __m512d Vt = _mm512_load_pd((void *)(src))
#define ngo_store_icvs(dst, src) _mm512_storenrngo_pd((void *)(dst), Vt)
#define ngo_store_go(dst, src) _mm512_storenrngo_pd((void *)(dst), Vt)
#define ngo_sync() __asm__ volatile("lock; addl $0,0(%%rsp)" ::: "memory")
#else
#define ngo_load(src) ((void)0)
#define ngo_store_icvs(dst, src) copy_icvs((dst), (src))
#define ngo_store_go(dst, src) KMP_MEMCPY((dst), (src), CACHE_LINE)
#define ngo_sync() ((void)0)
#endif /* KMP_MIC && USE_NGO_STORES */
| 39 | ||||
| 40 | void __kmp_print_structure(void); // Forward declaration | |||
| 41 | ||||
| 42 | // ---------------------------- Barrier Algorithms ---------------------------- | |||
| 43 | // Distributed barrier | |||
| 44 | ||||
| 45 | // Compute how many threads to have polling each cache-line. | |||
| 46 | // We want to limit the number of writes to IDEAL_GO_RESOLUTION. | |||
| 47 | void distributedBarrier::computeVarsForN(size_t n) { | |||
| 48 | int nsockets = 1; | |||
| 49 | if (__kmp_topology) { | |||
| 50 | int socket_level = __kmp_topology->get_level(KMP_HW_SOCKET); | |||
| 51 | int core_level = __kmp_topology->get_level(KMP_HW_CORE); | |||
| 52 | int ncores_per_socket = | |||
| 53 | __kmp_topology->calculate_ratio(core_level, socket_level); | |||
| 54 | nsockets = __kmp_topology->get_count(socket_level); | |||
| 55 | ||||
| 56 | if (nsockets <= 0) | |||
| 57 | nsockets = 1; | |||
| 58 | if (ncores_per_socket <= 0) | |||
| 59 | ncores_per_socket = 1; | |||
| 60 | ||||
| 61 | threads_per_go = ncores_per_socket >> 1; | |||
| 62 | if (!fix_threads_per_go) { | |||
| 63 | // Minimize num_gos | |||
| 64 | if (threads_per_go > 4) { | |||
| 65 | if (KMP_OPTIMIZE_FOR_REDUCTIONS0) { | |||
| 66 | threads_per_go = threads_per_go >> 1; | |||
| 67 | } | |||
| 68 | if (threads_per_go > 4 && nsockets == 1) | |||
| 69 | threads_per_go = threads_per_go >> 1; | |||
| 70 | } | |||
| 71 | } | |||
| 72 | if (threads_per_go == 0) | |||
| 73 | threads_per_go = 1; | |||
| 74 | fix_threads_per_go = true; | |||
| 75 | num_gos = n / threads_per_go; | |||
| 76 | if (n % threads_per_go) | |||
| 77 | num_gos++; | |||
| 78 | if (nsockets == 1 || num_gos == 1) | |||
| 79 | num_groups = 1; | |||
| 80 | else { | |||
| 81 | num_groups = num_gos / nsockets; | |||
| 82 | if (num_gos % nsockets) | |||
| 83 | num_groups++; | |||
| 84 | } | |||
| 85 | if (num_groups <= 0) | |||
| 86 | num_groups = 1; | |||
| 87 | gos_per_group = num_gos / num_groups; | |||
| 88 | if (num_gos % num_groups) | |||
| 89 | gos_per_group++; | |||
| 90 | threads_per_group = threads_per_go * gos_per_group; | |||
| 91 | } else { | |||
| 92 | num_gos = n / threads_per_go; | |||
| 93 | if (n % threads_per_go) | |||
| 94 | num_gos++; | |||
| 95 | if (num_gos == 1) | |||
| 96 | num_groups = 1; | |||
| 97 | else { | |||
| 98 | num_groups = num_gos / 2; | |||
| 99 | if (num_gos % 2) | |||
| 100 | num_groups++; | |||
| 101 | } | |||
| 102 | gos_per_group = num_gos / num_groups; | |||
| 103 | if (num_gos % num_groups) | |||
| 104 | gos_per_group++; | |||
| 105 | threads_per_group = threads_per_go * gos_per_group; | |||
| 106 | } | |||
| 107 | } | |||
| 108 | ||||
| 109 | void distributedBarrier::computeGo(size_t n) { | |||
| 110 | // Minimize num_gos | |||
| 111 | for (num_gos = 1;; num_gos++) | |||
| 112 | if (IDEAL_CONTENTION * num_gos >= n) | |||
| 113 | break; | |||
| 114 | threads_per_go = n / num_gos; | |||
| 115 | if (n % num_gos) | |||
| 116 | threads_per_go++; | |||
| 117 | while (num_gos > MAX_GOS) { | |||
| 118 | threads_per_go++; | |||
| 119 | num_gos = n / threads_per_go; | |||
| 120 | if (n % threads_per_go) | |||
| 121 | num_gos++; | |||
| 122 | } | |||
| 123 | computeVarsForN(n); | |||
| 124 | } | |||
| 125 | ||||
| 126 | // This function is to resize the barrier arrays when the new number of threads | |||
| 127 | // exceeds max_threads, which is the current size of all the arrays | |||
| 128 | void distributedBarrier::resize(size_t nthr) { | |||
| 129 | KMP_DEBUG_ASSERT(nthr > max_threads)if (!(nthr > max_threads)) { __kmp_debug_assert("nthr > max_threads" , "openmp/runtime/src/kmp_barrier.cpp", 129); }; | |||
| 130 | ||||
| 131 | // expand to requested size * 2 | |||
| 132 | max_threads = nthr * 2; | |||
| 133 | ||||
| 134 | // allocate arrays to new max threads | |||
| 135 | for (int i = 0; i < MAX_ITERS; ++i) { | |||
| 136 | if (flags[i]) | |||
| 137 | flags[i] = (flags_s *)KMP_INTERNAL_REALLOC(flags[i],realloc((flags[i]), (max_threads * sizeof(flags_s))) | |||
| 138 | max_threads * sizeof(flags_s))realloc((flags[i]), (max_threads * sizeof(flags_s))); | |||
| 139 | else | |||
| 140 | flags[i] = (flags_s *)KMP_INTERNAL_MALLOC(max_threads * sizeof(flags_s))malloc(max_threads * sizeof(flags_s)); | |||
| 141 | } | |||
| 142 | ||||
| 143 | if (go) | |||
| 144 | go = (go_s *)KMP_INTERNAL_REALLOC(go, max_threads * sizeof(go_s))realloc((go), (max_threads * sizeof(go_s))); | |||
| 145 | else | |||
| 146 | go = (go_s *)KMP_INTERNAL_MALLOC(max_threads * sizeof(go_s))malloc(max_threads * sizeof(go_s)); | |||
| 147 | ||||
| 148 | if (iter) | |||
| 149 | iter = (iter_s *)KMP_INTERNAL_REALLOC(iter, max_threads * sizeof(iter_s))realloc((iter), (max_threads * sizeof(iter_s))); | |||
| 150 | else | |||
| 151 | iter = (iter_s *)KMP_INTERNAL_MALLOC(max_threads * sizeof(iter_s))malloc(max_threads * sizeof(iter_s)); | |||
| 152 | ||||
| 153 | if (sleep) | |||
| 154 | sleep = | |||
| 155 | (sleep_s *)KMP_INTERNAL_REALLOC(sleep, max_threads * sizeof(sleep_s))realloc((sleep), (max_threads * sizeof(sleep_s))); | |||
| 156 | else | |||
| 157 | sleep = (sleep_s *)KMP_INTERNAL_MALLOC(max_threads * sizeof(sleep_s))malloc(max_threads * sizeof(sleep_s)); | |||
| 158 | } | |||
| 159 | ||||
| 160 | // This function is to set all the go flags that threads might be waiting | |||
| 161 | // on, and when blocktime is not infinite, it should be followed by a wake-up | |||
| 162 | // call to each thread | |||
| 163 | kmp_uint64 distributedBarrier::go_release() { | |||
| 164 | kmp_uint64 next_go = iter[0].iter + distributedBarrier::MAX_ITERS; | |||
| 165 | for (size_t j = 0; j < num_gos; j++) { | |||
| 166 | go[j].go.store(next_go); | |||
| 167 | } | |||
| 168 | return next_go; | |||
| 169 | } | |||
| 170 | ||||
| 171 | void distributedBarrier::go_reset() { | |||
| 172 | for (size_t j = 0; j < max_threads; ++j) { | |||
| 173 | for (size_t i = 0; i < distributedBarrier::MAX_ITERS; ++i) { | |||
| 174 | flags[i][j].stillNeed = 1; | |||
| 175 | } | |||
| 176 | go[j].go.store(0); | |||
| 177 | iter[j].iter = 0; | |||
| 178 | } | |||
| 179 | } | |||
| 180 | ||||
| 181 | // This function inits/re-inits the distributed barrier for a particular number | |||
| 182 | // of threads. If a resize of arrays is needed, it calls the resize function. | |||
| 183 | void distributedBarrier::init(size_t nthr) { | |||
| 184 | size_t old_max = max_threads; | |||
| 185 | if (nthr > max_threads) { // need more space in arrays | |||
| 186 | resize(nthr); | |||
| 187 | } | |||
| 188 | ||||
| 189 | for (size_t i = 0; i < max_threads; i++) { | |||
| 190 | for (size_t j = 0; j < distributedBarrier::MAX_ITERS; j++) { | |||
| 191 | flags[j][i].stillNeed = 1; | |||
| 192 | } | |||
| 193 | go[i].go.store(0); | |||
| 194 | iter[i].iter = 0; | |||
| 195 | if (i >= old_max) | |||
| 196 | sleep[i].sleep = false; | |||
| 197 | } | |||
| 198 | ||||
| 199 | // Recalculate num_gos, etc. based on new nthr | |||
| 200 | computeVarsForN(nthr); | |||
| 201 | ||||
| 202 | num_threads = nthr; | |||
| 203 | ||||
| 204 | if (team_icvs == NULL__null) | |||
| 205 | team_icvs = __kmp_allocate(sizeof(kmp_internal_control_t))___kmp_allocate((sizeof(kmp_internal_control_t)), "openmp/runtime/src/kmp_barrier.cpp" , 205); | |||
| 206 | } | |||
| 207 | ||||
| 208 | // This function is used only when KMP_BLOCKTIME is not infinite. | |||
| 209 | // static | |||
| 210 | void __kmp_dist_barrier_wakeup(enum barrier_type bt, kmp_team_t *team, | |||
| 211 | size_t start, size_t stop, size_t inc, | |||
| 212 | size_t tid) { | |||
| 213 | KMP_DEBUG_ASSERT(__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME)if (!(__kmp_dflt_blocktime != (2147483647))) { __kmp_debug_assert ("__kmp_dflt_blocktime != (2147483647)", "openmp/runtime/src/kmp_barrier.cpp" , 213); }; | |||
| 214 | if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done)) | |||
| 215 | return; | |||
| 216 | ||||
| 217 | kmp_info_t **other_threads = team->t.t_threads; | |||
| 218 | for (size_t thr = start; thr < stop; thr += inc) { | |||
| 219 | KMP_DEBUG_ASSERT(other_threads[thr])if (!(other_threads[thr])) { __kmp_debug_assert("other_threads[thr]" , "openmp/runtime/src/kmp_barrier.cpp", 219); }; | |||
| 220 | int gtid = other_threads[thr]->th.th_info.ds.ds_gtid; | |||
| 221 | // Wake up worker regardless of if it appears to be sleeping or not | |||
| 222 | __kmp_atomic_resume_64(gtid, (kmp_atomic_flag_64<> *)NULL__null); | |||
| 223 | } | |||
| 224 | } | |||
| 225 | ||||
| 226 | static void __kmp_dist_barrier_gather( | |||
| 227 | enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, | |||
| 228 | void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj), void *itt_sync_obj) { | |||
| 229 | KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_dist_gather)((void)0); | |||
| 230 | kmp_team_t *team; | |||
| 231 | distributedBarrier *b; | |||
| 232 | kmp_info_t **other_threads; | |||
| 233 | kmp_uint64 my_current_iter, my_next_iter; | |||
| 234 | kmp_uint32 nproc; | |||
| 235 | bool group_leader; | |||
| 236 | ||||
| 237 | team = this_thr->th.th_team; | |||
| 238 | nproc = this_thr->th.th_team_nproc; | |||
| 239 | other_threads = team->t.t_threads; | |||
| 240 | b = team->t.b; | |||
| 241 | my_current_iter = b->iter[tid].iter; | |||
| 242 | my_next_iter = (my_current_iter + 1) % distributedBarrier::MAX_ITERS; | |||
| 243 | group_leader = ((tid % b->threads_per_group) == 0); | |||
| 244 | ||||
| 245 | KA_TRACE(20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_dist_barrier_gather: T#%d(%d:%d) enter; barrier type %d\n" , gtid, team->t.t_id, tid, bt); } | |||
| 246 | ("__kmp_dist_barrier_gather: T#%d(%d:%d) enter; barrier type %d\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_dist_barrier_gather: T#%d(%d:%d) enter; barrier type %d\n" , gtid, team->t.t_id, tid, bt); } | |||
| 247 | gtid, team->t.t_id, tid, bt))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_dist_barrier_gather: T#%d(%d:%d) enter; barrier type %d\n" , gtid, team->t.t_id, tid, bt); }; | |||
| 248 | ||||
| 249 | #if USE_ITT_BUILD1 && USE_ITT_NOTIFY1 | |||
| 250 | // Barrier imbalance - save arrive time to the thread | |||
| 251 | if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) { | |||
| 252 | this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time = | |||
| 253 | __itt_get_timestamp(!__kmp_itt_get_timestamp_ptr__3_0) ? 0 : __kmp_itt_get_timestamp_ptr__3_0(); | |||
| 254 | } | |||
| 255 | #endif | |||
| 256 | ||||
| 257 | if (group_leader) { | |||
| 258 | // Start from the thread after the group leader | |||
| 259 | size_t group_start = tid + 1; | |||
| 260 | size_t group_end = tid + b->threads_per_group; | |||
| 261 | size_t threads_pending = 0; | |||
| 262 | ||||
| 263 | if (group_end > nproc) | |||
| 264 | group_end = nproc; | |||
| 265 | do { // wait for threads in my group | |||
| 266 | threads_pending = 0; | |||
| 267 | // Check all the flags every time to avoid branch misspredict | |||
| 268 | for (size_t thr = group_start; thr < group_end; thr++) { | |||
| 269 | // Each thread uses a different cache line | |||
| 270 | threads_pending += b->flags[my_current_iter][thr].stillNeed; | |||
| 271 | } | |||
| 272 | // Execute tasks here | |||
| 273 | if (__kmp_tasking_mode != tskm_immediate_exec) { | |||
| 274 | kmp_task_team_t *task_team = this_thr->th.th_task_team; | |||
| 275 | if (task_team != NULL__null) { | |||
| 276 | if (TCR_SYNC_4(task_team->tt.tt_active)(task_team->tt.tt_active)) { | |||
| 277 | if (KMP_TASKING_ENABLED(task_team)((!0) == ((task_team)->tt.tt_found_tasks))) { | |||
| 278 | int tasks_completed = FALSE0; | |||
| 279 | __kmp_atomic_execute_tasks_64( | |||
| 280 | this_thr, gtid, (kmp_atomic_flag_64<> *)NULL__null, FALSE0, | |||
| 281 | &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj, 0); | |||
| 282 | } else | |||
| 283 | this_thr->th.th_reap_state = KMP_SAFE_TO_REAP1; | |||
| 284 | } | |||
| 285 | } else { | |||
| 286 | this_thr->th.th_reap_state = KMP_SAFE_TO_REAP1; | |||
| 287 | } // if | |||
| 288 | } | |||
| 289 | if (TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done)) { | |||
| 290 | if (__kmp_global.g.g_abort) | |||
| 291 | __kmp_abort_thread(); | |||
| 292 | break; | |||
| 293 | } else if (__kmp_tasking_mode != tskm_immediate_exec && | |||
| 294 | this_thr->th.th_reap_state == KMP_SAFE_TO_REAP1) { | |||
| 295 | this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP0; | |||
| 296 | } | |||
| 297 | } while (threads_pending > 0); | |||
| 298 | ||||
| 299 | if (reduce) { // Perform reduction if needed | |||
| 300 | OMPT_REDUCTION_DECL(this_thr, gtid)ompt_data_t *my_task_data = (&(this_thr->th.th_current_task ->ompt_task_info.task_data)); ompt_data_t *my_parallel_data = (&(this_thr->th.th_team->t.ompt_team_info.parallel_data )); void *return_address = __ompt_load_return_address(gtid);; | |||
| 301 | OMPT_REDUCTION_BEGINif (ompt_enabled.enabled && ompt_enabled.ompt_callback_reduction ) { ompt_callbacks.ompt_callback_reduction_callback( ompt_sync_region_reduction , ompt_scope_begin, my_parallel_data, my_task_data, return_address ); }; | |||
| 302 | // Group leader reduces all threads in group | |||
| 303 | for (size_t thr = group_start; thr < group_end; thr++) { | |||
| 304 | (*reduce)(this_thr->th.th_local.reduce_data, | |||
| 305 | other_threads[thr]->th.th_local.reduce_data); | |||
| 306 | } | |||
| 307 | OMPT_REDUCTION_ENDif (ompt_enabled.enabled && ompt_enabled.ompt_callback_reduction ) { ompt_callbacks.ompt_callback_reduction_callback( ompt_sync_region_reduction , ompt_scope_end, my_parallel_data, my_task_data, return_address ); }; | |||
| 308 | } | |||
| 309 | ||||
| 310 | // Set flag for next iteration | |||
| 311 | b->flags[my_next_iter][tid].stillNeed = 1; | |||
| 312 | // Each thread uses a different cache line; resets stillNeed to 0 to | |||
| 313 | // indicate it has reached the barrier | |||
| 314 | b->flags[my_current_iter][tid].stillNeed = 0; | |||
| 315 | ||||
| 316 | do { // wait for all group leaders | |||
| 317 | threads_pending = 0; | |||
| 318 | for (size_t thr = 0; thr < nproc; thr += b->threads_per_group) { | |||
| 319 | threads_pending += b->flags[my_current_iter][thr].stillNeed; | |||
| 320 | } | |||
| 321 | // Execute tasks here | |||
| 322 | if (__kmp_tasking_mode != tskm_immediate_exec) { | |||
| 323 | kmp_task_team_t *task_team = this_thr->th.th_task_team; | |||
| 324 | if (task_team != NULL__null) { | |||
| 325 | if (TCR_SYNC_4(task_team->tt.tt_active)(task_team->tt.tt_active)) { | |||
| 326 | if (KMP_TASKING_ENABLED(task_team)((!0) == ((task_team)->tt.tt_found_tasks))) { | |||
| 327 | int tasks_completed = FALSE0; | |||
| 328 | __kmp_atomic_execute_tasks_64( | |||
| 329 | this_thr, gtid, (kmp_atomic_flag_64<> *)NULL__null, FALSE0, | |||
| 330 | &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj, 0); | |||
| 331 | } else | |||
| 332 | this_thr->th.th_reap_state = KMP_SAFE_TO_REAP1; | |||
| 333 | } | |||
| 334 | } else { | |||
| 335 | this_thr->th.th_reap_state = KMP_SAFE_TO_REAP1; | |||
| 336 | } // if | |||
| 337 | } | |||
| 338 | if (TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done)) { | |||
| 339 | if (__kmp_global.g.g_abort) | |||
| 340 | __kmp_abort_thread(); | |||
| 341 | break; | |||
| 342 | } else if (__kmp_tasking_mode != tskm_immediate_exec && | |||
| 343 | this_thr->th.th_reap_state == KMP_SAFE_TO_REAP1) { | |||
| 344 | this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP0; | |||
| 345 | } | |||
| 346 | } while (threads_pending > 0); | |||
| 347 | ||||
| 348 | if (reduce) { // Perform reduction if needed | |||
| 349 | if (KMP_MASTER_TID(tid)(0 == (tid))) { // Master reduces over group leaders | |||
| 350 | OMPT_REDUCTION_DECL(this_thr, gtid)ompt_data_t *my_task_data = (&(this_thr->th.th_current_task ->ompt_task_info.task_data)); ompt_data_t *my_parallel_data = (&(this_thr->th.th_team->t.ompt_team_info.parallel_data )); void *return_address = __ompt_load_return_address(gtid);; | |||
| 351 | OMPT_REDUCTION_BEGINif (ompt_enabled.enabled && ompt_enabled.ompt_callback_reduction ) { ompt_callbacks.ompt_callback_reduction_callback( ompt_sync_region_reduction , ompt_scope_begin, my_parallel_data, my_task_data, return_address ); }; | |||
| 352 | for (size_t thr = b->threads_per_group; thr < nproc; | |||
| 353 | thr += b->threads_per_group) { | |||
| 354 | (*reduce)(this_thr->th.th_local.reduce_data, | |||
| 355 | other_threads[thr]->th.th_local.reduce_data); | |||
| 356 | } | |||
| 357 | OMPT_REDUCTION_ENDif (ompt_enabled.enabled && ompt_enabled.ompt_callback_reduction ) { ompt_callbacks.ompt_callback_reduction_callback( ompt_sync_region_reduction , ompt_scope_end, my_parallel_data, my_task_data, return_address ); }; | |||
| 358 | } | |||
| 359 | } | |||
| 360 | } else { | |||
| 361 | // Set flag for next iteration | |||
| 362 | b->flags[my_next_iter][tid].stillNeed = 1; | |||
| 363 | // Each thread uses a different cache line; resets stillNeed to 0 to | |||
| 364 | // indicate it has reached the barrier | |||
| 365 | b->flags[my_current_iter][tid].stillNeed = 0; | |||
| 366 | } | |||
| 367 | ||||
| 368 | KMP_MFENCE()if (__builtin_expect(!!(!__kmp_cpuinfo.initialized), 0)) { __kmp_query_cpuid (&__kmp_cpuinfo); } if (__kmp_cpuinfo.flags.sse2) { __sync_synchronize (); }; | |||
| 369 | ||||
| 370 | KA_TRACE(20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_dist_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n" , gtid, team->t.t_id, tid, bt); } | |||
| 371 | ("__kmp_dist_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_dist_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n" , gtid, team->t.t_id, tid, bt); } | |||
| 372 | gtid, team->t.t_id, tid, bt))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_dist_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n" , gtid, team->t.t_id, tid, bt); }; | |||
| 373 | } | |||
| 374 | ||||
| 375 | static void __kmp_dist_barrier_release( | |||
| 376 | enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, | |||
| 377 | int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj), void *itt_sync_obj) { | |||
| 378 | KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_dist_release)((void)0); | |||
| 379 | kmp_team_t *team; | |||
| 380 | distributedBarrier *b; | |||
| 381 | kmp_bstate_t *thr_bar; | |||
| 382 | kmp_uint64 my_current_iter, next_go; | |||
| 383 | size_t my_go_index; | |||
| 384 | bool group_leader; | |||
| 385 | ||||
| 386 | KA_TRACE(20, ("__kmp_dist_barrier_release: T#%d(%d) enter; barrier type %d\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_dist_barrier_release: T#%d(%d) enter; barrier type %d\n" , gtid, tid, bt); } | |||
| 387 | gtid, tid, bt))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_dist_barrier_release: T#%d(%d) enter; barrier type %d\n" , gtid, tid, bt); }; | |||
| 388 | ||||
| 389 | thr_bar = &this_thr->th.th_bar[bt].bb; | |||
| 390 | ||||
| 391 | if (!KMP_MASTER_TID(tid)(0 == (tid))) { | |||
| 392 | // workers and non-master group leaders need to check their presence in team | |||
| 393 | do { | |||
| 394 | if (this_thr->th.th_used_in_team.load() != 1 && | |||
| 395 | this_thr->th.th_used_in_team.load() != 3) { | |||
| 396 | // Thread is not in use in a team. Wait on location in tid's thread | |||
| 397 | // struct. The 0 value tells anyone looking that this thread is spinning | |||
| 398 | // or sleeping until this location becomes 3 again; 3 is the transition | |||
| 399 | // state to get to 1 which is waiting on go and being in the team | |||
| 400 | kmp_flag_32<false, false> my_flag(&(this_thr->th.th_used_in_team), 3); | |||
| 401 | if (KMP_COMPARE_AND_STORE_ACQ32(&(this_thr->th.th_used_in_team), 2,__sync_bool_compare_and_swap((volatile kmp_uint32 *)(&(this_thr ->th.th_used_in_team)), (kmp_uint32)(2), (kmp_uint32)(0)) | |||
| 402 | 0)__sync_bool_compare_and_swap((volatile kmp_uint32 *)(&(this_thr ->th.th_used_in_team)), (kmp_uint32)(2), (kmp_uint32)(0)) || | |||
| 403 | this_thr->th.th_used_in_team.load() == 0) { | |||
| 404 | my_flag.wait(this_thr, true USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | |||
| 405 | } | |||
| 406 | #if USE_ITT_BUILD1 && USE_ITT_NOTIFY1 | |||
| 407 | if ((__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 && itt_sync_obj == NULL__null) || KMP_ITT_DEBUG0) { | |||
| 408 | // In fork barrier where we could not get the object reliably | |||
| 409 | itt_sync_obj = | |||
| 410 | __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1); | |||
| 411 | // Cancel wait on previous parallel region... | |||
| 412 | __kmp_itt_task_starting(itt_sync_obj); | |||
| 413 | ||||
| 414 | if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done)) | |||
| 415 | return; | |||
| 416 | ||||
| 417 | itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier); | |||
| 418 | if (itt_sync_obj != NULL__null) | |||
| 419 | // Call prepare as early as possible for "new" barrier | |||
| 420 | __kmp_itt_task_finished(itt_sync_obj); | |||
| 421 | } else | |||
| 422 | #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */ | |||
| 423 | if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done)) | |||
| 424 | return; | |||
| 425 | } | |||
| 426 | if (this_thr->th.th_used_in_team.load() != 1 && | |||
| 427 | this_thr->th.th_used_in_team.load() != 3) // spurious wake-up? | |||
| 428 | continue; | |||
| 429 | if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done)) | |||
| 430 | return; | |||
| 431 | ||||
| 432 | // At this point, the thread thinks it is in use in a team, or in | |||
| 433 | // transition to be used in a team, but it might have reached this barrier | |||
| 434 | // before it was marked unused by the team. Unused threads are awoken and | |||
| 435 | // shifted to wait on local thread struct elsewhere. It also might reach | |||
| 436 | // this point by being picked up for use by a different team. Either way, | |||
| 437 | // we need to update the tid. | |||
| 438 | tid = __kmp_tid_from_gtid(gtid); | |||
| 439 | team = this_thr->th.th_team; | |||
| 440 | KMP_DEBUG_ASSERT(tid >= 0)if (!(tid >= 0)) { __kmp_debug_assert("tid >= 0", "openmp/runtime/src/kmp_barrier.cpp" , 440); }; | |||
| 441 | KMP_DEBUG_ASSERT(team)if (!(team)) { __kmp_debug_assert("team", "openmp/runtime/src/kmp_barrier.cpp" , 441); }; | |||
| 442 | b = team->t.b; | |||
| 443 | my_current_iter = b->iter[tid].iter; | |||
| 444 | next_go = my_current_iter + distributedBarrier::MAX_ITERS; | |||
| 445 | my_go_index = tid / b->threads_per_go; | |||
| 446 | if (this_thr->th.th_used_in_team.load() == 3) { | |||
| 447 | KMP_COMPARE_AND_STORE_ACQ32(&(this_thr->th.th_used_in_team), 3, 1)__sync_bool_compare_and_swap((volatile kmp_uint32 *)(&(this_thr ->th.th_used_in_team)), (kmp_uint32)(3), (kmp_uint32)(1)); | |||
| 448 | } | |||
| 449 | // Check if go flag is set | |||
| 450 | if (b->go[my_go_index].go.load() != next_go) { | |||
| 451 | // Wait on go flag on team | |||
| 452 | kmp_atomic_flag_64<false, true> my_flag( | |||
| 453 | &(b->go[my_go_index].go), next_go, &(b->sleep[tid].sleep)); | |||
| 454 | my_flag.wait(this_thr, true USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | |||
| 455 | KMP_DEBUG_ASSERT(my_current_iter == b->iter[tid].iter ||if (!(my_current_iter == b->iter[tid].iter || b->iter[tid ].iter == 0)) { __kmp_debug_assert("my_current_iter == b->iter[tid].iter || b->iter[tid].iter == 0" , "openmp/runtime/src/kmp_barrier.cpp", 456); } | |||
| 456 | b->iter[tid].iter == 0)if (!(my_current_iter == b->iter[tid].iter || b->iter[tid ].iter == 0)) { __kmp_debug_assert("my_current_iter == b->iter[tid].iter || b->iter[tid].iter == 0" , "openmp/runtime/src/kmp_barrier.cpp", 456); }; | |||
| 457 | KMP_DEBUG_ASSERT(b->sleep[tid].sleep == false)if (!(b->sleep[tid].sleep == false)) { __kmp_debug_assert( "b->sleep[tid].sleep == false", "openmp/runtime/src/kmp_barrier.cpp" , 457); }; | |||
| 458 | } | |||
| 459 | ||||
| 460 | if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done)) | |||
| 461 | return; | |||
| 462 | // At this point, the thread's go location was set. This means the primary | |||
| 463 | // thread is safely in the barrier, and so this thread's data is | |||
| 464 | // up-to-date, but we should check again that this thread is really in | |||
| 465 | // use in the team, as it could have been woken up for the purpose of | |||
| 466 | // changing team size, or reaping threads at shutdown. | |||
| 467 | if (this_thr->th.th_used_in_team.load() == 1) | |||
| 468 | break; | |||
| 469 | } while (1); | |||
| 470 | ||||
| 471 | if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done)) | |||
| 472 | return; | |||
| 473 | ||||
| 474 | group_leader = ((tid % b->threads_per_group) == 0); | |||
| 475 | if (group_leader) { | |||
| 476 | // Tell all the threads in my group they can go! | |||
| 477 | for (size_t go_idx = my_go_index + 1; | |||
| 478 | go_idx < my_go_index + b->gos_per_group; go_idx++) { | |||
| 479 | b->go[go_idx].go.store(next_go); | |||
| 480 | } | |||
| 481 | // Fence added so that workers can see changes to go. sfence inadequate. | |||
| 482 | KMP_MFENCE()if (__builtin_expect(!!(!__kmp_cpuinfo.initialized), 0)) { __kmp_query_cpuid (&__kmp_cpuinfo); } if (__kmp_cpuinfo.flags.sse2) { __sync_synchronize (); }; | |||
| 483 | } | |||
| 484 | ||||
| 485 | #if KMP_BARRIER_ICV_PUSH1 | |||
| 486 | if (propagate_icvs) { // copy ICVs to final dest | |||
| 487 | __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, | |||
| 488 | tid, FALSE0); | |||
| 489 | copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, | |||
| 490 | (kmp_internal_control_t *)team->t.b->team_icvs); | |||
| 491 | copy_icvs(&thr_bar->th_fixed_icvs, | |||
| 492 | &team->t.t_implicit_task_taskdata[tid].td_icvs); | |||
| 493 | } | |||
| 494 | #endif | |||
| 495 | if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME(2147483647) && group_leader) { | |||
| 496 | // This thread is now awake and participating in the barrier; | |||
| 497 | // wake up the other threads in the group | |||
| 498 | size_t nproc = this_thr->th.th_team_nproc; | |||
| 499 | size_t group_end = tid + b->threads_per_group; | |||
| 500 | if (nproc < group_end) | |||
| 501 | group_end = nproc; | |||
| 502 | __kmp_dist_barrier_wakeup(bt, team, tid + 1, group_end, 1, tid); | |||
| 503 | } | |||
| 504 | } else { // Primary thread | |||
| 505 | team = this_thr->th.th_team; | |||
| 506 | b = team->t.b; | |||
| 507 | my_current_iter = b->iter[tid].iter; | |||
| 508 | next_go = my_current_iter + distributedBarrier::MAX_ITERS; | |||
| 509 | #if KMP_BARRIER_ICV_PUSH1 | |||
| 510 | if (propagate_icvs) { | |||
| 511 | // primary thread has ICVs in final destination; copy | |||
| 512 | copy_icvs(&thr_bar->th_fixed_icvs, | |||
| 513 | &team->t.t_implicit_task_taskdata[tid].td_icvs); | |||
| 514 | } | |||
| 515 | #endif | |||
| 516 | // Tell all the group leaders they can go! | |||
| 517 | for (size_t go_idx = 0; go_idx < b->num_gos; go_idx += b->gos_per_group) { | |||
| 518 | b->go[go_idx].go.store(next_go); | |||
| 519 | } | |||
| 520 | ||||
| 521 | if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME(2147483647)) { | |||
| 522 | // Wake-up the group leaders | |||
| 523 | size_t nproc = this_thr->th.th_team_nproc; | |||
| 524 | __kmp_dist_barrier_wakeup(bt, team, tid + b->threads_per_group, nproc, | |||
| 525 | b->threads_per_group, tid); | |||
| 526 | } | |||
| 527 | ||||
| 528 | // Tell all the threads in my group they can go! | |||
| 529 | for (size_t go_idx = 1; go_idx < b->gos_per_group; go_idx++) { | |||
| 530 | b->go[go_idx].go.store(next_go); | |||
| 531 | } | |||
| 532 | ||||
| 533 | // Fence added so that workers can see changes to go. sfence inadequate. | |||
| 534 | KMP_MFENCE()if (__builtin_expect(!!(!__kmp_cpuinfo.initialized), 0)) { __kmp_query_cpuid (&__kmp_cpuinfo); } if (__kmp_cpuinfo.flags.sse2) { __sync_synchronize (); }; | |||
| 535 | ||||
| 536 | if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME(2147483647)) { | |||
| 537 | // Wake-up the other threads in my group | |||
| 538 | size_t nproc = this_thr->th.th_team_nproc; | |||
| 539 | size_t group_end = tid + b->threads_per_group; | |||
| 540 | if (nproc < group_end) | |||
| 541 | group_end = nproc; | |||
| 542 | __kmp_dist_barrier_wakeup(bt, team, tid + 1, group_end, 1, tid); | |||
| 543 | } | |||
| 544 | } | |||
| 545 | // Update to next iteration | |||
| 546 | KMP_ASSERT(my_current_iter == b->iter[tid].iter)if (!(my_current_iter == b->iter[tid].iter)) { __kmp_debug_assert ("my_current_iter == b->iter[tid].iter", "openmp/runtime/src/kmp_barrier.cpp" , 546); }; | |||
| 547 | b->iter[tid].iter = (b->iter[tid].iter + 1) % distributedBarrier::MAX_ITERS; | |||
| 548 | ||||
| 549 | KA_TRACE(if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_dist_barrier_release: T#%d(%d:%d) exit for barrier type %d\n" , gtid, team->t.t_id, tid, bt); } | |||
| 550 | 20, ("__kmp_dist_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_dist_barrier_release: T#%d(%d:%d) exit for barrier type %d\n" , gtid, team->t.t_id, tid, bt); } | |||
| 551 | gtid, team->t.t_id, tid, bt))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_dist_barrier_release: T#%d(%d:%d) exit for barrier type %d\n" , gtid, team->t.t_id, tid, bt); }; | |||
| 552 | } | |||
| 553 | ||||
| 554 | // Linear Barrier | |||
| 555 | template <bool cancellable = false> | |||
| 556 | static bool __kmp_linear_barrier_gather_template( | |||
| 557 | enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, | |||
| 558 | void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj), void *itt_sync_obj) { | |||
| 559 | KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_linear_gather)((void)0); | |||
| 560 | kmp_team_t *team = this_thr->th.th_team; | |||
| 561 | kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb; | |||
| 562 | kmp_info_t **other_threads = team->t.t_threads; | |||
| 563 | ||||
| 564 | KA_TRACE(if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n" , gtid, team->t.t_id, tid, bt); } | |||
| 565 | 20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n" , gtid, team->t.t_id, tid, bt); } | |||
| 566 | ("__kmp_linear_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n" , gtid, team->t.t_id, tid, bt); } | |||
| 567 | gtid, team->t.t_id, tid, bt))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n" , gtid, team->t.t_id, tid, bt); }; | |||
| 568 | KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid])if (!(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid ])) { __kmp_debug_assert("this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]" , "openmp/runtime/src/kmp_barrier.cpp", 568); }; | |||
| 569 | ||||
| 570 | #if USE_ITT_BUILD1 && USE_ITT_NOTIFY1 | |||
| 571 | // Barrier imbalance - save arrive time to the thread | |||
| 572 | if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) { | |||
| 573 | this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time = | |||
| 574 | __itt_get_timestamp(!__kmp_itt_get_timestamp_ptr__3_0) ? 0 : __kmp_itt_get_timestamp_ptr__3_0(); | |||
| 575 | } | |||
| 576 | #endif | |||
| 577 | // We now perform a linear reduction to signal that all of the threads have | |||
| 578 | // arrived. | |||
| 579 | if (!KMP_MASTER_TID(tid)(0 == (tid))) { | |||
| 580 | KA_TRACE(20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d)" "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid , __kmp_gtid_from_tid(0, team), team->t.t_id, 0, &thr_bar ->b_arrived, thr_bar->b_arrived, thr_bar->b_arrived + (1 << 2)); } | |||
| 581 | ("__kmp_linear_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d)"if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d)" "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid , __kmp_gtid_from_tid(0, team), team->t.t_id, 0, &thr_bar ->b_arrived, thr_bar->b_arrived, thr_bar->b_arrived + (1 << 2)); } | |||
| 582 | "arrived(%p): %llu => %llu\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d)" "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid , __kmp_gtid_from_tid(0, team), team->t.t_id, 0, &thr_bar ->b_arrived, thr_bar->b_arrived, thr_bar->b_arrived + (1 << 2)); } | |||
| 583 | gtid, team->t.t_id, tid, __kmp_gtid_from_tid(0, team),if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d)" "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid , __kmp_gtid_from_tid(0, team), team->t.t_id, 0, &thr_bar ->b_arrived, thr_bar->b_arrived, thr_bar->b_arrived + (1 << 2)); } | |||
| 584 | team->t.t_id, 0, &thr_bar->b_arrived, thr_bar->b_arrived,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d)" "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid , __kmp_gtid_from_tid(0, team), team->t.t_id, 0, &thr_bar ->b_arrived, thr_bar->b_arrived, thr_bar->b_arrived + (1 << 2)); } | |||
| 585 | thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d)" "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid , __kmp_gtid_from_tid(0, team), team->t.t_id, 0, &thr_bar ->b_arrived, thr_bar->b_arrived, thr_bar->b_arrived + (1 << 2)); }; | |||
| 586 | // Mark arrival to primary thread | |||
| 587 | /* After performing this write, a worker thread may not assume that the team | |||
| 588 | is valid any more - it could be deallocated by the primary thread at any | |||
| 589 | time. */ | |||
| 590 | kmp_flag_64<> flag(&thr_bar->b_arrived, other_threads[0]); | |||
| 591 | flag.release(); | |||
| 592 | } else { | |||
| 593 | kmp_balign_team_t *team_bar = &team->t.t_bar[bt]; | |||
| 594 | int nproc = this_thr->th.th_team_nproc; | |||
| 595 | int i; | |||
| 596 | // Don't have to worry about sleep bit here or atomic since team setting | |||
| 597 | kmp_uint64 new_state = team_bar->b_arrived + KMP_BARRIER_STATE_BUMP(1 << 2); | |||
| 598 | ||||
| 599 | // Collect all the worker team member threads. | |||
| 600 | for (i = 1; i < nproc; ++i) { | |||
| 601 | #if KMP_CACHE_MANAGE | |||
| 602 | // Prefetch next thread's arrived count | |||
| 603 | if (i + 1 < nproc) | |||
| 604 | KMP_CACHE_PREFETCH(&other_threads[i + 1]->th.th_bar[bt].bb.b_arrived); | |||
| 605 | #endif /* KMP_CACHE_MANAGE */ | |||
| 606 | KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) " "arrived(%p) == %llu\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (i, team), team->t.t_id, i, &other_threads[i]->th.th_bar [bt].bb.b_arrived, new_state); } | |||
| 607 | "arrived(%p) == %llu\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) " "arrived(%p) == %llu\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (i, team), team->t.t_id, i, &other_threads[i]->th.th_bar [bt].bb.b_arrived, new_state); } | |||
| 608 | gtid, team->t.t_id, tid, __kmp_gtid_from_tid(i, team),if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) " "arrived(%p) == %llu\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (i, team), team->t.t_id, i, &other_threads[i]->th.th_bar [bt].bb.b_arrived, new_state); } | |||
| 609 | team->t.t_id, i,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) " "arrived(%p) == %llu\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (i, team), team->t.t_id, i, &other_threads[i]->th.th_bar [bt].bb.b_arrived, new_state); } | |||
| 610 | &other_threads[i]->th.th_bar[bt].bb.b_arrived, new_state))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) " "arrived(%p) == %llu\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (i, team), team->t.t_id, i, &other_threads[i]->th.th_bar [bt].bb.b_arrived, new_state); }; | |||
| 611 | ||||
| 612 | // Wait for worker thread to arrive | |||
| 613 | if (cancellable) { | |||
| 614 | kmp_flag_64<true, false> flag( | |||
| 615 | &other_threads[i]->th.th_bar[bt].bb.b_arrived, new_state); | |||
| 616 | if (flag.wait(this_thr, FALSE0 USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj)) | |||
| 617 | return true; | |||
| 618 | } else { | |||
| 619 | kmp_flag_64<> flag(&other_threads[i]->th.th_bar[bt].bb.b_arrived, | |||
| 620 | new_state); | |||
| 621 | flag.wait(this_thr, FALSE0 USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | |||
| 622 | } | |||
| 623 | #if USE_ITT_BUILD1 && USE_ITT_NOTIFY1 | |||
| 624 | // Barrier imbalance - write min of the thread time and the other thread | |||
| 625 | // time to the thread. | |||
| 626 | if (__kmp_forkjoin_frames_mode == 2) { | |||
| 627 | this_thr->th.th_bar_min_time = KMP_MIN(((this_thr->th.th_bar_min_time) < (other_threads[i]-> th.th_bar_min_time) ? (this_thr->th.th_bar_min_time) : (other_threads [i]->th.th_bar_min_time)) | |||
| 628 | this_thr->th.th_bar_min_time, other_threads[i]->th.th_bar_min_time)((this_thr->th.th_bar_min_time) < (other_threads[i]-> th.th_bar_min_time) ? (this_thr->th.th_bar_min_time) : (other_threads [i]->th.th_bar_min_time)); | |||
| 629 | } | |||
| 630 | #endif | |||
| 631 | if (reduce) { | |||
| 632 | KA_TRACE(100,if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_linear_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n" , gtid, team->t.t_id, tid, __kmp_gtid_from_tid(i, team), team ->t.t_id, i); } | |||
| 633 | ("__kmp_linear_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n",if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_linear_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n" , gtid, team->t.t_id, tid, __kmp_gtid_from_tid(i, team), team ->t.t_id, i); } | |||
| 634 | gtid, team->t.t_id, tid, __kmp_gtid_from_tid(i, team),if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_linear_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n" , gtid, team->t.t_id, tid, __kmp_gtid_from_tid(i, team), team ->t.t_id, i); } | |||
| 635 | team->t.t_id, i))if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_linear_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n" , gtid, team->t.t_id, tid, __kmp_gtid_from_tid(i, team), team ->t.t_id, i); }; | |||
| 636 | OMPT_REDUCTION_DECL(this_thr, gtid)ompt_data_t *my_task_data = (&(this_thr->th.th_current_task ->ompt_task_info.task_data)); ompt_data_t *my_parallel_data = (&(this_thr->th.th_team->t.ompt_team_info.parallel_data )); void *return_address = __ompt_load_return_address(gtid);; | |||
| 637 | OMPT_REDUCTION_BEGINif (ompt_enabled.enabled && ompt_enabled.ompt_callback_reduction ) { ompt_callbacks.ompt_callback_reduction_callback( ompt_sync_region_reduction , ompt_scope_begin, my_parallel_data, my_task_data, return_address ); }; | |||
| 638 | (*reduce)(this_thr->th.th_local.reduce_data, | |||
| 639 | other_threads[i]->th.th_local.reduce_data); | |||
| 640 | OMPT_REDUCTION_ENDif (ompt_enabled.enabled && ompt_enabled.ompt_callback_reduction ) { ompt_callbacks.ompt_callback_reduction_callback( ompt_sync_region_reduction , ompt_scope_end, my_parallel_data, my_task_data, return_address ); }; | |||
| 641 | } | |||
| 642 | } | |||
| 643 | // Don't have to worry about sleep bit here or atomic since team setting | |||
| 644 | team_bar->b_arrived = new_state; | |||
| 645 | KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) set team %d "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_gather: T#%d(%d:%d) set team %d " "arrived(%p) = %llu\n", gtid, team->t.t_id, tid, team-> t.t_id, &team_bar->b_arrived, new_state); } | |||
| 646 | "arrived(%p) = %llu\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_gather: T#%d(%d:%d) set team %d " "arrived(%p) = %llu\n", gtid, team->t.t_id, tid, team-> t.t_id, &team_bar->b_arrived, new_state); } | |||
| 647 | gtid, team->t.t_id, tid, team->t.t_id, &team_bar->b_arrived,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_gather: T#%d(%d:%d) set team %d " "arrived(%p) = %llu\n", gtid, team->t.t_id, tid, team-> t.t_id, &team_bar->b_arrived, new_state); } | |||
| 648 | new_state))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_gather: T#%d(%d:%d) set team %d " "arrived(%p) = %llu\n", gtid, team->t.t_id, tid, team-> t.t_id, &team_bar->b_arrived, new_state); }; | |||
| 649 | } | |||
| 650 | KA_TRACE(if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n" , gtid, team->t.t_id, tid, bt); } | |||
| 651 | 20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n" , gtid, team->t.t_id, tid, bt); } | |||
| 652 | ("__kmp_linear_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n" , gtid, team->t.t_id, tid, bt); } | |||
| 653 | gtid, team->t.t_id, tid, bt))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n" , gtid, team->t.t_id, tid, bt); }; | |||
| 654 | return false; | |||
| 655 | } | |||
| 656 | ||||
| 657 | template <bool cancellable = false> | |||
| 658 | static bool __kmp_linear_barrier_release_template( | |||
| 659 | enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, | |||
| 660 | int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj), void *itt_sync_obj) { | |||
| 661 | KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_linear_release)((void)0); | |||
| 662 | kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb; | |||
| 663 | kmp_team_t *team; | |||
| 664 | ||||
| 665 | if (KMP_MASTER_TID(tid)(0 == (tid))) { | |||
| 666 | unsigned int i; | |||
| 667 | kmp_uint32 nproc = this_thr->th.th_team_nproc; | |||
| 668 | kmp_info_t **other_threads; | |||
| 669 | ||||
| 670 | team = __kmp_threads[gtid]->th.th_team; | |||
| 671 | KMP_DEBUG_ASSERT(team != NULL)if (!(team != __null)) { __kmp_debug_assert("team != __null", "openmp/runtime/src/kmp_barrier.cpp", 671); }; | |||
| 672 | other_threads = team->t.t_threads; | |||
| 673 | ||||
| 674 | KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d(%d:%d) primary enter for "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_release: T#%d(%d:%d) primary enter for " "barrier type %d\n", gtid, team->t.t_id, tid, bt); } | |||
| 675 | "barrier type %d\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_release: T#%d(%d:%d) primary enter for " "barrier type %d\n", gtid, team->t.t_id, tid, bt); } | |||
| 676 | gtid, team->t.t_id, tid, bt))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_release: T#%d(%d:%d) primary enter for " "barrier type %d\n", gtid, team->t.t_id, tid, bt); }; | |||
| 677 | ||||
| 678 | if (nproc > 1) { | |||
| 679 | #if KMP_BARRIER_ICV_PUSH1 | |||
| 680 | { | |||
| 681 | KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy)((void)0); | |||
| 682 | if (propagate_icvs) { | |||
| 683 | ngo_load(&team->t.t_implicit_task_taskdata[0].td_icvs)((void)0); | |||
| 684 | for (i = 1; i < nproc; ++i) { | |||
| 685 | __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[i], | |||
| 686 | team, i, FALSE0); | |||
| 687 | ngo_store_icvs(&team->t.t_implicit_task_taskdata[i].td_icvs,copy_icvs((&team->t.t_implicit_task_taskdata[i].td_icvs ), (&team->t.t_implicit_task_taskdata[0].td_icvs)) | |||
| 688 | &team->t.t_implicit_task_taskdata[0].td_icvs)copy_icvs((&team->t.t_implicit_task_taskdata[i].td_icvs ), (&team->t.t_implicit_task_taskdata[0].td_icvs)); | |||
| 689 | } | |||
| 690 | ngo_sync()((void)0); | |||
| 691 | } | |||
| 692 | } | |||
| 693 | #endif // KMP_BARRIER_ICV_PUSH | |||
| 694 | ||||
| 695 | // Now, release all of the worker threads | |||
| 696 | for (i = 1; i < nproc; ++i) { | |||
| 697 | #if KMP_CACHE_MANAGE | |||
| 698 | // Prefetch next thread's go flag | |||
| 699 | if (i + 1 < nproc) | |||
| 700 | KMP_CACHE_PREFETCH(&other_threads[i + 1]->th.th_bar[bt].bb.b_go); | |||
| 701 | #endif /* KMP_CACHE_MANAGE */ | |||
| 702 | KA_TRACE(if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d) " "go(%p): %u => %u\n", gtid, team->t.t_id, tid, other_threads [i]->th.th_info.ds.ds_gtid, team->t.t_id, i, &other_threads [i]->th.th_bar[bt].bb.b_go, other_threads[i]->th.th_bar [bt].bb.b_go, other_threads[i]->th.th_bar[bt].bb.b_go + (1 << 2)); } | |||
| 703 | 20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d) " "go(%p): %u => %u\n", gtid, team->t.t_id, tid, other_threads [i]->th.th_info.ds.ds_gtid, team->t.t_id, i, &other_threads [i]->th.th_bar[bt].bb.b_go, other_threads[i]->th.th_bar [bt].bb.b_go, other_threads[i]->th.th_bar[bt].bb.b_go + (1 << 2)); } | |||
| 704 | ("__kmp_linear_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d) "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d) " "go(%p): %u => %u\n", gtid, team->t.t_id, tid, other_threads [i]->th.th_info.ds.ds_gtid, team->t.t_id, i, &other_threads [i]->th.th_bar[bt].bb.b_go, other_threads[i]->th.th_bar [bt].bb.b_go, other_threads[i]->th.th_bar[bt].bb.b_go + (1 << 2)); } | |||
| 705 | "go(%p): %u => %u\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d) " "go(%p): %u => %u\n", gtid, team->t.t_id, tid, other_threads [i]->th.th_info.ds.ds_gtid, team->t.t_id, i, &other_threads [i]->th.th_bar[bt].bb.b_go, other_threads[i]->th.th_bar [bt].bb.b_go, other_threads[i]->th.th_bar[bt].bb.b_go + (1 << 2)); } | |||
| 706 | gtid, team->t.t_id, tid, other_threads[i]->th.th_info.ds.ds_gtid,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d) " "go(%p): %u => %u\n", gtid, team->t.t_id, tid, other_threads [i]->th.th_info.ds.ds_gtid, team->t.t_id, i, &other_threads [i]->th.th_bar[bt].bb.b_go, other_threads[i]->th.th_bar [bt].bb.b_go, other_threads[i]->th.th_bar[bt].bb.b_go + (1 << 2)); } | |||
| 707 | team->t.t_id, i, &other_threads[i]->th.th_bar[bt].bb.b_go,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d) " "go(%p): %u => %u\n", gtid, team->t.t_id, tid, other_threads [i]->th.th_info.ds.ds_gtid, team->t.t_id, i, &other_threads [i]->th.th_bar[bt].bb.b_go, other_threads[i]->th.th_bar [bt].bb.b_go, other_threads[i]->th.th_bar[bt].bb.b_go + (1 << 2)); } | |||
| 708 | other_threads[i]->th.th_bar[bt].bb.b_go,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d) " "go(%p): %u => %u\n", gtid, team->t.t_id, tid, other_threads [i]->th.th_info.ds.ds_gtid, team->t.t_id, i, &other_threads [i]->th.th_bar[bt].bb.b_go, other_threads[i]->th.th_bar [bt].bb.b_go, other_threads[i]->th.th_bar[bt].bb.b_go + (1 << 2)); } | |||
| 709 | other_threads[i]->th.th_bar[bt].bb.b_go + KMP_BARRIER_STATE_BUMP))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d) " "go(%p): %u => %u\n", gtid, team->t.t_id, tid, other_threads [i]->th.th_info.ds.ds_gtid, team->t.t_id, i, &other_threads [i]->th.th_bar[bt].bb.b_go, other_threads[i]->th.th_bar [bt].bb.b_go, other_threads[i]->th.th_bar[bt].bb.b_go + (1 << 2)); }; | |||
| 710 | kmp_flag_64<> flag(&other_threads[i]->th.th_bar[bt].bb.b_go, | |||
| 711 | other_threads[i]); | |||
| 712 | flag.release(); | |||
| 713 | } | |||
| 714 | } | |||
| 715 | } else { // Wait for the PRIMARY thread to release us | |||
| 716 | KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d wait go(%p) == %u\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_release: T#%d wait go(%p) == %u\n" , gtid, &thr_bar->b_go, (1 << 2)); } | |||
| 717 | gtid, &thr_bar->b_go, KMP_BARRIER_STATE_BUMP))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_release: T#%d wait go(%p) == %u\n" , gtid, &thr_bar->b_go, (1 << 2)); }; | |||
| 718 | if (cancellable) { | |||
| 719 | kmp_flag_64<true, false> flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP(1 << 2)); | |||
| 720 | if (flag.wait(this_thr, TRUE(!0) USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj)) | |||
| 721 | return true; | |||
| 722 | } else { | |||
| 723 | kmp_flag_64<> flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP(1 << 2)); | |||
| 724 | flag.wait(this_thr, TRUE(!0) USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | |||
| 725 | } | |||
| 726 | #if USE_ITT_BUILD1 && USE_ITT_NOTIFY1 | |||
| 727 | if ((__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 && itt_sync_obj == NULL__null) || KMP_ITT_DEBUG0) { | |||
| 728 | // In a fork barrier; cannot get the object reliably (or ITTNOTIFY is | |||
| 729 | // disabled) | |||
| 730 | itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1); | |||
| 731 | // Cancel wait on previous parallel region... | |||
| 732 | __kmp_itt_task_starting(itt_sync_obj); | |||
| 733 | ||||
| 734 | if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done)) | |||
| 735 | return false; | |||
| 736 | ||||
| 737 | itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier); | |||
| 738 | if (itt_sync_obj != NULL__null) | |||
| 739 | // Call prepare as early as possible for "new" barrier | |||
| 740 | __kmp_itt_task_finished(itt_sync_obj); | |||
| 741 | } else | |||
| 742 | #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */ | |||
| 743 | // Early exit for reaping threads releasing forkjoin barrier | |||
| 744 | if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done)) | |||
| 745 | return false; | |||
| 746 | // The worker thread may now assume that the team is valid. | |||
| 747 | #ifdef KMP_DEBUG1 | |||
| 748 | tid = __kmp_tid_from_gtid(gtid); | |||
| 749 | team = __kmp_threads[gtid]->th.th_team; | |||
| 750 | #endif | |||
| 751 | KMP_DEBUG_ASSERT(team != NULL)if (!(team != __null)) { __kmp_debug_assert("team != __null", "openmp/runtime/src/kmp_barrier.cpp", 751); }; | |||
| 752 | TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE)(thr_bar->b_go) = (0); | |||
| 753 | KA_TRACE(20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_release: T#%d(%d:%d) set go(%p) = %u\n" , gtid, team->t.t_id, tid, &thr_bar->b_go, 0); } | |||
| 754 | ("__kmp_linear_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_release: T#%d(%d:%d) set go(%p) = %u\n" , gtid, team->t.t_id, tid, &thr_bar->b_go, 0); } | |||
| 755 | gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_release: T#%d(%d:%d) set go(%p) = %u\n" , gtid, team->t.t_id, tid, &thr_bar->b_go, 0); }; | |||
| 756 | KMP_MB(); // Flush all pending memory write invalidates. | |||
| 757 | } | |||
| 758 | KA_TRACE(if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_release: T#%d(%d:%d) exit for barrier type %d\n" , gtid, team->t.t_id, tid, bt); } | |||
| 759 | 20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_release: T#%d(%d:%d) exit for barrier type %d\n" , gtid, team->t.t_id, tid, bt); } | |||
| 760 | ("__kmp_linear_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_release: T#%d(%d:%d) exit for barrier type %d\n" , gtid, team->t.t_id, tid, bt); } | |||
| 761 | gtid, team->t.t_id, tid, bt))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_linear_barrier_release: T#%d(%d:%d) exit for barrier type %d\n" , gtid, team->t.t_id, tid, bt); }; | |||
| 762 | return false; | |||
| 763 | } | |||
| 764 | ||||
| 765 | static void __kmp_linear_barrier_gather( | |||
| 766 | enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, | |||
| 767 | void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj), void *itt_sync_obj) { | |||
| 768 | __kmp_linear_barrier_gather_template<false>( | |||
| 769 | bt, this_thr, gtid, tid, reduce USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | |||
| 770 | } | |||
| 771 | ||||
| 772 | static bool __kmp_linear_barrier_gather_cancellable( | |||
| 773 | enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, | |||
| 774 | void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj), void *itt_sync_obj) { | |||
| 775 | return __kmp_linear_barrier_gather_template<true>( | |||
| 776 | bt, this_thr, gtid, tid, reduce USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | |||
| 777 | } | |||
| 778 | ||||
| 779 | static void __kmp_linear_barrier_release( | |||
| 780 | enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, | |||
| 781 | int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj), void *itt_sync_obj) { | |||
| 782 | __kmp_linear_barrier_release_template<false>( | |||
| 783 | bt, this_thr, gtid, tid, propagate_icvs USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | |||
| 784 | } | |||
| 785 | ||||
| 786 | static bool __kmp_linear_barrier_release_cancellable( | |||
| 787 | enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, | |||
| 788 | int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj), void *itt_sync_obj) { | |||
| 789 | return __kmp_linear_barrier_release_template<true>( | |||
| 790 | bt, this_thr, gtid, tid, propagate_icvs USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | |||
| 791 | } | |||
| 792 | ||||
| 793 | // Tree barrier | |||
| 794 | static void __kmp_tree_barrier_gather( | |||
| 795 | enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, | |||
| 796 | void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj), void *itt_sync_obj) { | |||
| 797 | KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_tree_gather)((void)0); | |||
| 798 | kmp_team_t *team = this_thr->th.th_team; | |||
| 799 | kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb; | |||
| 800 | kmp_info_t **other_threads = team->t.t_threads; | |||
| 801 | kmp_uint32 nproc = this_thr->th.th_team_nproc; | |||
| 802 | kmp_uint32 branch_bits = __kmp_barrier_gather_branch_bits[bt]; | |||
| 803 | kmp_uint32 branch_factor = 1 << branch_bits; | |||
| 804 | kmp_uint32 child; | |||
| 805 | kmp_uint32 child_tid; | |||
| 806 | kmp_uint64 new_state = 0; | |||
| 807 | ||||
| 808 | KA_TRACE(if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n" , gtid, team->t.t_id, tid, bt); } | |||
| 809 | 20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n" , gtid, team->t.t_id, tid, bt); } | |||
| 810 | gtid, team->t.t_id, tid, bt))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n" , gtid, team->t.t_id, tid, bt); }; | |||
| 811 | KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid])if (!(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid ])) { __kmp_debug_assert("this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]" , "openmp/runtime/src/kmp_barrier.cpp", 811); }; | |||
| 812 | ||||
| 813 | #if USE_ITT_BUILD1 && USE_ITT_NOTIFY1 | |||
| 814 | // Barrier imbalance - save arrive time to the thread | |||
| 815 | if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) { | |||
| 816 | this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time = | |||
| 817 | __itt_get_timestamp(!__kmp_itt_get_timestamp_ptr__3_0) ? 0 : __kmp_itt_get_timestamp_ptr__3_0(); | |||
| 818 | } | |||
| 819 | #endif | |||
| 820 | // Perform tree gather to wait until all threads have arrived; reduce any | |||
| 821 | // required data as we go | |||
| 822 | child_tid = (tid << branch_bits) + 1; | |||
| 823 | if (child_tid < nproc) { | |||
| 824 | // Parent threads wait for all their children to arrive | |||
| 825 | new_state = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP(1 << 2); | |||
| 826 | child = 1; | |||
| 827 | do { | |||
| 828 | kmp_info_t *child_thr = other_threads[child_tid]; | |||
| 829 | kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb; | |||
| 830 | #if KMP_CACHE_MANAGE | |||
| 831 | // Prefetch next thread's arrived count | |||
| 832 | if (child + 1 <= branch_factor && child_tid + 1 < nproc) | |||
| 833 | KMP_CACHE_PREFETCH( | |||
| 834 | &other_threads[child_tid + 1]->th.th_bar[bt].bb.b_arrived); | |||
| 835 | #endif /* KMP_CACHE_MANAGE */ | |||
| 836 | KA_TRACE(20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) " "arrived(%p) == %llu\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid, &child_bar ->b_arrived, new_state); } | |||
| 837 | ("__kmp_tree_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) " "arrived(%p) == %llu\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid, &child_bar ->b_arrived, new_state); } | |||
| 838 | "arrived(%p) == %llu\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) " "arrived(%p) == %llu\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid, &child_bar ->b_arrived, new_state); } | |||
| 839 | gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) " "arrived(%p) == %llu\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid, &child_bar ->b_arrived, new_state); } | |||
| 840 | team->t.t_id, child_tid, &child_bar->b_arrived, new_state))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) " "arrived(%p) == %llu\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid, &child_bar ->b_arrived, new_state); }; | |||
| 841 | // Wait for child to arrive | |||
| 842 | kmp_flag_64<> flag(&child_bar->b_arrived, new_state); | |||
| 843 | flag.wait(this_thr, FALSE0 USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | |||
| 844 | #if USE_ITT_BUILD1 && USE_ITT_NOTIFY1 | |||
| 845 | // Barrier imbalance - write min of the thread time and a child time to | |||
| 846 | // the thread. | |||
| 847 | if (__kmp_forkjoin_frames_mode == 2) { | |||
| 848 | this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time,((this_thr->th.th_bar_min_time) < (child_thr->th.th_bar_min_time ) ? (this_thr->th.th_bar_min_time) : (child_thr->th.th_bar_min_time )) | |||
| 849 | child_thr->th.th_bar_min_time)((this_thr->th.th_bar_min_time) < (child_thr->th.th_bar_min_time ) ? (this_thr->th.th_bar_min_time) : (child_thr->th.th_bar_min_time )); | |||
| 850 | } | |||
| 851 | #endif | |||
| 852 | if (reduce) { | |||
| 853 | KA_TRACE(100,if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_tree_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n" , gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team ), team->t.t_id, child_tid); } | |||
| 854 | ("__kmp_tree_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n",if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_tree_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n" , gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team ), team->t.t_id, child_tid); } | |||
| 855 | gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_tree_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n" , gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team ), team->t.t_id, child_tid); } | |||
| 856 | team->t.t_id, child_tid))if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_tree_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n" , gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team ), team->t.t_id, child_tid); }; | |||
| 857 | OMPT_REDUCTION_DECL(this_thr, gtid)ompt_data_t *my_task_data = (&(this_thr->th.th_current_task ->ompt_task_info.task_data)); ompt_data_t *my_parallel_data = (&(this_thr->th.th_team->t.ompt_team_info.parallel_data )); void *return_address = __ompt_load_return_address(gtid);; | |||
| 858 | OMPT_REDUCTION_BEGINif (ompt_enabled.enabled && ompt_enabled.ompt_callback_reduction ) { ompt_callbacks.ompt_callback_reduction_callback( ompt_sync_region_reduction , ompt_scope_begin, my_parallel_data, my_task_data, return_address ); }; | |||
| 859 | (*reduce)(this_thr->th.th_local.reduce_data, | |||
| 860 | child_thr->th.th_local.reduce_data); | |||
| 861 | OMPT_REDUCTION_ENDif (ompt_enabled.enabled && ompt_enabled.ompt_callback_reduction ) { ompt_callbacks.ompt_callback_reduction_callback( ompt_sync_region_reduction , ompt_scope_end, my_parallel_data, my_task_data, return_address ); }; | |||
| 862 | } | |||
| 863 | child++; | |||
| 864 | child_tid++; | |||
| 865 | } while (child <= branch_factor && child_tid < nproc); | |||
| 866 | } | |||
| 867 | ||||
| 868 | if (!KMP_MASTER_TID(tid)(0 == (tid))) { // Worker threads | |||
| 869 | kmp_int32 parent_tid = (tid - 1) >> branch_bits; | |||
| 870 | ||||
| 871 | KA_TRACE(20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) " "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid , __kmp_gtid_from_tid(parent_tid, team), team->t.t_id, parent_tid , &thr_bar->b_arrived, thr_bar->b_arrived, thr_bar-> b_arrived + (1 << 2)); } | |||
| 872 | ("__kmp_tree_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) " "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid , __kmp_gtid_from_tid(parent_tid, team), team->t.t_id, parent_tid , &thr_bar->b_arrived, thr_bar->b_arrived, thr_bar-> b_arrived + (1 << 2)); } | |||
| 873 | "arrived(%p): %llu => %llu\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) " "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid , __kmp_gtid_from_tid(parent_tid, team), team->t.t_id, parent_tid , &thr_bar->b_arrived, thr_bar->b_arrived, thr_bar-> b_arrived + (1 << 2)); } | |||
| 874 | gtid, team->t.t_id, tid, __kmp_gtid_from_tid(parent_tid, team),if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) " "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid , __kmp_gtid_from_tid(parent_tid, team), team->t.t_id, parent_tid , &thr_bar->b_arrived, thr_bar->b_arrived, thr_bar-> b_arrived + (1 << 2)); } | |||
| 875 | team->t.t_id, parent_tid, &thr_bar->b_arrived, thr_bar->b_arrived,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) " "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid , __kmp_gtid_from_tid(parent_tid, team), team->t.t_id, parent_tid , &thr_bar->b_arrived, thr_bar->b_arrived, thr_bar-> b_arrived + (1 << 2)); } | |||
| 876 | thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) " "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid , __kmp_gtid_from_tid(parent_tid, team), team->t.t_id, parent_tid , &thr_bar->b_arrived, thr_bar->b_arrived, thr_bar-> b_arrived + (1 << 2)); }; | |||
| 877 | ||||
| 878 | // Mark arrival to parent thread | |||
| 879 | /* After performing this write, a worker thread may not assume that the team | |||
| 880 | is valid any more - it could be deallocated by the primary thread at any | |||
| 881 | time. */ | |||
| 882 | kmp_flag_64<> flag(&thr_bar->b_arrived, other_threads[parent_tid]); | |||
| 883 | flag.release(); | |||
| 884 | } else { | |||
| 885 | // Need to update the team arrived pointer if we are the primary thread | |||
| 886 | if (nproc > 1) // New value was already computed above | |||
| 887 | team->t.t_bar[bt].b_arrived = new_state; | |||
| 888 | else | |||
| 889 | team->t.t_bar[bt].b_arrived += KMP_BARRIER_STATE_BUMP(1 << 2); | |||
| 890 | KA_TRACE(20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) set team %d "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_gather: T#%d(%d:%d) set team %d " "arrived(%p) = %llu\n", gtid, team->t.t_id, tid, team-> t.t_id, &team->t.t_bar[bt].b_arrived, team->t.t_bar [bt].b_arrived); } | |||
| 891 | "arrived(%p) = %llu\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_gather: T#%d(%d:%d) set team %d " "arrived(%p) = %llu\n", gtid, team->t.t_id, tid, team-> t.t_id, &team->t.t_bar[bt].b_arrived, team->t.t_bar [bt].b_arrived); } | |||
| 892 | gtid, team->t.t_id, tid, team->t.t_id,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_gather: T#%d(%d:%d) set team %d " "arrived(%p) = %llu\n", gtid, team->t.t_id, tid, team-> t.t_id, &team->t.t_bar[bt].b_arrived, team->t.t_bar [bt].b_arrived); } | |||
| 893 | &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_gather: T#%d(%d:%d) set team %d " "arrived(%p) = %llu\n", gtid, team->t.t_id, tid, team-> t.t_id, &team->t.t_bar[bt].b_arrived, team->t.t_bar [bt].b_arrived); }; | |||
| 894 | } | |||
| 895 | KA_TRACE(20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n" , gtid, team->t.t_id, tid, bt); } | |||
| 896 | ("__kmp_tree_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n" , gtid, team->t.t_id, tid, bt); } | |||
| 897 | gtid, team->t.t_id, tid, bt))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n" , gtid, team->t.t_id, tid, bt); }; | |||
| 898 | } | |||
| 899 | ||||
| 900 | static void __kmp_tree_barrier_release( | |||
| 901 | enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, | |||
| 902 | int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj), void *itt_sync_obj) { | |||
| 903 | KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_tree_release)((void)0); | |||
| 904 | kmp_team_t *team; | |||
| 905 | kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb; | |||
| 906 | kmp_uint32 nproc; | |||
| 907 | kmp_uint32 branch_bits = __kmp_barrier_release_branch_bits[bt]; | |||
| 908 | kmp_uint32 branch_factor = 1 << branch_bits; | |||
| 909 | kmp_uint32 child; | |||
| 910 | kmp_uint32 child_tid; | |||
| 911 | ||||
| 912 | // Perform a tree release for all of the threads that have been gathered | |||
| 913 | if (!KMP_MASTER_TID((0 == (tid)) | |||
| 914 | tid)(0 == (tid))) { // Handle fork barrier workers who aren't part of a team yet | |||
| 915 | KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d wait go(%p) == %u\n", gtid,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_release: T#%d wait go(%p) == %u\n" , gtid, &thr_bar->b_go, (1 << 2)); } | |||
| 916 | &thr_bar->b_go, KMP_BARRIER_STATE_BUMP))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_release: T#%d wait go(%p) == %u\n" , gtid, &thr_bar->b_go, (1 << 2)); }; | |||
| 917 | // Wait for parent thread to release us | |||
| 918 | kmp_flag_64<> flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP(1 << 2)); | |||
| 919 | flag.wait(this_thr, TRUE(!0) USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | |||
| 920 | #if USE_ITT_BUILD1 && USE_ITT_NOTIFY1 | |||
| 921 | if ((__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 && itt_sync_obj == NULL__null) || KMP_ITT_DEBUG0) { | |||
| 922 | // In fork barrier where we could not get the object reliably (or | |||
| 923 | // ITTNOTIFY is disabled) | |||
| 924 | itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1); | |||
| 925 | // Cancel wait on previous parallel region... | |||
| 926 | __kmp_itt_task_starting(itt_sync_obj); | |||
| 927 | ||||
| 928 | if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done)) | |||
| 929 | return; | |||
| 930 | ||||
| 931 | itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier); | |||
| 932 | if (itt_sync_obj != NULL__null) | |||
| 933 | // Call prepare as early as possible for "new" barrier | |||
| 934 | __kmp_itt_task_finished(itt_sync_obj); | |||
| 935 | } else | |||
| 936 | #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */ | |||
| 937 | // Early exit for reaping threads releasing forkjoin barrier | |||
| 938 | if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done)) | |||
| 939 | return; | |||
| 940 | ||||
| 941 | // The worker thread may now assume that the team is valid. | |||
| 942 | team = __kmp_threads[gtid]->th.th_team; | |||
| 943 | KMP_DEBUG_ASSERT(team != NULL)if (!(team != __null)) { __kmp_debug_assert("team != __null", "openmp/runtime/src/kmp_barrier.cpp", 943); }; | |||
| 944 | tid = __kmp_tid_from_gtid(gtid); | |||
| 945 | ||||
| 946 | TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE)(thr_bar->b_go) = (0); | |||
| 947 | KA_TRACE(20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_release: T#%d(%d:%d) set go(%p) = %u\n" , gtid, team->t.t_id, tid, &thr_bar->b_go, 0); } | |||
| 948 | ("__kmp_tree_barrier_release: T#%d(%d:%d) set go(%p) = %u\n", gtid,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_release: T#%d(%d:%d) set go(%p) = %u\n" , gtid, team->t.t_id, tid, &thr_bar->b_go, 0); } | |||
| 949 | team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_release: T#%d(%d:%d) set go(%p) = %u\n" , gtid, team->t.t_id, tid, &thr_bar->b_go, 0); }; | |||
| 950 | KMP_MB(); // Flush all pending memory write invalidates. | |||
| 951 | } else { | |||
| 952 | team = __kmp_threads[gtid]->th.th_team; | |||
| 953 | KMP_DEBUG_ASSERT(team != NULL)if (!(team != __null)) { __kmp_debug_assert("team != __null", "openmp/runtime/src/kmp_barrier.cpp", 953); }; | |||
| 954 | KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d(%d:%d) primary enter for "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_release: T#%d(%d:%d) primary enter for " "barrier type %d\n", gtid, team->t.t_id, tid, bt); } | |||
| 955 | "barrier type %d\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_release: T#%d(%d:%d) primary enter for " "barrier type %d\n", gtid, team->t.t_id, tid, bt); } | |||
| 956 | gtid, team->t.t_id, tid, bt))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_release: T#%d(%d:%d) primary enter for " "barrier type %d\n", gtid, team->t.t_id, tid, bt); }; | |||
| 957 | } | |||
| 958 | nproc = this_thr->th.th_team_nproc; | |||
| 959 | child_tid = (tid << branch_bits) + 1; | |||
| 960 | ||||
| 961 | if (child_tid < nproc) { | |||
| 962 | kmp_info_t **other_threads = team->t.t_threads; | |||
| 963 | child = 1; | |||
| 964 | // Parent threads release all their children | |||
| 965 | do { | |||
| 966 | kmp_info_t *child_thr = other_threads[child_tid]; | |||
| 967 | kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb; | |||
| 968 | #if KMP_CACHE_MANAGE | |||
| 969 | // Prefetch next thread's go count | |||
| 970 | if (child + 1 <= branch_factor && child_tid + 1 < nproc) | |||
| 971 | KMP_CACHE_PREFETCH( | |||
| 972 | &other_threads[child_tid + 1]->th.th_bar[bt].bb.b_go); | |||
| 973 | #endif /* KMP_CACHE_MANAGE */ | |||
| 974 | ||||
| 975 | #if KMP_BARRIER_ICV_PUSH1 | |||
| 976 | { | |||
| 977 | KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy)((void)0); | |||
| 978 | if (propagate_icvs) { | |||
| 979 | __kmp_init_implicit_task(team->t.t_ident, | |||
| 980 | team->t.t_threads[child_tid], team, | |||
| 981 | child_tid, FALSE0); | |||
| 982 | copy_icvs(&team->t.t_implicit_task_taskdata[child_tid].td_icvs, | |||
| 983 | &team->t.t_implicit_task_taskdata[0].td_icvs); | |||
| 984 | } | |||
| 985 | } | |||
| 986 | #endif // KMP_BARRIER_ICV_PUSH | |||
| 987 | KA_TRACE(20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)" "go(%p): %u => %u\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid, &child_bar ->b_go, child_bar->b_go, child_bar->b_go + (1 << 2)); } | |||
| 988 | ("__kmp_tree_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)"if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)" "go(%p): %u => %u\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid, &child_bar ->b_go, child_bar->b_go, child_bar->b_go + (1 << 2)); } | |||
| 989 | "go(%p): %u => %u\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)" "go(%p): %u => %u\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid, &child_bar ->b_go, child_bar->b_go, child_bar->b_go + (1 << 2)); } | |||
| 990 | gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)" "go(%p): %u => %u\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid, &child_bar ->b_go, child_bar->b_go, child_bar->b_go + (1 << 2)); } | |||
| 991 | team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)" "go(%p): %u => %u\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid, &child_bar ->b_go, child_bar->b_go, child_bar->b_go + (1 << 2)); } | |||
| 992 | child_bar->b_go + KMP_BARRIER_STATE_BUMP))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)" "go(%p): %u => %u\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid, &child_bar ->b_go, child_bar->b_go, child_bar->b_go + (1 << 2)); }; | |||
| 993 | // Release child from barrier | |||
| 994 | kmp_flag_64<> flag(&child_bar->b_go, child_thr); | |||
| 995 | flag.release(); | |||
| 996 | child++; | |||
| 997 | child_tid++; | |||
| 998 | } while (child <= branch_factor && child_tid < nproc); | |||
| 999 | } | |||
| 1000 | KA_TRACE(if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_release: T#%d(%d:%d) exit for barrier type %d\n" , gtid, team->t.t_id, tid, bt); } | |||
| 1001 | 20, ("__kmp_tree_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_release: T#%d(%d:%d) exit for barrier type %d\n" , gtid, team->t.t_id, tid, bt); } | |||
| 1002 | gtid, team->t.t_id, tid, bt))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_tree_barrier_release: T#%d(%d:%d) exit for barrier type %d\n" , gtid, team->t.t_id, tid, bt); }; | |||
| 1003 | } | |||
| 1004 | ||||
| 1005 | // Hyper Barrier | |||
| 1006 | static void __kmp_hyper_barrier_gather( | |||
| 1007 | enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, | |||
| 1008 | void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj), void *itt_sync_obj) { | |||
| 1009 | KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hyper_gather)((void)0); | |||
| 1010 | kmp_team_t *team = this_thr->th.th_team; | |||
| 1011 | kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb; | |||
| 1012 | kmp_info_t **other_threads = team->t.t_threads; | |||
| 1013 | kmp_uint64 new_state = KMP_BARRIER_UNUSED_STATE(1 << 1); | |||
| 1014 | kmp_uint32 num_threads = this_thr->th.th_team_nproc; | |||
| 1015 | kmp_uint32 branch_bits = __kmp_barrier_gather_branch_bits[bt]; | |||
| 1016 | kmp_uint32 branch_factor = 1 << branch_bits; | |||
| 1017 | kmp_uint32 offset; | |||
| 1018 | kmp_uint32 level; | |||
| 1019 | ||||
| 1020 | KA_TRACE(if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n" , gtid, team->t.t_id, tid, bt); } | |||
| 1021 | 20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n" , gtid, team->t.t_id, tid, bt); } | |||
| 1022 | ("__kmp_hyper_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n" , gtid, team->t.t_id, tid, bt); } | |||
| 1023 | gtid, team->t.t_id, tid, bt))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n" , gtid, team->t.t_id, tid, bt); }; | |||
| 1024 | KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid])if (!(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid ])) { __kmp_debug_assert("this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]" , "openmp/runtime/src/kmp_barrier.cpp", 1024); }; | |||
| 1025 | ||||
| 1026 | #if USE_ITT_BUILD1 && USE_ITT_NOTIFY1 | |||
| 1027 | // Barrier imbalance - save arrive time to the thread | |||
| 1028 | if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) { | |||
| 1029 | this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time = | |||
| 1030 | __itt_get_timestamp(!__kmp_itt_get_timestamp_ptr__3_0) ? 0 : __kmp_itt_get_timestamp_ptr__3_0(); | |||
| 1031 | } | |||
| 1032 | #endif | |||
| 1033 | /* Perform a hypercube-embedded tree gather to wait until all of the threads | |||
| 1034 | have arrived, and reduce any required data as we go. */ | |||
| 1035 | kmp_flag_64<> p_flag(&thr_bar->b_arrived); | |||
| 1036 | for (level = 0, offset = 1; offset < num_threads; | |||
| 1037 | level += branch_bits, offset <<= branch_bits) { | |||
| 1038 | kmp_uint32 child; | |||
| 1039 | kmp_uint32 child_tid; | |||
| 1040 | ||||
| 1041 | if (((tid >> level) & (branch_factor - 1)) != 0) { | |||
| 1042 | kmp_int32 parent_tid = tid & ~((1 << (level + branch_bits)) - 1); | |||
| 1043 | ||||
| 1044 | KMP_MB(); // Synchronize parent and child threads. | |||
| 1045 | KA_TRACE(20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) " "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid , __kmp_gtid_from_tid(parent_tid, team), team->t.t_id, parent_tid , &thr_bar->b_arrived, thr_bar->b_arrived, thr_bar-> b_arrived + (1 << 2)); } | |||
| 1046 | ("__kmp_hyper_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) " "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid , __kmp_gtid_from_tid(parent_tid, team), team->t.t_id, parent_tid , &thr_bar->b_arrived, thr_bar->b_arrived, thr_bar-> b_arrived + (1 << 2)); } | |||
| 1047 | "arrived(%p): %llu => %llu\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) " "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid , __kmp_gtid_from_tid(parent_tid, team), team->t.t_id, parent_tid , &thr_bar->b_arrived, thr_bar->b_arrived, thr_bar-> b_arrived + (1 << 2)); } | |||
| 1048 | gtid, team->t.t_id, tid, __kmp_gtid_from_tid(parent_tid, team),if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) " "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid , __kmp_gtid_from_tid(parent_tid, team), team->t.t_id, parent_tid , &thr_bar->b_arrived, thr_bar->b_arrived, thr_bar-> b_arrived + (1 << 2)); } | |||
| 1049 | team->t.t_id, parent_tid, &thr_bar->b_arrived,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) " "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid , __kmp_gtid_from_tid(parent_tid, team), team->t.t_id, parent_tid , &thr_bar->b_arrived, thr_bar->b_arrived, thr_bar-> b_arrived + (1 << 2)); } | |||
| 1050 | thr_bar->b_arrived,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) " "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid , __kmp_gtid_from_tid(parent_tid, team), team->t.t_id, parent_tid , &thr_bar->b_arrived, thr_bar->b_arrived, thr_bar-> b_arrived + (1 << 2)); } | |||
| 1051 | thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) " "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid , __kmp_gtid_from_tid(parent_tid, team), team->t.t_id, parent_tid , &thr_bar->b_arrived, thr_bar->b_arrived, thr_bar-> b_arrived + (1 << 2)); }; | |||
| 1052 | // Mark arrival to parent thread | |||
| 1053 | /* After performing this write (in the last iteration of the enclosing for | |||
| 1054 | loop), a worker thread may not assume that the team is valid any more | |||
| 1055 | - it could be deallocated by the primary thread at any time. */ | |||
| 1056 | p_flag.set_waiter(other_threads[parent_tid]); | |||
| 1057 | p_flag.release(); | |||
| 1058 | break; | |||
| 1059 | } | |||
| 1060 | ||||
| 1061 | // Parent threads wait for children to arrive | |||
| 1062 | if (new_state == KMP_BARRIER_UNUSED_STATE(1 << 1)) | |||
| 1063 | new_state = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP(1 << 2); | |||
| 1064 | for (child = 1, child_tid = tid + (1 << level); | |||
| 1065 | child < branch_factor && child_tid < num_threads; | |||
| 1066 | child++, child_tid += (1 << level)) { | |||
| 1067 | kmp_info_t *child_thr = other_threads[child_tid]; | |||
| 1068 | kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb; | |||
| 1069 | #if KMP_CACHE_MANAGE | |||
| 1070 | kmp_uint32 next_child_tid = child_tid + (1 << level); | |||
| 1071 | // Prefetch next thread's arrived count | |||
| 1072 | if (child + 1 < branch_factor && next_child_tid < num_threads) | |||
| 1073 | KMP_CACHE_PREFETCH( | |||
| 1074 | &other_threads[next_child_tid]->th.th_bar[bt].bb.b_arrived); | |||
| 1075 | #endif /* KMP_CACHE_MANAGE */ | |||
| 1076 | KA_TRACE(20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) " "arrived(%p) == %llu\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid, &child_bar ->b_arrived, new_state); } | |||
| 1077 | ("__kmp_hyper_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) " "arrived(%p) == %llu\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid, &child_bar ->b_arrived, new_state); } | |||
| 1078 | "arrived(%p) == %llu\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) " "arrived(%p) == %llu\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid, &child_bar ->b_arrived, new_state); } | |||
| 1079 | gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) " "arrived(%p) == %llu\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid, &child_bar ->b_arrived, new_state); } | |||
| 1080 | team->t.t_id, child_tid, &child_bar->b_arrived, new_state))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) " "arrived(%p) == %llu\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid, &child_bar ->b_arrived, new_state); }; | |||
| 1081 | // Wait for child to arrive | |||
| 1082 | kmp_flag_64<> c_flag(&child_bar->b_arrived, new_state); | |||
| 1083 | c_flag.wait(this_thr, FALSE0 USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | |||
| 1084 | KMP_MB(); // Synchronize parent and child threads. | |||
| 1085 | #if USE_ITT_BUILD1 && USE_ITT_NOTIFY1 | |||
| 1086 | // Barrier imbalance - write min of the thread time and a child time to | |||
| 1087 | // the thread. | |||
| 1088 | if (__kmp_forkjoin_frames_mode == 2) { | |||
| 1089 | this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time,((this_thr->th.th_bar_min_time) < (child_thr->th.th_bar_min_time ) ? (this_thr->th.th_bar_min_time) : (child_thr->th.th_bar_min_time )) | |||
| 1090 | child_thr->th.th_bar_min_time)((this_thr->th.th_bar_min_time) < (child_thr->th.th_bar_min_time ) ? (this_thr->th.th_bar_min_time) : (child_thr->th.th_bar_min_time )); | |||
| 1091 | } | |||
| 1092 | #endif | |||
| 1093 | if (reduce) { | |||
| 1094 | KA_TRACE(100,if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_hyper_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n" , gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team ), team->t.t_id, child_tid); } | |||
| 1095 | ("__kmp_hyper_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n",if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_hyper_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n" , gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team ), team->t.t_id, child_tid); } | |||
| 1096 | gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_hyper_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n" , gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team ), team->t.t_id, child_tid); } | |||
| 1097 | team->t.t_id, child_tid))if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_hyper_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n" , gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team ), team->t.t_id, child_tid); }; | |||
| 1098 | OMPT_REDUCTION_DECL(this_thr, gtid)ompt_data_t *my_task_data = (&(this_thr->th.th_current_task ->ompt_task_info.task_data)); ompt_data_t *my_parallel_data = (&(this_thr->th.th_team->t.ompt_team_info.parallel_data )); void *return_address = __ompt_load_return_address(gtid);; | |||
| 1099 | OMPT_REDUCTION_BEGINif (ompt_enabled.enabled && ompt_enabled.ompt_callback_reduction ) { ompt_callbacks.ompt_callback_reduction_callback( ompt_sync_region_reduction , ompt_scope_begin, my_parallel_data, my_task_data, return_address ); }; | |||
| 1100 | (*reduce)(this_thr->th.th_local.reduce_data, | |||
| 1101 | child_thr->th.th_local.reduce_data); | |||
| 1102 | OMPT_REDUCTION_ENDif (ompt_enabled.enabled && ompt_enabled.ompt_callback_reduction ) { ompt_callbacks.ompt_callback_reduction_callback( ompt_sync_region_reduction , ompt_scope_end, my_parallel_data, my_task_data, return_address ); }; | |||
| 1103 | } | |||
| 1104 | } | |||
| 1105 | } | |||
| 1106 | ||||
| 1107 | if (KMP_MASTER_TID(tid)(0 == (tid))) { | |||
| 1108 | // Need to update the team arrived pointer if we are the primary thread | |||
| 1109 | if (new_state == KMP_BARRIER_UNUSED_STATE(1 << 1)) | |||
| 1110 | team->t.t_bar[bt].b_arrived += KMP_BARRIER_STATE_BUMP(1 << 2); | |||
| 1111 | else | |||
| 1112 | team->t.t_bar[bt].b_arrived = new_state; | |||
| 1113 | KA_TRACE(20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) set team %d "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_gather: T#%d(%d:%d) set team %d " "arrived(%p) = %llu\n", gtid, team->t.t_id, tid, team-> t.t_id, &team->t.t_bar[bt].b_arrived, team->t.t_bar [bt].b_arrived); } | |||
| 1114 | "arrived(%p) = %llu\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_gather: T#%d(%d:%d) set team %d " "arrived(%p) = %llu\n", gtid, team->t.t_id, tid, team-> t.t_id, &team->t.t_bar[bt].b_arrived, team->t.t_bar [bt].b_arrived); } | |||
| 1115 | gtid, team->t.t_id, tid, team->t.t_id,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_gather: T#%d(%d:%d) set team %d " "arrived(%p) = %llu\n", gtid, team->t.t_id, tid, team-> t.t_id, &team->t.t_bar[bt].b_arrived, team->t.t_bar [bt].b_arrived); } | |||
| 1116 | &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_gather: T#%d(%d:%d) set team %d " "arrived(%p) = %llu\n", gtid, team->t.t_id, tid, team-> t.t_id, &team->t.t_bar[bt].b_arrived, team->t.t_bar [bt].b_arrived); }; | |||
| 1117 | } | |||
| 1118 | KA_TRACE(if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n" , gtid, team->t.t_id, tid, bt); } | |||
| 1119 | 20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n" , gtid, team->t.t_id, tid, bt); } | |||
| 1120 | gtid, team->t.t_id, tid, bt))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n" , gtid, team->t.t_id, tid, bt); }; | |||
| 1121 | } | |||
| 1122 | ||||
| 1123 | // The reverse versions seem to beat the forward versions overall | |||
| 1124 | #define KMP_REVERSE_HYPER_BAR | |||
| 1125 | static void __kmp_hyper_barrier_release( | |||
| 1126 | enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, | |||
| 1127 | int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj), void *itt_sync_obj) { | |||
| 1128 | KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hyper_release)((void)0); | |||
| 1129 | kmp_team_t *team; | |||
| 1130 | kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb; | |||
| 1131 | kmp_info_t **other_threads; | |||
| 1132 | kmp_uint32 num_threads; | |||
| 1133 | kmp_uint32 branch_bits = __kmp_barrier_release_branch_bits[bt]; | |||
| 1134 | kmp_uint32 branch_factor = 1 << branch_bits; | |||
| 1135 | kmp_uint32 child; | |||
| 1136 | kmp_uint32 child_tid; | |||
| 1137 | kmp_uint32 offset; | |||
| 1138 | kmp_uint32 level; | |||
| 1139 | ||||
| 1140 | /* Perform a hypercube-embedded tree release for all of the threads that have | |||
| 1141 | been gathered. If KMP_REVERSE_HYPER_BAR is defined (default) the threads | |||
| 1142 | are released in the reverse order of the corresponding gather, otherwise | |||
| 1143 | threads are released in the same order. */ | |||
| 1144 | if (KMP_MASTER_TID(tid)(0 == (tid))) { // primary thread | |||
| 1145 | team = __kmp_threads[gtid]->th.th_team; | |||
| 1146 | KMP_DEBUG_ASSERT(team != NULL)if (!(team != __null)) { __kmp_debug_assert("team != __null", "openmp/runtime/src/kmp_barrier.cpp", 1146); }; | |||
| 1147 | KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d(%d:%d) primary enter for "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_release: T#%d(%d:%d) primary enter for " "barrier type %d\n", gtid, team->t.t_id, tid, bt); } | |||
| 1148 | "barrier type %d\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_release: T#%d(%d:%d) primary enter for " "barrier type %d\n", gtid, team->t.t_id, tid, bt); } | |||
| 1149 | gtid, team->t.t_id, tid, bt))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_release: T#%d(%d:%d) primary enter for " "barrier type %d\n", gtid, team->t.t_id, tid, bt); }; | |||
| 1150 | #if KMP_BARRIER_ICV_PUSH1 | |||
| 1151 | if (propagate_icvs) { // primary already has ICVs in final destination; copy | |||
| 1152 | copy_icvs(&thr_bar->th_fixed_icvs, | |||
| 1153 | &team->t.t_implicit_task_taskdata[tid].td_icvs); | |||
| 1154 | } | |||
| 1155 | #endif | |||
| 1156 | } else { // Handle fork barrier workers who aren't part of a team yet | |||
| 1157 | KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d wait go(%p) == %u\n", gtid,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_release: T#%d wait go(%p) == %u\n" , gtid, &thr_bar->b_go, (1 << 2)); } | |||
| 1158 | &thr_bar->b_go, KMP_BARRIER_STATE_BUMP))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_release: T#%d wait go(%p) == %u\n" , gtid, &thr_bar->b_go, (1 << 2)); }; | |||
| 1159 | // Wait for parent thread to release us | |||
| 1160 | kmp_flag_64<> flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP(1 << 2)); | |||
| 1161 | flag.wait(this_thr, TRUE(!0) USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | |||
| 1162 | #if USE_ITT_BUILD1 && USE_ITT_NOTIFY1 | |||
| 1163 | if ((__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 && itt_sync_obj == NULL__null) || KMP_ITT_DEBUG0) { | |||
| 1164 | // In fork barrier where we could not get the object reliably | |||
| 1165 | itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1); | |||
| 1166 | // Cancel wait on previous parallel region... | |||
| 1167 | __kmp_itt_task_starting(itt_sync_obj); | |||
| 1168 | ||||
| 1169 | if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done)) | |||
| 1170 | return; | |||
| 1171 | ||||
| 1172 | itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier); | |||
| 1173 | if (itt_sync_obj != NULL__null) | |||
| 1174 | // Call prepare as early as possible for "new" barrier | |||
| 1175 | __kmp_itt_task_finished(itt_sync_obj); | |||
| 1176 | } else | |||
| 1177 | #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */ | |||
| 1178 | // Early exit for reaping threads releasing forkjoin barrier | |||
| 1179 | if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done)) | |||
| 1180 | return; | |||
| 1181 | ||||
| 1182 | // The worker thread may now assume that the team is valid. | |||
| 1183 | team = __kmp_threads[gtid]->th.th_team; | |||
| 1184 | KMP_DEBUG_ASSERT(team != NULL)if (!(team != __null)) { __kmp_debug_assert("team != __null", "openmp/runtime/src/kmp_barrier.cpp", 1184); }; | |||
| 1185 | tid = __kmp_tid_from_gtid(gtid); | |||
| 1186 | ||||
| 1187 | TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE)(thr_bar->b_go) = (0); | |||
| 1188 | KA_TRACE(20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_release: T#%d(%d:%d) set go(%p) = %u\n" , gtid, team->t.t_id, tid, &thr_bar->b_go, 0); } | |||
| 1189 | ("__kmp_hyper_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_release: T#%d(%d:%d) set go(%p) = %u\n" , gtid, team->t.t_id, tid, &thr_bar->b_go, 0); } | |||
| 1190 | gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_release: T#%d(%d:%d) set go(%p) = %u\n" , gtid, team->t.t_id, tid, &thr_bar->b_go, 0); }; | |||
| 1191 | KMP_MB(); // Flush all pending memory write invalidates. | |||
| 1192 | } | |||
| 1193 | num_threads = this_thr->th.th_team_nproc; | |||
| 1194 | other_threads = team->t.t_threads; | |||
| 1195 | ||||
| 1196 | #ifdef KMP_REVERSE_HYPER_BAR | |||
| 1197 | // Count up to correct level for parent | |||
| 1198 | for (level = 0, offset = 1; | |||
| 1199 | offset < num_threads && (((tid >> level) & (branch_factor - 1)) == 0); | |||
| 1200 | level += branch_bits, offset <<= branch_bits) | |||
| 1201 | ; | |||
| 1202 | ||||
| 1203 | // Now go down from there | |||
| 1204 | for (level -= branch_bits, offset >>= branch_bits; offset != 0; | |||
| 1205 | level -= branch_bits, offset >>= branch_bits) | |||
| 1206 | #else | |||
| 1207 | // Go down the tree, level by level | |||
| 1208 | for (level = 0, offset = 1; offset < num_threads; | |||
| 1209 | level += branch_bits, offset <<= branch_bits) | |||
| 1210 | #endif // KMP_REVERSE_HYPER_BAR | |||
| 1211 | { | |||
| 1212 | #ifdef KMP_REVERSE_HYPER_BAR | |||
| 1213 | /* Now go in reverse order through the children, highest to lowest. | |||
| 1214 | Initial setting of child is conservative here. */ | |||
| 1215 | child = num_threads >> ((level == 0) ? level : level - 1); | |||
| 1216 | for (child = (child < branch_factor - 1) ? child : branch_factor - 1, | |||
| 1217 | child_tid = tid + (child << level); | |||
| 1218 | child >= 1; child--, child_tid -= (1 << level)) | |||
| 1219 | #else | |||
| 1220 | if (((tid >> level) & (branch_factor - 1)) != 0) | |||
| 1221 | // No need to go lower than this, since this is the level parent would be | |||
| 1222 | // notified | |||
| 1223 | break; | |||
| 1224 | // Iterate through children on this level of the tree | |||
| 1225 | for (child = 1, child_tid = tid + (1 << level); | |||
| 1226 | child < branch_factor && child_tid < num_threads; | |||
| 1227 | child++, child_tid += (1 << level)) | |||
| 1228 | #endif // KMP_REVERSE_HYPER_BAR | |||
| 1229 | { | |||
| 1230 | if (child_tid >= num_threads) | |||
| 1231 | continue; // Child doesn't exist so keep going | |||
| 1232 | else { | |||
| 1233 | kmp_info_t *child_thr = other_threads[child_tid]; | |||
| 1234 | kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb; | |||
| 1235 | #if KMP_CACHE_MANAGE | |||
| 1236 | kmp_uint32 next_child_tid = child_tid - (1 << level); | |||
| 1237 | // Prefetch next thread's go count | |||
| 1238 | #ifdef KMP_REVERSE_HYPER_BAR | |||
| 1239 | if (child - 1 >= 1 && next_child_tid < num_threads) | |||
| 1240 | #else | |||
| 1241 | if (child + 1 < branch_factor && next_child_tid < num_threads) | |||
| 1242 | #endif // KMP_REVERSE_HYPER_BAR | |||
| 1243 | KMP_CACHE_PREFETCH( | |||
| 1244 | &other_threads[next_child_tid]->th.th_bar[bt].bb.b_go); | |||
| 1245 | #endif /* KMP_CACHE_MANAGE */ | |||
| 1246 | ||||
| 1247 | #if KMP_BARRIER_ICV_PUSH1 | |||
| 1248 | if (propagate_icvs) // push my fixed ICVs to my child | |||
| 1249 | copy_icvs(&child_bar->th_fixed_icvs, &thr_bar->th_fixed_icvs); | |||
| 1250 | #endif // KMP_BARRIER_ICV_PUSH | |||
| 1251 | ||||
| 1252 | KA_TRACE(if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)" "go(%p): %u => %u\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid, &child_bar ->b_go, child_bar->b_go, child_bar->b_go + (1 << 2)); } | |||
| 1253 | 20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)" "go(%p): %u => %u\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid, &child_bar ->b_go, child_bar->b_go, child_bar->b_go + (1 << 2)); } | |||
| 1254 | ("__kmp_hyper_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)"if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)" "go(%p): %u => %u\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid, &child_bar ->b_go, child_bar->b_go, child_bar->b_go + (1 << 2)); } | |||
| 1255 | "go(%p): %u => %u\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)" "go(%p): %u => %u\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid, &child_bar ->b_go, child_bar->b_go, child_bar->b_go + (1 << 2)); } | |||
| 1256 | gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)" "go(%p): %u => %u\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid, &child_bar ->b_go, child_bar->b_go, child_bar->b_go + (1 << 2)); } | |||
| 1257 | team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)" "go(%p): %u => %u\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid, &child_bar ->b_go, child_bar->b_go, child_bar->b_go + (1 << 2)); } | |||
| 1258 | child_bar->b_go + KMP_BARRIER_STATE_BUMP))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)" "go(%p): %u => %u\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid, &child_bar ->b_go, child_bar->b_go, child_bar->b_go + (1 << 2)); }; | |||
| 1259 | // Release child from barrier | |||
| 1260 | kmp_flag_64<> flag(&child_bar->b_go, child_thr); | |||
| 1261 | flag.release(); | |||
| 1262 | } | |||
| 1263 | } | |||
| 1264 | } | |||
| 1265 | #if KMP_BARRIER_ICV_PUSH1 | |||
| 1266 | if (propagate_icvs && | |||
| 1267 | !KMP_MASTER_TID(tid)(0 == (tid))) { // copy ICVs locally to final dest | |||
| 1268 | __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid, | |||
| 1269 | FALSE0); | |||
| 1270 | copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, | |||
| 1271 | &thr_bar->th_fixed_icvs); | |||
| 1272 | } | |||
| 1273 | #endif | |||
| 1274 | KA_TRACE(if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_release: T#%d(%d:%d) exit for barrier type %d\n" , gtid, team->t.t_id, tid, bt); } | |||
| 1275 | 20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_release: T#%d(%d:%d) exit for barrier type %d\n" , gtid, team->t.t_id, tid, bt); } | |||
| 1276 | ("__kmp_hyper_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_release: T#%d(%d:%d) exit for barrier type %d\n" , gtid, team->t.t_id, tid, bt); } | |||
| 1277 | gtid, team->t.t_id, tid, bt))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hyper_barrier_release: T#%d(%d:%d) exit for barrier type %d\n" , gtid, team->t.t_id, tid, bt); }; | |||
| 1278 | } | |||
| 1279 | ||||
| 1280 | // Hierarchical Barrier | |||
| 1281 | ||||
| 1282 | // Initialize thread barrier data | |||
| 1283 | /* Initializes/re-initializes the hierarchical barrier data stored on a thread. | |||
| 1284 | Performs the minimum amount of initialization required based on how the team | |||
| 1285 | has changed. Returns true if leaf children will require both on-core and | |||
| 1286 | traditional wake-up mechanisms. For example, if the team size increases, | |||
| 1287 | threads already in the team will respond to on-core wakeup on their parent | |||
| 1288 | thread, but threads newly added to the team will only be listening on the | |||
| 1289 | their local b_go. */ | |||
| 1290 | static bool __kmp_init_hierarchical_barrier_thread(enum barrier_type bt, | |||
| 1291 | kmp_bstate_t *thr_bar, | |||
| 1292 | kmp_uint32 nproc, int gtid, | |||
| 1293 | int tid, kmp_team_t *team) { | |||
| 1294 | // Checks to determine if (re-)initialization is needed | |||
| 1295 | bool uninitialized = thr_bar->team == NULL__null; | |||
| 1296 | bool team_changed = team != thr_bar->team; | |||
| 1297 | bool team_sz_changed = nproc != thr_bar->nproc; | |||
| 1298 | bool tid_changed = tid != thr_bar->old_tid; | |||
| 1299 | bool retval = false; | |||
| 1300 | ||||
| 1301 | if (uninitialized || team_sz_changed) { | |||
| 1302 | __kmp_get_hierarchy(nproc, thr_bar); | |||
| 1303 | } | |||
| 1304 | ||||
| 1305 | if (uninitialized || team_sz_changed || tid_changed) { | |||
| 1306 | thr_bar->my_level = thr_bar->depth - 1; // default for primary thread | |||
| 1307 | thr_bar->parent_tid = -1; // default for primary thread | |||
| 1308 | if (!KMP_MASTER_TID(tid)(0 == (tid))) { | |||
| 1309 | // if not primary thread, find parent thread in hierarchy | |||
| 1310 | kmp_uint32 d = 0; | |||
| 1311 | while (d < thr_bar->depth) { // find parent based on level of thread in | |||
| 1312 | // hierarchy, and note level | |||
| 1313 | kmp_uint32 rem; | |||
| 1314 | if (d == thr_bar->depth - 2) { // reached level right below the primary | |||
| 1315 | thr_bar->parent_tid = 0; | |||
| 1316 | thr_bar->my_level = d; | |||
| 1317 | break; | |||
| 1318 | } else if ((rem = tid % thr_bar->skip_per_level[d + 1]) != 0) { | |||
| 1319 | // TODO: can we make the above op faster? | |||
| 1320 | // thread is not a subtree root at next level, so this is max | |||
| 1321 | thr_bar->parent_tid = tid - rem; | |||
| 1322 | thr_bar->my_level = d; | |||
| 1323 | break; | |||
| 1324 | } | |||
| 1325 | ++d; | |||
| 1326 | } | |||
| 1327 | } | |||
| 1328 | __kmp_type_convert(7 - ((tid - thr_bar->parent_tid) / | |||
| 1329 | (thr_bar->skip_per_level[thr_bar->my_level])), | |||
| 1330 | &(thr_bar->offset)); | |||
| 1331 | thr_bar->old_tid = tid; | |||
| 1332 | thr_bar->wait_flag = KMP_BARRIER_NOT_WAITING0; | |||
| 1333 | thr_bar->team = team; | |||
| 1334 | thr_bar->parent_bar = | |||
| 1335 | &team->t.t_threads[thr_bar->parent_tid]->th.th_bar[bt].bb; | |||
| 1336 | } | |||
| 1337 | if (uninitialized || team_changed || tid_changed) { | |||
| 1338 | thr_bar->team = team; | |||
| 1339 | thr_bar->parent_bar = | |||
| 1340 | &team->t.t_threads[thr_bar->parent_tid]->th.th_bar[bt].bb; | |||
| 1341 | retval = true; | |||
| 1342 | } | |||
| 1343 | if (uninitialized || team_sz_changed || tid_changed) { | |||
| 1344 | thr_bar->nproc = nproc; | |||
| 1345 | thr_bar->leaf_kids = thr_bar->base_leaf_kids; | |||
| 1346 | if (thr_bar->my_level == 0) | |||
| 1347 | thr_bar->leaf_kids = 0; | |||
| 1348 | if (thr_bar->leaf_kids && (kmp_uint32)tid + thr_bar->leaf_kids + 1 > nproc) | |||
| 1349 | __kmp_type_convert(nproc - tid - 1, &(thr_bar->leaf_kids)); | |||
| 1350 | thr_bar->leaf_state = 0; | |||
| 1351 | for (int i = 0; i < thr_bar->leaf_kids; ++i) | |||
| 1352 | ((char *)&(thr_bar->leaf_state))[7 - i] = 1; | |||
| 1353 | } | |||
| 1354 | return retval; | |||
| 1355 | } | |||
| 1356 | ||||
| 1357 | static void __kmp_hierarchical_barrier_gather( | |||
| 1358 | enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, | |||
| 1359 | void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj), void *itt_sync_obj) { | |||
| 1360 | KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hier_gather)((void)0); | |||
| 1361 | kmp_team_t *team = this_thr->th.th_team; | |||
| 1362 | kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb; | |||
| 1363 | kmp_uint32 nproc = this_thr->th.th_team_nproc; | |||
| 1364 | kmp_info_t **other_threads = team->t.t_threads; | |||
| 1365 | kmp_uint64 new_state = 0; | |||
| 1366 | ||||
| 1367 | int level = team->t.t_level; | |||
| 1368 | if (other_threads[0] | |||
| 1369 | ->th.th_teams_microtask) // are we inside the teams construct? | |||
| 1370 | if (this_thr->th.th_teams_size.nteams > 1) | |||
| 1371 | ++level; // level was not increased in teams construct for team_of_masters | |||
| 1372 | if (level == 1) | |||
| 1373 | thr_bar->use_oncore_barrier = 1; | |||
| 1374 | else | |||
| 1375 | thr_bar->use_oncore_barrier = 0; // Do not use oncore barrier when nested | |||
| 1376 | ||||
| 1377 | KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) enter for "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) enter for " "barrier type %d\n", gtid, team->t.t_id, tid, bt); } | |||
| 1378 | "barrier type %d\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) enter for " "barrier type %d\n", gtid, team->t.t_id, tid, bt); } | |||
| 1379 | gtid, team->t.t_id, tid, bt))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) enter for " "barrier type %d\n", gtid, team->t.t_id, tid, bt); }; | |||
| 1380 | KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid])if (!(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid ])) { __kmp_debug_assert("this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]" , "openmp/runtime/src/kmp_barrier.cpp", 1380); }; | |||
| 1381 | ||||
| 1382 | #if USE_ITT_BUILD1 && USE_ITT_NOTIFY1 | |||
| 1383 | // Barrier imbalance - save arrive time to the thread | |||
| 1384 | if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) { | |||
| 1385 | this_thr->th.th_bar_arrive_time = __itt_get_timestamp(!__kmp_itt_get_timestamp_ptr__3_0) ? 0 : __kmp_itt_get_timestamp_ptr__3_0(); | |||
| 1386 | } | |||
| 1387 | #endif | |||
| 1388 | ||||
| 1389 | (void)__kmp_init_hierarchical_barrier_thread(bt, thr_bar, nproc, gtid, tid, | |||
| 1390 | team); | |||
| 1391 | ||||
| 1392 | if (thr_bar->my_level) { // not a leaf (my_level==0 means leaf) | |||
| 1393 | kmp_int32 child_tid; | |||
| 1394 | new_state = | |||
| 1395 | (kmp_uint64)team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP(1 << 2); | |||
| 1396 | if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME(2147483647) && | |||
| 1397 | thr_bar->use_oncore_barrier) { | |||
| 1398 | if (thr_bar->leaf_kids) { | |||
| 1399 | // First, wait for leaf children to check-in on my b_arrived flag | |||
| 1400 | kmp_uint64 leaf_state = | |||
| 1401 | KMP_MASTER_TID(tid)(0 == (tid)) | |||
| 1402 | ? thr_bar->b_arrived | thr_bar->leaf_state | |||
| 1403 | : team->t.t_bar[bt].b_arrived | thr_bar->leaf_state; | |||
| 1404 | KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) waiting "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) waiting " "for leaf kids\n", gtid, team->t.t_id, tid); } | |||
| 1405 | "for leaf kids\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) waiting " "for leaf kids\n", gtid, team->t.t_id, tid); } | |||
| 1406 | gtid, team->t.t_id, tid))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) waiting " "for leaf kids\n", gtid, team->t.t_id, tid); }; | |||
| 1407 | kmp_flag_64<> flag(&thr_bar->b_arrived, leaf_state); | |||
| 1408 | flag.wait(this_thr, FALSE0 USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | |||
| 1409 | if (reduce) { | |||
| 1410 | OMPT_REDUCTION_DECL(this_thr, gtid)ompt_data_t *my_task_data = (&(this_thr->th.th_current_task ->ompt_task_info.task_data)); ompt_data_t *my_parallel_data = (&(this_thr->th.th_team->t.ompt_team_info.parallel_data )); void *return_address = __ompt_load_return_address(gtid);; | |||
| 1411 | OMPT_REDUCTION_BEGINif (ompt_enabled.enabled && ompt_enabled.ompt_callback_reduction ) { ompt_callbacks.ompt_callback_reduction_callback( ompt_sync_region_reduction , ompt_scope_begin, my_parallel_data, my_task_data, return_address ); }; | |||
| 1412 | for (child_tid = tid + 1; child_tid <= tid + thr_bar->leaf_kids; | |||
| 1413 | ++child_tid) { | |||
| 1414 | KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += "if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += " "T#%d(%d:%d)\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid); } | |||
| 1415 | "T#%d(%d:%d)\n",if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += " "T#%d(%d:%d)\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid); } | |||
| 1416 | gtid, team->t.t_id, tid,if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += " "T#%d(%d:%d)\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid); } | |||
| 1417 | __kmp_gtid_from_tid(child_tid, team), team->t.t_id,if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += " "T#%d(%d:%d)\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid); } | |||
| 1418 | child_tid))if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += " "T#%d(%d:%d)\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid); }; | |||
| 1419 | (*reduce)(this_thr->th.th_local.reduce_data, | |||
| 1420 | other_threads[child_tid]->th.th_local.reduce_data); | |||
| 1421 | } | |||
| 1422 | OMPT_REDUCTION_ENDif (ompt_enabled.enabled && ompt_enabled.ompt_callback_reduction ) { ompt_callbacks.ompt_callback_reduction_callback( ompt_sync_region_reduction , ompt_scope_end, my_parallel_data, my_task_data, return_address ); }; | |||
| 1423 | } | |||
| 1424 | // clear leaf_state bits | |||
| 1425 | KMP_TEST_THEN_AND64(&thr_bar->b_arrived, ~(thr_bar->leaf_state))__sync_fetch_and_and((volatile kmp_uint64 *)(&thr_bar-> b_arrived), (kmp_uint64)(~(thr_bar->leaf_state))); | |||
| 1426 | } | |||
| 1427 | // Next, wait for higher level children on each child's b_arrived flag | |||
| 1428 | for (kmp_uint32 d = 1; d < thr_bar->my_level; | |||
| 1429 | ++d) { // gather lowest level threads first, but skip 0 | |||
| 1430 | kmp_uint32 last = tid + thr_bar->skip_per_level[d + 1], | |||
| 1431 | skip = thr_bar->skip_per_level[d]; | |||
| 1432 | if (last > nproc) | |||
| 1433 | last = nproc; | |||
| 1434 | for (child_tid = tid + skip; child_tid < (int)last; child_tid += skip) { | |||
| 1435 | kmp_info_t *child_thr = other_threads[child_tid]; | |||
| 1436 | kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb; | |||
| 1437 | KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait " "T#%d(%d:%d) " "arrived(%p) == %llu\n", gtid, team->t.t_id , tid, __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid, &child_bar->b_arrived, new_state); } | |||
| 1438 | "T#%d(%d:%d) "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait " "T#%d(%d:%d) " "arrived(%p) == %llu\n", gtid, team->t.t_id , tid, __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid, &child_bar->b_arrived, new_state); } | |||
| 1439 | "arrived(%p) == %llu\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait " "T#%d(%d:%d) " "arrived(%p) == %llu\n", gtid, team->t.t_id , tid, __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid, &child_bar->b_arrived, new_state); } | |||
| 1440 | gtid, team->t.t_id, tid,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait " "T#%d(%d:%d) " "arrived(%p) == %llu\n", gtid, team->t.t_id , tid, __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid, &child_bar->b_arrived, new_state); } | |||
| 1441 | __kmp_gtid_from_tid(child_tid, team), team->t.t_id,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait " "T#%d(%d:%d) " "arrived(%p) == %llu\n", gtid, team->t.t_id , tid, __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid, &child_bar->b_arrived, new_state); } | |||
| 1442 | child_tid, &child_bar->b_arrived, new_state))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait " "T#%d(%d:%d) " "arrived(%p) == %llu\n", gtid, team->t.t_id , tid, __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid, &child_bar->b_arrived, new_state); }; | |||
| 1443 | kmp_flag_64<> flag(&child_bar->b_arrived, new_state); | |||
| 1444 | flag.wait(this_thr, FALSE0 USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | |||
| 1445 | if (reduce) { | |||
| 1446 | KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += "if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += " "T#%d(%d:%d)\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid); } | |||
| 1447 | "T#%d(%d:%d)\n",if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += " "T#%d(%d:%d)\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid); } | |||
| 1448 | gtid, team->t.t_id, tid,if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += " "T#%d(%d:%d)\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid); } | |||
| 1449 | __kmp_gtid_from_tid(child_tid, team), team->t.t_id,if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += " "T#%d(%d:%d)\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid); } | |||
| 1450 | child_tid))if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += " "T#%d(%d:%d)\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid); }; | |||
| 1451 | (*reduce)(this_thr->th.th_local.reduce_data, | |||
| 1452 | child_thr->th.th_local.reduce_data); | |||
| 1453 | } | |||
| 1454 | } | |||
| 1455 | } | |||
| 1456 | } else { // Blocktime is not infinite | |||
| 1457 | for (kmp_uint32 d = 0; d < thr_bar->my_level; | |||
| 1458 | ++d) { // Gather lowest level threads first | |||
| 1459 | kmp_uint32 last = tid + thr_bar->skip_per_level[d + 1], | |||
| 1460 | skip = thr_bar->skip_per_level[d]; | |||
| 1461 | if (last > nproc) | |||
| 1462 | last = nproc; | |||
| 1463 | for (child_tid = tid + skip; child_tid < (int)last; child_tid += skip) { | |||
| 1464 | kmp_info_t *child_thr = other_threads[child_tid]; | |||
| 1465 | kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb; | |||
| 1466 | KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait " "T#%d(%d:%d) " "arrived(%p) == %llu\n", gtid, team->t.t_id , tid, __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid, &child_bar->b_arrived, new_state); } | |||
| 1467 | "T#%d(%d:%d) "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait " "T#%d(%d:%d) " "arrived(%p) == %llu\n", gtid, team->t.t_id , tid, __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid, &child_bar->b_arrived, new_state); } | |||
| 1468 | "arrived(%p) == %llu\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait " "T#%d(%d:%d) " "arrived(%p) == %llu\n", gtid, team->t.t_id , tid, __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid, &child_bar->b_arrived, new_state); } | |||
| 1469 | gtid, team->t.t_id, tid,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait " "T#%d(%d:%d) " "arrived(%p) == %llu\n", gtid, team->t.t_id , tid, __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid, &child_bar->b_arrived, new_state); } | |||
| 1470 | __kmp_gtid_from_tid(child_tid, team), team->t.t_id,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait " "T#%d(%d:%d) " "arrived(%p) == %llu\n", gtid, team->t.t_id , tid, __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid, &child_bar->b_arrived, new_state); } | |||
| 1471 | child_tid, &child_bar->b_arrived, new_state))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait " "T#%d(%d:%d) " "arrived(%p) == %llu\n", gtid, team->t.t_id , tid, __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid, &child_bar->b_arrived, new_state); }; | |||
| 1472 | kmp_flag_64<> flag(&child_bar->b_arrived, new_state); | |||
| 1473 | flag.wait(this_thr, FALSE0 USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | |||
| 1474 | if (reduce) { | |||
| 1475 | KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += "if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += " "T#%d(%d:%d)\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid); } | |||
| 1476 | "T#%d(%d:%d)\n",if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += " "T#%d(%d:%d)\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid); } | |||
| 1477 | gtid, team->t.t_id, tid,if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += " "T#%d(%d:%d)\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid); } | |||
| 1478 | __kmp_gtid_from_tid(child_tid, team), team->t.t_id,if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += " "T#%d(%d:%d)\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid); } | |||
| 1479 | child_tid))if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += " "T#%d(%d:%d)\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid (child_tid, team), team->t.t_id, child_tid); }; | |||
| 1480 | (*reduce)(this_thr->th.th_local.reduce_data, | |||
| 1481 | child_thr->th.th_local.reduce_data); | |||
| 1482 | } | |||
| 1483 | } | |||
| 1484 | } | |||
| 1485 | } | |||
| 1486 | } | |||
| 1487 | // All subordinates are gathered; now release parent if not primary thread | |||
| 1488 | ||||
| 1489 | if (!KMP_MASTER_TID(tid)(0 == (tid))) { // worker threads release parent in hierarchy | |||
| 1490 | KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) releasing"if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) releasing" " T#%d(%d:%d) arrived(%p): %llu => %llu\n", gtid, team-> t.t_id, tid, __kmp_gtid_from_tid(thr_bar->parent_tid, team ), team->t.t_id, thr_bar->parent_tid, &thr_bar-> b_arrived, thr_bar->b_arrived, thr_bar->b_arrived + (1 << 2)); } | |||
| 1491 | " T#%d(%d:%d) arrived(%p): %llu => %llu\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) releasing" " T#%d(%d:%d) arrived(%p): %llu => %llu\n", gtid, team-> t.t_id, tid, __kmp_gtid_from_tid(thr_bar->parent_tid, team ), team->t.t_id, thr_bar->parent_tid, &thr_bar-> b_arrived, thr_bar->b_arrived, thr_bar->b_arrived + (1 << 2)); } | |||
| 1492 | gtid, team->t.t_id, tid,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) releasing" " T#%d(%d:%d) arrived(%p): %llu => %llu\n", gtid, team-> t.t_id, tid, __kmp_gtid_from_tid(thr_bar->parent_tid, team ), team->t.t_id, thr_bar->parent_tid, &thr_bar-> b_arrived, thr_bar->b_arrived, thr_bar->b_arrived + (1 << 2)); } | |||
| 1493 | __kmp_gtid_from_tid(thr_bar->parent_tid, team), team->t.t_id,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) releasing" " T#%d(%d:%d) arrived(%p): %llu => %llu\n", gtid, team-> t.t_id, tid, __kmp_gtid_from_tid(thr_bar->parent_tid, team ), team->t.t_id, thr_bar->parent_tid, &thr_bar-> b_arrived, thr_bar->b_arrived, thr_bar->b_arrived + (1 << 2)); } | |||
| 1494 | thr_bar->parent_tid, &thr_bar->b_arrived, thr_bar->b_arrived,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) releasing" " T#%d(%d:%d) arrived(%p): %llu => %llu\n", gtid, team-> t.t_id, tid, __kmp_gtid_from_tid(thr_bar->parent_tid, team ), team->t.t_id, thr_bar->parent_tid, &thr_bar-> b_arrived, thr_bar->b_arrived, thr_bar->b_arrived + (1 << 2)); } | |||
| 1495 | thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) releasing" " T#%d(%d:%d) arrived(%p): %llu => %llu\n", gtid, team-> t.t_id, tid, __kmp_gtid_from_tid(thr_bar->parent_tid, team ), team->t.t_id, thr_bar->parent_tid, &thr_bar-> b_arrived, thr_bar->b_arrived, thr_bar->b_arrived + (1 << 2)); }; | |||
| 1496 | /* Mark arrival to parent: After performing this write, a worker thread may | |||
| 1497 | not assume that the team is valid any more - it could be deallocated by | |||
| 1498 | the primary thread at any time. */ | |||
| 1499 | if (thr_bar->my_level || __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME(2147483647) || | |||
| 1500 | !thr_bar->use_oncore_barrier) { // Parent is waiting on my b_arrived | |||
| 1501 | // flag; release it | |||
| 1502 | kmp_flag_64<> flag(&thr_bar->b_arrived, | |||
| 1503 | other_threads[thr_bar->parent_tid]); | |||
| 1504 | flag.release(); | |||
| 1505 | } else { | |||
| 1506 | // Leaf does special release on "offset" bits of parent's b_arrived flag | |||
| 1507 | thr_bar->b_arrived = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP(1 << 2); | |||
| 1508 | kmp_flag_oncore flag(&thr_bar->parent_bar->b_arrived, | |||
| 1509 | thr_bar->offset + 1); | |||
| 1510 | flag.set_waiter(other_threads[thr_bar->parent_tid]); | |||
| 1511 | flag.release(); | |||
| 1512 | } | |||
| 1513 | } else { // Primary thread needs to update the team's b_arrived value | |||
| 1514 | team->t.t_bar[bt].b_arrived = new_state; | |||
| 1515 | KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) set team %d "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) set team %d " "arrived(%p) = %llu\n", gtid, team->t.t_id, tid, team-> t.t_id, &team->t.t_bar[bt].b_arrived, team->t.t_bar [bt].b_arrived); } | |||
| 1516 | "arrived(%p) = %llu\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) set team %d " "arrived(%p) = %llu\n", gtid, team->t.t_id, tid, team-> t.t_id, &team->t.t_bar[bt].b_arrived, team->t.t_bar [bt].b_arrived); } | |||
| 1517 | gtid, team->t.t_id, tid, team->t.t_id,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) set team %d " "arrived(%p) = %llu\n", gtid, team->t.t_id, tid, team-> t.t_id, &team->t.t_bar[bt].b_arrived, team->t.t_bar [bt].b_arrived); } | |||
| 1518 | &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) set team %d " "arrived(%p) = %llu\n", gtid, team->t.t_id, tid, team-> t.t_id, &team->t.t_bar[bt].b_arrived, team->t.t_bar [bt].b_arrived); }; | |||
| 1519 | } | |||
| 1520 | // Is the team access below unsafe or just technically invalid? | |||
| 1521 | KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) exit for "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) exit for " "barrier type %d\n", gtid, team->t.t_id, tid, bt); } | |||
| 1522 | "barrier type %d\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) exit for " "barrier type %d\n", gtid, team->t.t_id, tid, bt); } | |||
| 1523 | gtid, team->t.t_id, tid, bt))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) exit for " "barrier type %d\n", gtid, team->t.t_id, tid, bt); }; | |||
| 1524 | } | |||
| 1525 | ||||
| 1526 | static void __kmp_hierarchical_barrier_release( | |||
| 1527 | enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, | |||
| 1528 | int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj), void *itt_sync_obj) { | |||
| 1529 | KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hier_release)((void)0); | |||
| 1530 | kmp_team_t *team; | |||
| 1531 | kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb; | |||
| 1532 | kmp_uint32 nproc; | |||
| 1533 | bool team_change = false; // indicates on-core barrier shouldn't be used | |||
| 1534 | ||||
| 1535 | if (KMP_MASTER_TID(tid)(0 == (tid))) { | |||
| 1536 | team = __kmp_threads[gtid]->th.th_team; | |||
| 1537 | KMP_DEBUG_ASSERT(team != NULL)if (!(team != __null)) { __kmp_debug_assert("team != __null", "openmp/runtime/src/kmp_barrier.cpp", 1537); }; | |||
| 1538 | KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) primary "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) primary " "entered barrier type %d\n", gtid, team->t.t_id, tid, bt) ; } | |||
| 1539 | "entered barrier type %d\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) primary " "entered barrier type %d\n", gtid, team->t.t_id, tid, bt) ; } | |||
| 1540 | gtid, team->t.t_id, tid, bt))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) primary " "entered barrier type %d\n", gtid, team->t.t_id, tid, bt) ; }; | |||
| 1541 | } else { // Worker threads | |||
| 1542 | // Wait for parent thread to release me | |||
| 1543 | if (!thr_bar->use_oncore_barrier || | |||
| 1544 | __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME(2147483647) || thr_bar->my_level != 0 || | |||
| 1545 | thr_bar->team == NULL__null) { | |||
| 1546 | // Use traditional method of waiting on my own b_go flag | |||
| 1547 | thr_bar->wait_flag = KMP_BARRIER_OWN_FLAG1; | |||
| 1548 | kmp_flag_64<> flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP(1 << 2)); | |||
| 1549 | flag.wait(this_thr, TRUE(!0) USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | |||
| 1550 | TCW_8(thr_bar->b_go,(thr_bar->b_go) = (0) | |||
| 1551 | KMP_INIT_BARRIER_STATE)(thr_bar->b_go) = (0); // Reset my b_go flag for next time | |||
| 1552 | } else { // Thread barrier data is initialized, this is a leaf, blocktime is | |||
| 1553 | // infinite, not nested | |||
| 1554 | // Wait on my "offset" bits on parent's b_go flag | |||
| 1555 | thr_bar->wait_flag = KMP_BARRIER_PARENT_FLAG2; | |||
| 1556 | kmp_flag_oncore flag(&thr_bar->parent_bar->b_go, KMP_BARRIER_STATE_BUMP(1 << 2), | |||
| 1557 | thr_bar->offset + 1, bt, | |||
| 1558 | this_thr USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | |||
| 1559 | flag.wait(this_thr, TRUE(!0)); | |||
| 1560 | if (thr_bar->wait_flag == | |||
| 1561 | KMP_BARRIER_SWITCHING4) { // Thread was switched to own b_go | |||
| 1562 | TCW_8(thr_bar->b_go,(thr_bar->b_go) = (0) | |||
| 1563 | KMP_INIT_BARRIER_STATE)(thr_bar->b_go) = (0); // Reset my b_go flag for next time | |||
| 1564 | } else { // Reset my bits on parent's b_go flag | |||
| 1565 | (RCAST(volatile char *,reinterpret_cast<volatile char *>(&(thr_bar->parent_bar ->b_go)) | |||
| 1566 | &(thr_bar->parent_bar->b_go))reinterpret_cast<volatile char *>(&(thr_bar->parent_bar ->b_go)))[thr_bar->offset + 1] = 0; | |||
| 1567 | } | |||
| 1568 | } | |||
| 1569 | thr_bar->wait_flag = KMP_BARRIER_NOT_WAITING0; | |||
| 1570 | // Early exit for reaping threads releasing forkjoin barrier | |||
| 1571 | if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done)) | |||
| 1572 | return; | |||
| 1573 | // The worker thread may now assume that the team is valid. | |||
| 1574 | team = __kmp_threads[gtid]->th.th_team; | |||
| 1575 | KMP_DEBUG_ASSERT(team != NULL)if (!(team != __null)) { __kmp_debug_assert("team != __null", "openmp/runtime/src/kmp_barrier.cpp", 1575); }; | |||
| 1576 | tid = __kmp_tid_from_gtid(gtid); | |||
| 1577 | ||||
| 1578 | KA_TRACE(if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) set go(%p) = %u\n" , gtid, team->t.t_id, tid, &thr_bar->b_go, 0); } | |||
| 1579 | 20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) set go(%p) = %u\n" , gtid, team->t.t_id, tid, &thr_bar->b_go, 0); } | |||
| 1580 | ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) set go(%p) = %u\n" , gtid, team->t.t_id, tid, &thr_bar->b_go, 0); } | |||
| 1581 | gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) set go(%p) = %u\n" , gtid, team->t.t_id, tid, &thr_bar->b_go, 0); }; | |||
| 1582 | KMP_MB(); // Flush all pending memory write invalidates. | |||
| 1583 | } | |||
| 1584 | ||||
| 1585 | nproc = this_thr->th.th_team_nproc; | |||
| 1586 | int level = team->t.t_level; | |||
| 1587 | if (team->t.t_threads[0] | |||
| 1588 | ->th.th_teams_microtask) { // are we inside the teams construct? | |||
| 1589 | if (team->t.t_pkfn != (microtask_t)__kmp_teams_master && | |||
| 1590 | this_thr->th.th_teams_level == level) | |||
| 1591 | ++level; // level was not increased in teams construct for team_of_workers | |||
| 1592 | if (this_thr->th.th_teams_size.nteams > 1) | |||
| 1593 | ++level; // level was not increased in teams construct for team_of_masters | |||
| 1594 | } | |||
| 1595 | if (level == 1) | |||
| 1596 | thr_bar->use_oncore_barrier = 1; | |||
| 1597 | else | |||
| 1598 | thr_bar->use_oncore_barrier = 0; // Do not use oncore barrier when nested | |||
| 1599 | ||||
| 1600 | // If the team size has increased, we still communicate with old leaves via | |||
| 1601 | // oncore barrier. | |||
| 1602 | unsigned short int old_leaf_kids = thr_bar->leaf_kids; | |||
| 1603 | kmp_uint64 old_leaf_state = thr_bar->leaf_state; | |||
| 1604 | team_change = __kmp_init_hierarchical_barrier_thread(bt, thr_bar, nproc, gtid, | |||
| 1605 | tid, team); | |||
| 1606 | // But if the entire team changes, we won't use oncore barrier at all | |||
| 1607 | if (team_change) | |||
| 1608 | old_leaf_kids = 0; | |||
| 1609 | ||||
| 1610 | #if KMP_BARRIER_ICV_PUSH1 | |||
| 1611 | if (propagate_icvs) { | |||
| 1612 | __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid, | |||
| 1613 | FALSE0); | |||
| 1614 | if (KMP_MASTER_TID((0 == (tid)) | |||
| 1615 | tid)(0 == (tid))) { // primary already has copy in final destination; copy | |||
| 1616 | copy_icvs(&thr_bar->th_fixed_icvs, | |||
| 1617 | &team->t.t_implicit_task_taskdata[tid].td_icvs); | |||
| 1618 | } else if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME(2147483647) && | |||
| 1619 | thr_bar->use_oncore_barrier) { // optimization for inf blocktime | |||
| 1620 | if (!thr_bar->my_level) // I'm a leaf in the hierarchy (my_level==0) | |||
| 1621 | // leaves (on-core children) pull parent's fixed ICVs directly to local | |||
| 1622 | // ICV store | |||
| 1623 | copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, | |||
| 1624 | &thr_bar->parent_bar->th_fixed_icvs); | |||
| 1625 | // non-leaves will get ICVs piggybacked with b_go via NGO store | |||
| 1626 | } else { // blocktime is not infinite; pull ICVs from parent's fixed ICVs | |||
| 1627 | if (thr_bar->my_level) // not a leaf; copy ICVs to my fixed ICVs child can | |||
| 1628 | // access | |||
| 1629 | copy_icvs(&thr_bar->th_fixed_icvs, &thr_bar->parent_bar->th_fixed_icvs); | |||
| 1630 | else // leaves copy parent's fixed ICVs directly to local ICV store | |||
| 1631 | copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, | |||
| 1632 | &thr_bar->parent_bar->th_fixed_icvs); | |||
| 1633 | } | |||
| 1634 | } | |||
| 1635 | #endif // KMP_BARRIER_ICV_PUSH | |||
| 1636 | ||||
| 1637 | // Now, release my children | |||
| 1638 | if (thr_bar->my_level) { // not a leaf | |||
| 1639 | kmp_int32 child_tid; | |||
| 1640 | kmp_uint32 last; | |||
| 1641 | if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME(2147483647) && | |||
| 1642 | thr_bar->use_oncore_barrier) { | |||
| 1643 | if (KMP_MASTER_TID(tid)(0 == (tid))) { // do a flat release | |||
| 1644 | // Set local b_go to bump children via NGO store of the cache line | |||
| 1645 | // containing IVCs and b_go. | |||
| 1646 | thr_bar->b_go = KMP_BARRIER_STATE_BUMP(1 << 2); | |||
| 1647 | // Use ngo stores if available; b_go piggybacks in the last 8 bytes of | |||
| 1648 | // the cache line | |||
| 1649 | ngo_load(&thr_bar->th_fixed_icvs)((void)0); | |||
| 1650 | // This loops over all the threads skipping only the leaf nodes in the | |||
| 1651 | // hierarchy | |||
| 1652 | for (child_tid = thr_bar->skip_per_level[1]; child_tid < (int)nproc; | |||
| 1653 | child_tid += thr_bar->skip_per_level[1]) { | |||
| 1654 | kmp_bstate_t *child_bar = | |||
| 1655 | &team->t.t_threads[child_tid]->th.th_bar[bt].bb; | |||
| 1656 | KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) " "releasing T#%d(%d:%d)" " go(%p): %u => %u\n", gtid, team ->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), team-> t.t_id, child_tid, &child_bar->b_go, child_bar->b_go , child_bar->b_go + (1 << 2)); } | |||
| 1657 | "releasing T#%d(%d:%d)"if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) " "releasing T#%d(%d:%d)" " go(%p): %u => %u\n", gtid, team ->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), team-> t.t_id, child_tid, &child_bar->b_go, child_bar->b_go , child_bar->b_go + (1 << 2)); } | |||
| 1658 | " go(%p): %u => %u\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) " "releasing T#%d(%d:%d)" " go(%p): %u => %u\n", gtid, team ->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), team-> t.t_id, child_tid, &child_bar->b_go, child_bar->b_go , child_bar->b_go + (1 << 2)); } | |||
| 1659 | gtid, team->t.t_id, tid,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) " "releasing T#%d(%d:%d)" " go(%p): %u => %u\n", gtid, team ->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), team-> t.t_id, child_tid, &child_bar->b_go, child_bar->b_go , child_bar->b_go + (1 << 2)); } | |||
| 1660 | __kmp_gtid_from_tid(child_tid, team), team->t.t_id,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) " "releasing T#%d(%d:%d)" " go(%p): %u => %u\n", gtid, team ->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), team-> t.t_id, child_tid, &child_bar->b_go, child_bar->b_go , child_bar->b_go + (1 << 2)); } | |||
| 1661 | child_tid, &child_bar->b_go, child_bar->b_go,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) " "releasing T#%d(%d:%d)" " go(%p): %u => %u\n", gtid, team ->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), team-> t.t_id, child_tid, &child_bar->b_go, child_bar->b_go , child_bar->b_go + (1 << 2)); } | |||
| 1662 | child_bar->b_go + KMP_BARRIER_STATE_BUMP))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) " "releasing T#%d(%d:%d)" " go(%p): %u => %u\n", gtid, team ->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), team-> t.t_id, child_tid, &child_bar->b_go, child_bar->b_go , child_bar->b_go + (1 << 2)); }; | |||
| 1663 | // Use ngo store (if available) to both store ICVs and release child | |||
| 1664 | // via child's b_go | |||
| 1665 | ngo_store_go(&child_bar->th_fixed_icvs, &thr_bar->th_fixed_icvs)memcpy((&child_bar->th_fixed_icvs), (&thr_bar-> th_fixed_icvs), 64); | |||
| 1666 | } | |||
| 1667 | ngo_sync()((void)0); | |||
| 1668 | } | |||
| 1669 | TCW_8(thr_bar->b_go,(thr_bar->b_go) = (0) | |||
| 1670 | KMP_INIT_BARRIER_STATE)(thr_bar->b_go) = (0); // Reset my b_go flag for next time | |||
| 1671 | // Now, release leaf children | |||
| 1672 | if (thr_bar->leaf_kids) { // if there are any | |||
| 1673 | // We test team_change on the off-chance that the level 1 team changed. | |||
| 1674 | if (team_change || | |||
| 1675 | old_leaf_kids < thr_bar->leaf_kids) { // some old, some new | |||
| 1676 | if (old_leaf_kids) { // release old leaf kids | |||
| 1677 | thr_bar->b_go |= old_leaf_state; | |||
| 1678 | } | |||
| 1679 | // Release new leaf kids | |||
| 1680 | last = tid + thr_bar->skip_per_level[1]; | |||
| 1681 | if (last > nproc) | |||
| 1682 | last = nproc; | |||
| 1683 | for (child_tid = tid + 1 + old_leaf_kids; child_tid < (int)last; | |||
| 1684 | ++child_tid) { // skip_per_level[0]=1 | |||
| 1685 | kmp_info_t *child_thr = team->t.t_threads[child_tid]; | |||
| 1686 | kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb; | |||
| 1687 | KA_TRACE(if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) releasing" " T#%d(%d:%d) go(%p): %u => %u\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid , &child_bar->b_go, child_bar->b_go, child_bar-> b_go + (1 << 2)); } | |||
| 1688 | 20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) releasing" " T#%d(%d:%d) go(%p): %u => %u\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid , &child_bar->b_go, child_bar->b_go, child_bar-> b_go + (1 << 2)); } | |||
| 1689 | ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) releasing"if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) releasing" " T#%d(%d:%d) go(%p): %u => %u\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid , &child_bar->b_go, child_bar->b_go, child_bar-> b_go + (1 << 2)); } | |||
| 1690 | " T#%d(%d:%d) go(%p): %u => %u\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) releasing" " T#%d(%d:%d) go(%p): %u => %u\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid , &child_bar->b_go, child_bar->b_go, child_bar-> b_go + (1 << 2)); } | |||
| 1691 | gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) releasing" " T#%d(%d:%d) go(%p): %u => %u\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid , &child_bar->b_go, child_bar->b_go, child_bar-> b_go + (1 << 2)); } | |||
| 1692 | team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) releasing" " T#%d(%d:%d) go(%p): %u => %u\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid , &child_bar->b_go, child_bar->b_go, child_bar-> b_go + (1 << 2)); } | |||
| 1693 | child_bar->b_go + KMP_BARRIER_STATE_BUMP))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) releasing" " T#%d(%d:%d) go(%p): %u => %u\n", gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid , &child_bar->b_go, child_bar->b_go, child_bar-> b_go + (1 << 2)); }; | |||
| 1694 | // Release child using child's b_go flag | |||
| 1695 | kmp_flag_64<> flag(&child_bar->b_go, child_thr); | |||
| 1696 | flag.release(); | |||
| 1697 | } | |||
| 1698 | } else { // Release all children at once with leaf_state bits on my own | |||
| 1699 | // b_go flag | |||
| 1700 | thr_bar->b_go |= thr_bar->leaf_state; | |||
| 1701 | } | |||
| 1702 | } | |||
| 1703 | } else { // Blocktime is not infinite; do a simple hierarchical release | |||
| 1704 | for (int d = thr_bar->my_level - 1; d >= 0; | |||
| 1705 | --d) { // Release highest level threads first | |||
| 1706 | last = tid + thr_bar->skip_per_level[d + 1]; | |||
| 1707 | kmp_uint32 skip = thr_bar->skip_per_level[d]; | |||
| 1708 | if (last > nproc) | |||
| 1709 | last = nproc; | |||
| 1710 | for (child_tid = tid + skip; child_tid < (int)last; child_tid += skip) { | |||
| 1711 | kmp_info_t *child_thr = team->t.t_threads[child_tid]; | |||
| 1712 | kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb; | |||
| 1713 | KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) " "releasing T#%d(%d:%d) go(%p): %u => %u\n", gtid, team-> t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), team->t .t_id, child_tid, &child_bar->b_go, child_bar->b_go , child_bar->b_go + (1 << 2)); } | |||
| 1714 | "releasing T#%d(%d:%d) go(%p): %u => %u\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) " "releasing T#%d(%d:%d) go(%p): %u => %u\n", gtid, team-> t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), team->t .t_id, child_tid, &child_bar->b_go, child_bar->b_go , child_bar->b_go + (1 << 2)); } | |||
| 1715 | gtid, team->t.t_id, tid,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) " "releasing T#%d(%d:%d) go(%p): %u => %u\n", gtid, team-> t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), team->t .t_id, child_tid, &child_bar->b_go, child_bar->b_go , child_bar->b_go + (1 << 2)); } | |||
| 1716 | __kmp_gtid_from_tid(child_tid, team), team->t.t_id,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) " "releasing T#%d(%d:%d) go(%p): %u => %u\n", gtid, team-> t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), team->t .t_id, child_tid, &child_bar->b_go, child_bar->b_go , child_bar->b_go + (1 << 2)); } | |||
| 1717 | child_tid, &child_bar->b_go, child_bar->b_go,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) " "releasing T#%d(%d:%d) go(%p): %u => %u\n", gtid, team-> t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), team->t .t_id, child_tid, &child_bar->b_go, child_bar->b_go , child_bar->b_go + (1 << 2)); } | |||
| 1718 | child_bar->b_go + KMP_BARRIER_STATE_BUMP))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) " "releasing T#%d(%d:%d) go(%p): %u => %u\n", gtid, team-> t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), team->t .t_id, child_tid, &child_bar->b_go, child_bar->b_go , child_bar->b_go + (1 << 2)); }; | |||
| 1719 | // Release child using child's b_go flag | |||
| 1720 | kmp_flag_64<> flag(&child_bar->b_go, child_thr); | |||
| 1721 | flag.release(); | |||
| 1722 | } | |||
| 1723 | } | |||
| 1724 | } | |||
| 1725 | #if KMP_BARRIER_ICV_PUSH1 | |||
| 1726 | if (propagate_icvs && !KMP_MASTER_TID(tid)(0 == (tid))) | |||
| 1727 | // non-leaves copy ICVs from fixed ICVs to local dest | |||
| 1728 | copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, | |||
| 1729 | &thr_bar->th_fixed_icvs); | |||
| 1730 | #endif // KMP_BARRIER_ICV_PUSH | |||
| 1731 | } | |||
| 1732 | KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) exit for "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) exit for " "barrier type %d\n", gtid, team->t.t_id, tid, bt); } | |||
| 1733 | "barrier type %d\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) exit for " "barrier type %d\n", gtid, team->t.t_id, tid, bt); } | |||
| 1734 | gtid, team->t.t_id, tid, bt))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) exit for " "barrier type %d\n", gtid, team->t.t_id, tid, bt); }; | |||
| 1735 | } | |||
| 1736 | ||||
| 1737 | // End of Barrier Algorithms | |||
| 1738 | ||||
// Holder for the "was this barrier cancelled?" result, selected by the
// `cancellable` template argument:
//  - is_cancellable<true> stores a real bool that can be assigned and read.
//  - is_cancellable<false> stores nothing; assignments are ignored and the
//    conversion to bool is the compile-time constant false.
template <bool cancellable> struct is_cancellable {};

template <> struct is_cancellable<true> {
  bool value = false;

  is_cancellable() = default;
  is_cancellable(bool initial) : value(initial) {}

  // Record a new cancellation state.
  is_cancellable &operator=(bool updated) {
    this->value = updated;
    return *this;
  }

  operator bool() const { return this->value; }
};

template <> struct is_cancellable<false> {
  // Accepts and discards the assigned value; this variant is never cancelled.
  is_cancellable &operator=(bool) { return *this; }

  constexpr operator bool() const { return false; }
};
| 1757 | ||||
| 1758 | // Internal function to do a barrier. | |||
| 1759 | /* If is_split is true, do a split barrier, otherwise, do a plain barrier | |||
| 1760 | If reduce is non-NULL, do a split reduction barrier, otherwise, do a split | |||
| 1761 | barrier | |||
| 1762 | When cancellable = false, | |||
| 1763 | Returns 0 if primary thread, 1 if worker thread. | |||
| 1764 | When cancellable = true | |||
| 1765 | Returns 0 if not cancelled, 1 if cancelled. */ | |||
| 1766 | template <bool cancellable = false> | |||
| 1767 | static int __kmp_barrier_template(enum barrier_type bt, int gtid, int is_split, | |||
| 1768 | size_t reduce_size, void *reduce_data, | |||
| 1769 | void (*reduce)(void *, void *)) { | |||
| 1770 | KMP_TIME_PARTITIONED_BLOCK(OMP_plain_barrier)((void)0); | |||
| 1771 | KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER)((void)0); | |||
| 1772 | int tid = __kmp_tid_from_gtid(gtid); | |||
| 1773 | kmp_info_t *this_thr = __kmp_threads[gtid]; | |||
| 1774 | kmp_team_t *team = this_thr->th.th_team; | |||
| 1775 | int status = 0; | |||
| 1776 | is_cancellable<cancellable> cancelled; | |||
| 1777 | #if OMPT_SUPPORT1 && OMPT_OPTIONAL1 | |||
| 1778 | ompt_data_t *my_task_data; | |||
| 1779 | ompt_data_t *my_parallel_data; | |||
| 1780 | void *return_address; | |||
| 1781 | ompt_sync_region_t barrier_kind; | |||
| 1782 | #endif | |||
| 1783 | ||||
| 1784 | KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) has arrived\n", gtid,if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmp_barrier: T#%d(%d:%d) has arrived\n" , gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid (gtid)); } | |||
| 1785 | __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid)))if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmp_barrier: T#%d(%d:%d) has arrived\n" , gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid (gtid)); }; | |||
| 1786 | ||||
| 1787 | #if OMPT_SUPPORT1 | |||
| 1788 | if (ompt_enabled.enabled) { | |||
| 1789 | #if OMPT_OPTIONAL1 | |||
| 1790 | my_task_data = OMPT_CUR_TASK_DATA(this_thr)(&(this_thr->th.th_current_task->ompt_task_info.task_data )); | |||
| 1791 | my_parallel_data = OMPT_CUR_TEAM_DATA(this_thr)(&(this_thr->th.th_team->t.ompt_team_info.parallel_data )); | |||
| 1792 | return_address = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid); | |||
| 1793 | barrier_kind = __ompt_get_barrier_kind(bt, this_thr); | |||
| 1794 | if (ompt_enabled.ompt_callback_sync_region) { | |||
| 1795 | ompt_callbacks.ompt_callback(ompt_callback_sync_region)ompt_callback_sync_region_callback( | |||
| 1796 | barrier_kind, ompt_scope_begin, my_parallel_data, my_task_data, | |||
| 1797 | return_address); | |||
| 1798 | } | |||
| 1799 | if (ompt_enabled.ompt_callback_sync_region_wait) { | |||
| 1800 | ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)ompt_callback_sync_region_wait_callback( | |||
| 1801 | barrier_kind, ompt_scope_begin, my_parallel_data, my_task_data, | |||
| 1802 | return_address); | |||
| 1803 | } | |||
| 1804 | #endif | |||
| 1805 | // It is OK to report the barrier state after the barrier begin callback. | |||
| 1806 | // According to the OMPT specification, a compliant implementation may | |||
| 1807 | // even delay reporting this state until the barrier begins to wait. | |||
| 1808 | this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier; | |||
| 1809 | } | |||
| 1810 | #endif | |||
| 1811 | ||||
| 1812 | if (!team->t.t_serialized) { | |||
| 1813 | #if USE_ITT_BUILD1 | |||
| 1814 | // This value will be used in itt notify events below. | |||
| 1815 | void *itt_sync_obj = NULL__null; | |||
| 1816 | #if USE_ITT_NOTIFY1 | |||
| 1817 | if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0) | |||
| 1818 | itt_sync_obj = __kmp_itt_barrier_object(gtid, bt, 1); | |||
| 1819 | #endif | |||
| 1820 | #endif /* USE_ITT_BUILD */ | |||
| 1821 | if (__kmp_tasking_mode == tskm_extra_barrier) { | |||
| 1822 | __kmp_tasking_barrier(team, this_thr, gtid); | |||
| 1823 | KA_TRACE(15,if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmp_barrier: T#%d(%d:%d) past tasking barrier\n" , gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid (gtid)); } | |||
| 1824 | ("__kmp_barrier: T#%d(%d:%d) past tasking barrier\n", gtid,if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmp_barrier: T#%d(%d:%d) past tasking barrier\n" , gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid (gtid)); } | |||
| 1825 | __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid)))if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmp_barrier: T#%d(%d:%d) past tasking barrier\n" , gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid (gtid)); }; | |||
| 1826 | } | |||
| 1827 | ||||
| 1828 | /* Copy the blocktime info to the thread, where __kmp_wait_template() can | |||
| 1829 | access it when the team struct is not guaranteed to exist. */ | |||
| 1830 | // See note about the corresponding code in __kmp_join_barrier() being | |||
| 1831 | // performance-critical. | |||
| 1832 | if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME(2147483647)) { | |||
| 1833 | #if KMP_USE_MONITOR | |||
| 1834 | this_thr->th.th_team_bt_intervals = | |||
| 1835 | team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals; | |||
| 1836 | this_thr->th.th_team_bt_set = | |||
| 1837 | team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set; | |||
| 1838 | #else | |||
| 1839 | this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL(team, tid)((((team)->t.t_threads[(tid)]->th.th_current_task->td_icvs .bt_set) ? ((team)->t.t_threads[(tid)]->th.th_current_task ->td_icvs.blocktime) : __kmp_dflt_blocktime) * __kmp_ticks_per_msec ); | |||
| 1840 | #endif | |||
| 1841 | } | |||
| 1842 | ||||
| 1843 | #if USE_ITT_BUILD1 | |||
| 1844 | if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0) | |||
| 1845 | __kmp_itt_barrier_starting(gtid, itt_sync_obj); | |||
| 1846 | #endif /* USE_ITT_BUILD */ | |||
| 1847 | #if USE_DEBUGGER0 | |||
| 1848 | // Let the debugger know: the thread arrived to the barrier and waiting. | |||
| 1849 | if (KMP_MASTER_TID(tid)(0 == (tid))) { // Primary thread counter stored in team struct | |||
| 1850 | team->t.t_bar[bt].b_master_arrived += 1; | |||
| 1851 | } else { | |||
| 1852 | this_thr->th.th_bar[bt].bb.b_worker_arrived += 1; | |||
| 1853 | } // if | |||
| 1854 | #endif /* USE_DEBUGGER */ | |||
| 1855 | if (reduce != NULL__null) { | |||
| 1856 | // KMP_DEBUG_ASSERT( is_split == TRUE ); // #C69956 | |||
| 1857 | this_thr->th.th_local.reduce_data = reduce_data; | |||
| 1858 | } | |||
| 1859 | ||||
| 1860 | if (KMP_MASTER_TID(tid)(0 == (tid)) && __kmp_tasking_mode != tskm_immediate_exec) | |||
| 1861 | // use 0 to only setup the current team if nthreads > 1 | |||
| 1862 | __kmp_task_team_setup(this_thr, team, 0); | |||
| 1863 | ||||
| 1864 | if (cancellable) { | |||
| 1865 | cancelled = __kmp_linear_barrier_gather_cancellable( | |||
| 1866 | bt, this_thr, gtid, tid, reduce USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | |||
| 1867 | } else { | |||
| 1868 | switch (__kmp_barrier_gather_pattern[bt]) { | |||
| 1869 | case bp_dist_bar: { | |||
| 1870 | __kmp_dist_barrier_gather(bt, this_thr, gtid, tid, | |||
| 1871 | reduce USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | |||
| 1872 | break; | |||
| 1873 | } | |||
| 1874 | case bp_hyper_bar: { | |||
| 1875 | // don't set branch bits to 0; use linear | |||
| 1876 | KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt])if (!(__kmp_barrier_gather_branch_bits[bt])) { __kmp_debug_assert ("__kmp_barrier_gather_branch_bits[bt]", "openmp/runtime/src/kmp_barrier.cpp" , 1876); }; | |||
| 1877 | __kmp_hyper_barrier_gather(bt, this_thr, gtid, tid, | |||
| 1878 | reduce USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | |||
| 1879 | break; | |||
| 1880 | } | |||
| 1881 | case bp_hierarchical_bar: { | |||
| 1882 | __kmp_hierarchical_barrier_gather( | |||
| 1883 | bt, this_thr, gtid, tid, reduce USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | |||
| 1884 | break; | |||
| 1885 | } | |||
| 1886 | case bp_tree_bar: { | |||
| 1887 | // don't set branch bits to 0; use linear | |||
| 1888 | KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt])if (!(__kmp_barrier_gather_branch_bits[bt])) { __kmp_debug_assert ("__kmp_barrier_gather_branch_bits[bt]", "openmp/runtime/src/kmp_barrier.cpp" , 1888); }; | |||
| 1889 | __kmp_tree_barrier_gather(bt, this_thr, gtid, tid, | |||
| 1890 | reduce USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | |||
| 1891 | break; | |||
| 1892 | } | |||
| 1893 | default: { | |||
| 1894 | __kmp_linear_barrier_gather(bt, this_thr, gtid, tid, | |||
| 1895 | reduce USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | |||
| 1896 | } | |||
| 1897 | } | |||
| 1898 | } | |||
| 1899 | ||||
| 1900 | KMP_MB(); | |||
| 1901 | ||||
| 1902 | if (KMP_MASTER_TID(tid)(0 == (tid))) { | |||
| 1903 | status = 0; | |||
| 1904 | if (__kmp_tasking_mode != tskm_immediate_exec && !cancelled) { | |||
| 1905 | __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | |||
| 1906 | } | |||
| 1907 | #if USE_DEBUGGER0 | |||
| 1908 | // Let the debugger know: All threads are arrived and starting leaving the | |||
| 1909 | // barrier. | |||
| 1910 | team->t.t_bar[bt].b_team_arrived += 1; | |||
| 1911 | #endif | |||
| 1912 | ||||
| 1913 | if (__kmp_omp_cancellation) { | |||
| 1914 | kmp_int32 cancel_request = KMP_ATOMIC_LD_RLX(&team->t.t_cancel_request)(&team->t.t_cancel_request)->load(std::memory_order_relaxed ); | |||
| 1915 | // Reset cancellation flag for worksharing constructs | |||
| 1916 | if (cancel_request == cancel_loop || | |||
| 1917 | cancel_request == cancel_sections) { | |||
| 1918 | KMP_ATOMIC_ST_RLX(&team->t.t_cancel_request, cancel_noreq)(&team->t.t_cancel_request)->store(cancel_noreq, std ::memory_order_relaxed); | |||
| 1919 | } | |||
| 1920 | } | |||
| 1921 | #if USE_ITT_BUILD1 | |||
| 1922 | /* TODO: In case of split reduction barrier, primary thread may send | |||
| 1923 | acquired event early, before the final summation into the shared | |||
| 1924 | variable is done (final summation can be a long operation for array | |||
| 1925 | reductions). */ | |||
| 1926 | if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0) | |||
| 1927 | __kmp_itt_barrier_middle(gtid, itt_sync_obj); | |||
| 1928 | #endif /* USE_ITT_BUILD */ | |||
| 1929 | #if USE_ITT_BUILD1 && USE_ITT_NOTIFY1 | |||
| 1930 | // Barrier - report frame end (only if active_level == 1) | |||
| 1931 | if ((__itt_frame_submit_v3_ptr__kmp_itt_frame_submit_v3_ptr__3_0 || KMP_ITT_DEBUG0) && | |||
| 1932 | __kmp_forkjoin_frames_mode && | |||
| 1933 | (this_thr->th.th_teams_microtask == NULL__null || // either not in teams | |||
| 1934 | this_thr->th.th_teams_size.nteams == 1) && // or inside single team | |||
| 1935 | team->t.t_active_level == 1) { | |||
| 1936 | ident_t *loc = __kmp_threads[gtid]->th.th_ident; | |||
| 1937 | kmp_uint64 cur_time = __itt_get_timestamp(!__kmp_itt_get_timestamp_ptr__3_0) ? 0 : __kmp_itt_get_timestamp_ptr__3_0(); | |||
| 1938 | kmp_info_t **other_threads = team->t.t_threads; | |||
| 1939 | int nproc = this_thr->th.th_team_nproc; | |||
| 1940 | int i; | |||
| 1941 | switch (__kmp_forkjoin_frames_mode) { | |||
| 1942 | case 1: | |||
| 1943 | __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, | |||
| 1944 | loc, nproc); | |||
| 1945 | this_thr->th.th_frame_time = cur_time; | |||
| 1946 | break; | |||
| 1947 | case 2: // AC 2015-01-19: currently does not work for hierarchical (to | |||
| 1948 | // be fixed) | |||
| 1949 | __kmp_itt_frame_submit(gtid, this_thr->th.th_bar_min_time, cur_time, | |||
| 1950 | 1, loc, nproc); | |||
| 1951 | break; | |||
| 1952 | case 3: | |||
| 1953 | if (__itt_metadata_add_ptr__kmp_itt_metadata_add_ptr__3_0) { | |||
| 1954 | // Initialize with primary thread's wait time | |||
| 1955 | kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time; | |||
| 1956 | // Set arrive time to zero to be able to check it in | |||
| 1957 | // __kmp_invoke_task(); the same is done inside the loop below | |||
| 1958 | this_thr->th.th_bar_arrive_time = 0; | |||
| 1959 | for (i = 1; i < nproc; ++i) { | |||
| 1960 | delta += (cur_time - other_threads[i]->th.th_bar_arrive_time); | |||
| 1961 | other_threads[i]->th.th_bar_arrive_time = 0; | |||
| 1962 | } | |||
| 1963 | __kmp_itt_metadata_imbalance(gtid, this_thr->th.th_frame_time, | |||
| 1964 | cur_time, delta, | |||
| 1965 | (kmp_uint64)(reduce != NULL__null)); | |||
| 1966 | } | |||
| 1967 | __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, | |||
| 1968 | loc, nproc); | |||
| 1969 | this_thr->th.th_frame_time = cur_time; | |||
| 1970 | break; | |||
| 1971 | } | |||
| 1972 | } | |||
| 1973 | #endif /* USE_ITT_BUILD */ | |||
| 1974 | } else { | |||
| 1975 | status = 1; | |||
| 1976 | #if USE_ITT_BUILD1 | |||
| 1977 | if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0) | |||
| 1978 | __kmp_itt_barrier_middle(gtid, itt_sync_obj); | |||
| 1979 | #endif /* USE_ITT_BUILD */ | |||
| 1980 | } | |||
| 1981 | if ((status == 1 || !is_split) && !cancelled) { | |||
| 1982 | if (cancellable) { | |||
| 1983 | cancelled = __kmp_linear_barrier_release_cancellable( | |||
| 1984 | bt, this_thr, gtid, tid, FALSE0 USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | |||
| 1985 | } else { | |||
| 1986 | switch (__kmp_barrier_release_pattern[bt]) { | |||
| 1987 | case bp_dist_bar: { | |||
| 1988 | KMP_ASSERT(__kmp_barrier_release_branch_bits[bt])if (!(__kmp_barrier_release_branch_bits[bt])) { __kmp_debug_assert ("__kmp_barrier_release_branch_bits[bt]", "openmp/runtime/src/kmp_barrier.cpp" , 1988); }; | |||
| 1989 | __kmp_dist_barrier_release(bt, this_thr, gtid, tid, | |||
| 1990 | FALSE0 USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | |||
| 1991 | break; | |||
| 1992 | } | |||
| 1993 | case bp_hyper_bar: { | |||
| 1994 | KMP_ASSERT(__kmp_barrier_release_branch_bits[bt])if (!(__kmp_barrier_release_branch_bits[bt])) { __kmp_debug_assert ("__kmp_barrier_release_branch_bits[bt]", "openmp/runtime/src/kmp_barrier.cpp" , 1994); }; | |||
| 1995 | __kmp_hyper_barrier_release(bt, this_thr, gtid, tid, | |||
| 1996 | FALSE0 USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | |||
| 1997 | break; | |||
| 1998 | } | |||
| 1999 | case bp_hierarchical_bar: { | |||
| 2000 | __kmp_hierarchical_barrier_release( | |||
| 2001 | bt, this_thr, gtid, tid, FALSE0 USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | |||
| 2002 | break; | |||
| 2003 | } | |||
| 2004 | case bp_tree_bar: { | |||
| 2005 | KMP_ASSERT(__kmp_barrier_release_branch_bits[bt])if (!(__kmp_barrier_release_branch_bits[bt])) { __kmp_debug_assert ("__kmp_barrier_release_branch_bits[bt]", "openmp/runtime/src/kmp_barrier.cpp" , 2005); }; | |||
| 2006 | __kmp_tree_barrier_release(bt, this_thr, gtid, tid, | |||
| 2007 | FALSE0 USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | |||
| 2008 | break; | |||
| 2009 | } | |||
| 2010 | default: { | |||
| 2011 | __kmp_linear_barrier_release(bt, this_thr, gtid, tid, | |||
| 2012 | FALSE0 USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | |||
| 2013 | } | |||
| 2014 | } | |||
| 2015 | } | |||
| 2016 | if (__kmp_tasking_mode != tskm_immediate_exec && !cancelled) { | |||
| 2017 | __kmp_task_team_sync(this_thr, team); | |||
| 2018 | } | |||
| 2019 | } | |||
| 2020 | ||||
| 2021 | #if USE_ITT_BUILD1 | |||
| 2022 | /* GEH: TODO: Move this under if-condition above and also include in | |||
| 2023 | __kmp_end_split_barrier(). This will more accurately represent the actual | |||
| 2024 | release time of the threads for split barriers. */ | |||
| 2025 | if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0) | |||
| 2026 | __kmp_itt_barrier_finished(gtid, itt_sync_obj); | |||
| 2027 | #endif /* USE_ITT_BUILD */ | |||
| 2028 | } else { // Team is serialized. | |||
| 2029 | status = 0; | |||
| 2030 | if (__kmp_tasking_mode != tskm_immediate_exec) { | |||
| 2031 | if (this_thr->th.th_task_team != NULL__null) { | |||
| 2032 | #if USE_ITT_NOTIFY1 | |||
| 2033 | void *itt_sync_obj = NULL__null; | |||
| 2034 | if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0) { | |||
| 2035 | itt_sync_obj = __kmp_itt_barrier_object(gtid, bt, 1); | |||
| 2036 | __kmp_itt_barrier_starting(gtid, itt_sync_obj); | |||
| 2037 | } | |||
| 2038 | #endif | |||
| 2039 | ||||
| 2040 | KMP_DEBUG_ASSERT(if (!(this_thr->th.th_task_team->tt.tt_found_proxy_tasks == (!0) || this_thr->th.th_task_team->tt.tt_hidden_helper_task_encountered == (!0))) { __kmp_debug_assert("this_thr->th.th_task_team->tt.tt_found_proxy_tasks == (!0) || this_thr->th.th_task_team->tt.tt_hidden_helper_task_encountered == (!0)" , "openmp/runtime/src/kmp_barrier.cpp", 2043); } | |||
| 2041 | this_thr->th.th_task_team->tt.tt_found_proxy_tasks == TRUE ||if (!(this_thr->th.th_task_team->tt.tt_found_proxy_tasks == (!0) || this_thr->th.th_task_team->tt.tt_hidden_helper_task_encountered == (!0))) { __kmp_debug_assert("this_thr->th.th_task_team->tt.tt_found_proxy_tasks == (!0) || this_thr->th.th_task_team->tt.tt_hidden_helper_task_encountered == (!0)" , "openmp/runtime/src/kmp_barrier.cpp", 2043); } | |||
| 2042 | this_thr->th.th_task_team->tt.tt_hidden_helper_task_encountered ==if (!(this_thr->th.th_task_team->tt.tt_found_proxy_tasks == (!0) || this_thr->th.th_task_team->tt.tt_hidden_helper_task_encountered == (!0))) { __kmp_debug_assert("this_thr->th.th_task_team->tt.tt_found_proxy_tasks == (!0) || this_thr->th.th_task_team->tt.tt_hidden_helper_task_encountered == (!0)" , "openmp/runtime/src/kmp_barrier.cpp", 2043); } | |||
| 2043 | TRUE)if (!(this_thr->th.th_task_team->tt.tt_found_proxy_tasks == (!0) || this_thr->th.th_task_team->tt.tt_hidden_helper_task_encountered == (!0))) { __kmp_debug_assert("this_thr->th.th_task_team->tt.tt_found_proxy_tasks == (!0) || this_thr->th.th_task_team->tt.tt_hidden_helper_task_encountered == (!0)" , "openmp/runtime/src/kmp_barrier.cpp", 2043); }; | |||
| 2044 | __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | |||
| 2045 | __kmp_task_team_setup(this_thr, team, 0); | |||
| 2046 | ||||
| 2047 | #if USE_ITT_BUILD1 | |||
| 2048 | if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0) | |||
| 2049 | __kmp_itt_barrier_finished(gtid, itt_sync_obj); | |||
| 2050 | #endif /* USE_ITT_BUILD */ | |||
| 2051 | } | |||
| 2052 | } | |||
| 2053 | } | |||
| 2054 | KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) is leaving with return value %d\n",if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmp_barrier: T#%d(%d:%d) is leaving with return value %d\n" , gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid (gtid), status); } | |||
| 2055 | gtid, __kmp_team_from_gtid(gtid)->t.t_id,if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmp_barrier: T#%d(%d:%d) is leaving with return value %d\n" , gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid (gtid), status); } | |||
| 2056 | __kmp_tid_from_gtid(gtid), status))if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmp_barrier: T#%d(%d:%d) is leaving with return value %d\n" , gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid (gtid), status); }; | |||
| 2057 | ||||
| 2058 | #if OMPT_SUPPORT1 | |||
| 2059 | if (ompt_enabled.enabled) { | |||
| 2060 | #if OMPT_OPTIONAL1 | |||
| 2061 | if (ompt_enabled.ompt_callback_sync_region_wait) { | |||
| 2062 | ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)ompt_callback_sync_region_wait_callback( | |||
| 2063 | barrier_kind, ompt_scope_end, my_parallel_data, my_task_data, | |||
| 2064 | return_address); | |||
| 2065 | } | |||
| 2066 | if (ompt_enabled.ompt_callback_sync_region) { | |||
| 2067 | ompt_callbacks.ompt_callback(ompt_callback_sync_region)ompt_callback_sync_region_callback( | |||
| 2068 | barrier_kind, ompt_scope_end, my_parallel_data, my_task_data, | |||
| 2069 | return_address); | |||
| 2070 | } | |||
| 2071 | #endif | |||
| 2072 | this_thr->th.ompt_thread_info.state = ompt_state_work_parallel; | |||
| 2073 | } | |||
| 2074 | #endif | |||
| 2075 | ||||
| 2076 | if (cancellable) | |||
| 2077 | return (int)cancelled; | |||
| 2078 | return status; | |||
| 2079 | } | |||
| 2080 | ||||
| 2081 | // Returns 0 if primary thread, 1 if worker thread. | |||
| 2082 | int __kmp_barrier(enum barrier_type bt, int gtid, int is_split, | |||
| 2083 | size_t reduce_size, void *reduce_data, | |||
| 2084 | void (*reduce)(void *, void *)) { | |||
| 2085 | return __kmp_barrier_template<>(bt, gtid, is_split, reduce_size, reduce_data, | |||
| 2086 | reduce); | |||
| 2087 | } | |||
| 2088 | ||||
#if defined(KMP_GOMP_COMPAT)
// Plain barrier for the GOMP compatibility layer.
// Returns 1 if cancelled, 0 otherwise.
int __kmp_barrier_gomp_cancel(int gtid) {
  // With cancellation switched off this is an ordinary plain barrier and the
  // result is always FALSE.
  if (!__kmp_omp_cancellation) {
    __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
    return FALSE;
  }

  int cancelled =
      __kmp_barrier_template<true>(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
  if (cancelled) {
    int tid = __kmp_tid_from_gtid(gtid);
    // Primary thread does not need to revert anything; workers need to revert
    // their private b_arrived flag.
    if (!KMP_MASTER_TID(tid)) {
      kmp_info_t *worker = __kmp_threads[gtid];
      worker->th.th_bar[bs_plain_barrier].bb.b_arrived -=
          KMP_BARRIER_STATE_BUMP;
    }
  }
  return cancelled;
}
#endif
| 2112 | ||||
| 2113 | void __kmp_end_split_barrier(enum barrier_type bt, int gtid) { | |||
| 2114 | KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_end_split_barrier)((void)0); | |||
| 2115 | KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER)((void)0); | |||
| 2116 | KMP_DEBUG_ASSERT(bt < bs_last_barrier)if (!(bt < bs_last_barrier)) { __kmp_debug_assert("bt < bs_last_barrier" , "openmp/runtime/src/kmp_barrier.cpp", 2116); }; | |||
| 2117 | int tid = __kmp_tid_from_gtid(gtid); | |||
| 2118 | kmp_info_t *this_thr = __kmp_threads[gtid]; | |||
| 2119 | kmp_team_t *team = this_thr->th.th_team; | |||
| 2120 | ||||
| 2121 | if (!team->t.t_serialized) { | |||
| 2122 | if (KMP_MASTER_GTID(gtid)(0 == __kmp_tid_from_gtid((gtid)))) { | |||
| 2123 | switch (__kmp_barrier_release_pattern[bt]) { | |||
| 2124 | case bp_dist_bar: { | |||
| 2125 | __kmp_dist_barrier_release(bt, this_thr, gtid, tid, | |||
| 2126 | FALSE0 USE_ITT_BUILD_ARG(NULL), __null); | |||
| 2127 | break; | |||
| 2128 | } | |||
| 2129 | case bp_hyper_bar: { | |||
| 2130 | KMP_ASSERT(__kmp_barrier_release_branch_bits[bt])if (!(__kmp_barrier_release_branch_bits[bt])) { __kmp_debug_assert ("__kmp_barrier_release_branch_bits[bt]", "openmp/runtime/src/kmp_barrier.cpp" , 2130); }; | |||
| 2131 | __kmp_hyper_barrier_release(bt, this_thr, gtid, tid, | |||
| 2132 | FALSE0 USE_ITT_BUILD_ARG(NULL), __null); | |||
| 2133 | break; | |||
| 2134 | } | |||
| 2135 | case bp_hierarchical_bar: { | |||
| 2136 | __kmp_hierarchical_barrier_release(bt, this_thr, gtid, tid, | |||
| 2137 | FALSE0 USE_ITT_BUILD_ARG(NULL), __null); | |||
| 2138 | break; | |||
| 2139 | } | |||
| 2140 | case bp_tree_bar: { | |||
| 2141 | KMP_ASSERT(__kmp_barrier_release_branch_bits[bt])if (!(__kmp_barrier_release_branch_bits[bt])) { __kmp_debug_assert ("__kmp_barrier_release_branch_bits[bt]", "openmp/runtime/src/kmp_barrier.cpp" , 2141); }; | |||
| 2142 | __kmp_tree_barrier_release(bt, this_thr, gtid, tid, | |||
| 2143 | FALSE0 USE_ITT_BUILD_ARG(NULL), __null); | |||
| 2144 | break; | |||
| 2145 | } | |||
| 2146 | default: { | |||
| 2147 | __kmp_linear_barrier_release(bt, this_thr, gtid, tid, | |||
| 2148 | FALSE0 USE_ITT_BUILD_ARG(NULL), __null); | |||
| 2149 | } | |||
| 2150 | } | |||
| 2151 | if (__kmp_tasking_mode != tskm_immediate_exec) { | |||
| 2152 | __kmp_task_team_sync(this_thr, team); | |||
| 2153 | } // if | |||
| 2154 | } | |||
| 2155 | } | |||
| 2156 | } | |||
| 2157 | ||||
| 2158 | void __kmp_join_barrier(int gtid) { | |||
| 2159 | KMP_TIME_PARTITIONED_BLOCK(OMP_join_barrier)((void)0); | |||
| 2160 | KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER)((void)0); | |||
| 2161 | ||||
| 2162 | KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid])if (!(__kmp_threads && __kmp_threads[gtid])) { __kmp_debug_assert ("__kmp_threads && __kmp_threads[gtid]", "openmp/runtime/src/kmp_barrier.cpp" , 2162); }; | |||
| ||||
| 2163 | ||||
| 2164 | kmp_info_t *this_thr = __kmp_threads[gtid]; | |||
| 2165 | kmp_team_t *team; | |||
| 2166 | int tid; | |||
| 2167 | #ifdef KMP_DEBUG1 | |||
| 2168 | int team_id; | |||
| 2169 | #endif /* KMP_DEBUG */ | |||
| 2170 | #if USE_ITT_BUILD1 | |||
| 2171 | void *itt_sync_obj = NULL__null; | |||
| 2172 | #if USE_ITT_NOTIFY1 | |||
| 2173 | if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0) // Don't call routine without need | |||
| 2174 | // Get object created at fork_barrier | |||
| 2175 | itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier); | |||
| 2176 | #endif | |||
| 2177 | #endif /* USE_ITT_BUILD */ | |||
| 2178 | #if ((USE_ITT_BUILD1 && USE_ITT_NOTIFY1) || defined KMP_DEBUG1) | |||
| 2179 | int nproc = this_thr->th.th_team_nproc; | |||
| 2180 | #endif | |||
| 2181 | KMP_MB(); | |||
| 2182 | ||||
| 2183 | // Get current info | |||
| 2184 | team = this_thr->th.th_team; | |||
| 2185 | KMP_DEBUG_ASSERT(nproc == team->t.t_nproc)if (!(nproc == team->t.t_nproc)) { __kmp_debug_assert("nproc == team->t.t_nproc" , "openmp/runtime/src/kmp_barrier.cpp", 2185); }; | |||
| 2186 | tid = __kmp_tid_from_gtid(gtid); | |||
| 2187 | #ifdef KMP_DEBUG1 | |||
| 2188 | team_id = team->t.t_id; | |||
| 2189 | kmp_info_t *master_thread = this_thr->th.th_team_master; | |||
| 2190 | if (master_thread != team->t.t_threads[0]) { | |||
| 2191 | __kmp_print_structure(); | |||
| 2192 | } | |||
| 2193 | #endif /* KMP_DEBUG */ | |||
| 2194 | KMP_DEBUG_ASSERT(master_thread == team->t.t_threads[0])if (!(master_thread == team->t.t_threads[0])) { __kmp_debug_assert ("master_thread == team->t.t_threads[0]", "openmp/runtime/src/kmp_barrier.cpp" , 2194); }; | |||
| 2195 | KMP_MB(); | |||
| 2196 | ||||
| 2197 | // Verify state | |||
| 2198 | KMP_DEBUG_ASSERT(TCR_PTR(this_thr->th.th_team))if (!(((void *)(this_thr->th.th_team)))) { __kmp_debug_assert ("((void *)(this_thr->th.th_team))", "openmp/runtime/src/kmp_barrier.cpp" , 2198); }; | |||
| 2199 | KMP_DEBUG_ASSERT(TCR_PTR(this_thr->th.th_root))if (!(((void *)(this_thr->th.th_root)))) { __kmp_debug_assert ("((void *)(this_thr->th.th_root))", "openmp/runtime/src/kmp_barrier.cpp" , 2199); }; | |||
| 2200 | KMP_DEBUG_ASSERT(this_thr == team->t.t_threads[tid])if (!(this_thr == team->t.t_threads[tid])) { __kmp_debug_assert ("this_thr == team->t.t_threads[tid]", "openmp/runtime/src/kmp_barrier.cpp" , 2200); }; | |||
| 2201 | KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) arrived at join barrier\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_join_barrier: T#%d(%d:%d) arrived at join barrier\n" , gtid, team_id, tid); } | |||
| 2202 | gtid, team_id, tid))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_join_barrier: T#%d(%d:%d) arrived at join barrier\n" , gtid, team_id, tid); }; | |||
| 2203 | ||||
| 2204 | #if OMPT_SUPPORT1 | |||
| 2205 | if (ompt_enabled.enabled) { | |||
| 2206 | #if OMPT_OPTIONAL1 | |||
| 2207 | ompt_data_t *my_task_data; | |||
| 2208 | ompt_data_t *my_parallel_data; | |||
| 2209 | void *codeptr = NULL__null; | |||
| 2210 | int ds_tid = this_thr->th.th_info.ds.ds_tid; | |||
| 2211 | if (KMP_MASTER_TID(ds_tid)(0 == (ds_tid)) && | |||
| 2212 | (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)ompt_callback_sync_region_wait_callback || | |||
| 2213 | ompt_callbacks.ompt_callback(ompt_callback_sync_region)ompt_callback_sync_region_callback)) | |||
| 2214 | codeptr = team->t.ompt_team_info.master_return_address; | |||
| 2215 | my_task_data = OMPT_CUR_TASK_DATA(this_thr)(&(this_thr->th.th_current_task->ompt_task_info.task_data )); | |||
| 2216 | my_parallel_data = OMPT_CUR_TEAM_DATA(this_thr)(&(this_thr->th.th_team->t.ompt_team_info.parallel_data )); | |||
| 2217 | if (ompt_enabled.ompt_callback_sync_region) { | |||
| 2218 | ompt_callbacks.ompt_callback(ompt_callback_sync_region)ompt_callback_sync_region_callback( | |||
| ||||
| 2219 | ompt_sync_region_barrier_implicit, ompt_scope_begin, my_parallel_data, | |||
| 2220 | my_task_data, codeptr); | |||
| 2221 | } | |||
| 2222 | if (ompt_enabled.ompt_callback_sync_region_wait) { | |||
| 2223 | ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)ompt_callback_sync_region_wait_callback( | |||
| 2224 | ompt_sync_region_barrier_implicit, ompt_scope_begin, my_parallel_data, | |||
| 2225 | my_task_data, codeptr); | |||
| 2226 | } | |||
| 2227 | if (!KMP_MASTER_TID(ds_tid)(0 == (ds_tid))) | |||
| 2228 | this_thr->th.ompt_thread_info.task_data = *OMPT_CUR_TASK_DATA(this_thr)(&(this_thr->th.th_current_task->ompt_task_info.task_data )); | |||
| 2229 | #endif | |||
| 2230 | this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier_implicit; | |||
| 2231 | } | |||
| 2232 | #endif | |||
| 2233 | ||||
| 2234 | if (__kmp_tasking_mode == tskm_extra_barrier) { | |||
| 2235 | __kmp_tasking_barrier(team, this_thr, gtid); | |||
| 2236 | KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) past tasking barrier\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_join_barrier: T#%d(%d:%d) past tasking barrier\n" , gtid, team_id, tid); } | |||
| 2237 | gtid, team_id, tid))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_join_barrier: T#%d(%d:%d) past tasking barrier\n" , gtid, team_id, tid); }; | |||
| 2238 | } | |||
| 2239 | #ifdef KMP_DEBUG1 | |||
| 2240 | if (__kmp_tasking_mode != tskm_immediate_exec) { | |||
| 2241 | KA_TRACE(20, ("__kmp_join_barrier: T#%d, old team = %d, old task_team = "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_join_barrier: T#%d, old team = %d, old task_team = " "%p, th_task_team = %p\n", __kmp_gtid_from_thread(this_thr), team_id, team->t.t_task_team[this_thr->th.th_task_state ], this_thr->th.th_task_team); } | |||
| 2242 | "%p, th_task_team = %p\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_join_barrier: T#%d, old team = %d, old task_team = " "%p, th_task_team = %p\n", __kmp_gtid_from_thread(this_thr), team_id, team->t.t_task_team[this_thr->th.th_task_state ], this_thr->th.th_task_team); } | |||
| 2243 | __kmp_gtid_from_thread(this_thr), team_id,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_join_barrier: T#%d, old team = %d, old task_team = " "%p, th_task_team = %p\n", __kmp_gtid_from_thread(this_thr), team_id, team->t.t_task_team[this_thr->th.th_task_state ], this_thr->th.th_task_team); } | |||
| 2244 | team->t.t_task_team[this_thr->th.th_task_state],if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_join_barrier: T#%d, old team = %d, old task_team = " "%p, th_task_team = %p\n", __kmp_gtid_from_thread(this_thr), team_id, team->t.t_task_team[this_thr->th.th_task_state ], this_thr->th.th_task_team); } | |||
| 2245 | this_thr->th.th_task_team))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_join_barrier: T#%d, old team = %d, old task_team = " "%p, th_task_team = %p\n", __kmp_gtid_from_thread(this_thr), team_id, team->t.t_task_team[this_thr->th.th_task_state ], this_thr->th.th_task_team); }; | |||
| 2246 | if (this_thr->th.th_task_team) | |||
| 2247 | KMP_DEBUG_ASSERT(this_thr->th.th_task_team ==if (!(this_thr->th.th_task_team == team->t.t_task_team[ this_thr->th.th_task_state])) { __kmp_debug_assert("this_thr->th.th_task_team == team->t.t_task_team[this_thr->th.th_task_state]" , "openmp/runtime/src/kmp_barrier.cpp", 2248); } | |||
| 2248 | team->t.t_task_team[this_thr->th.th_task_state])if (!(this_thr->th.th_task_team == team->t.t_task_team[ this_thr->th.th_task_state])) { __kmp_debug_assert("this_thr->th.th_task_team == team->t.t_task_team[this_thr->th.th_task_state]" , "openmp/runtime/src/kmp_barrier.cpp", 2248); }; | |||
| 2249 | } | |||
| 2250 | #endif /* KMP_DEBUG */ | |||
| 2251 | ||||
| 2252 | /* Copy the blocktime info to the thread, where __kmp_wait_template() can | |||
| 2253 | access it when the team struct is not guaranteed to exist. Doing these | |||
| 2254 | loads causes a cache miss slows down EPCC parallel by 2x. As a workaround, | |||
| 2255 | we do not perform the copy if blocktime=infinite, since the values are not | |||
| 2256 | used by __kmp_wait_template() in that case. */ | |||
| 2257 | if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME(2147483647)) { | |||
| 2258 | #if KMP_USE_MONITOR | |||
| 2259 | this_thr->th.th_team_bt_intervals = | |||
| 2260 | team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals; | |||
| 2261 | this_thr->th.th_team_bt_set = | |||
| 2262 | team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set; | |||
| 2263 | #else | |||
| 2264 | this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL(team, tid)((((team)->t.t_threads[(tid)]->th.th_current_task->td_icvs .bt_set) ? ((team)->t.t_threads[(tid)]->th.th_current_task ->td_icvs.blocktime) : __kmp_dflt_blocktime) * __kmp_ticks_per_msec ); | |||
| 2265 | #endif | |||
| 2266 | } | |||
| 2267 | ||||
| 2268 | #if USE_ITT_BUILD1 | |||
| 2269 | if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0) | |||
| 2270 | __kmp_itt_barrier_starting(gtid, itt_sync_obj); | |||
| 2271 | #endif /* USE_ITT_BUILD */ | |||
| 2272 | ||||
| 2273 | switch (__kmp_barrier_gather_pattern[bs_forkjoin_barrier]) { | |||
| 2274 | case bp_dist_bar: { | |||
| 2275 | __kmp_dist_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, | |||
| 2276 | NULL__null USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | |||
| 2277 | break; | |||
| 2278 | } | |||
| 2279 | case bp_hyper_bar: { | |||
| 2280 | KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier])if (!(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier])) { __kmp_debug_assert("__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]" , "openmp/runtime/src/kmp_barrier.cpp", 2280); }; | |||
| 2281 | __kmp_hyper_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, | |||
| 2282 | NULL__null USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | |||
| 2283 | break; | |||
| 2284 | } | |||
| 2285 | case bp_hierarchical_bar: { | |||
| 2286 | __kmp_hierarchical_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, | |||
| 2287 | NULL__null USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | |||
| 2288 | break; | |||
| 2289 | } | |||
| 2290 | case bp_tree_bar: { | |||
| 2291 | KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier])if (!(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier])) { __kmp_debug_assert("__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]" , "openmp/runtime/src/kmp_barrier.cpp", 2291); }; | |||
| 2292 | __kmp_tree_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, | |||
| 2293 | NULL__null USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | |||
| 2294 | break; | |||
| 2295 | } | |||
| 2296 | default: { | |||
| 2297 | __kmp_linear_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, | |||
| 2298 | NULL__null USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | |||
| 2299 | } | |||
| 2300 | } | |||
| 2301 | ||||
| 2302 | /* From this point on, the team data structure may be deallocated at any time | |||
| 2303 | by the primary thread - it is unsafe to reference it in any of the worker | |||
| 2304 | threads. Any per-team data items that need to be referenced before the | |||
| 2305 | end of the barrier should be moved to the kmp_task_team_t structs. */ | |||
| 2306 | if (KMP_MASTER_TID(tid)(0 == (tid))) { | |||
| 2307 | if (__kmp_tasking_mode != tskm_immediate_exec) { | |||
| 2308 | __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | |||
| 2309 | } | |||
| 2310 | if (__kmp_display_affinity) { | |||
| 2311 | KMP_CHECK_UPDATE(team->t.t_display_affinity, 0)if ((team->t.t_display_affinity) != (0)) (team->t.t_display_affinity ) = (0); | |||
| 2312 | } | |||
| 2313 | #if KMP_STATS_ENABLED0 | |||
| 2314 | // Have primary thread flag the workers to indicate they are now waiting for | |||
| 2315 | // next parallel region, Also wake them up so they switch their timers to | |||
| 2316 | // idle. | |||
| 2317 | for (int i = 0; i < team->t.t_nproc; ++i) { | |||
| 2318 | kmp_info_t *team_thread = team->t.t_threads[i]; | |||
| 2319 | if (team_thread == this_thr) | |||
| 2320 | continue; | |||
| 2321 | team_thread->th.th_stats->setIdleFlag(); | |||
| 2322 | if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME(2147483647) && | |||
| 2323 | team_thread->th.th_sleep_loc != NULL__null) | |||
| 2324 | __kmp_null_resume_wrapper(team_thread); | |||
| 2325 | } | |||
| 2326 | #endif | |||
| 2327 | #if USE_ITT_BUILD1 | |||
| 2328 | if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0) | |||
| 2329 | __kmp_itt_barrier_middle(gtid, itt_sync_obj); | |||
| 2330 | #endif /* USE_ITT_BUILD */ | |||
| 2331 | ||||
| 2332 | #if USE_ITT_BUILD1 && USE_ITT_NOTIFY1 | |||
| 2333 | // Join barrier - report frame end | |||
| 2334 | if ((__itt_frame_submit_v3_ptr__kmp_itt_frame_submit_v3_ptr__3_0 || KMP_ITT_DEBUG0) && | |||
| 2335 | __kmp_forkjoin_frames_mode && | |||
| 2336 | (this_thr->th.th_teams_microtask == NULL__null || // either not in teams | |||
| 2337 | this_thr->th.th_teams_size.nteams == 1) && // or inside single team | |||
| 2338 | team->t.t_active_level == 1) { | |||
| 2339 | kmp_uint64 cur_time = __itt_get_timestamp(!__kmp_itt_get_timestamp_ptr__3_0) ? 0 : __kmp_itt_get_timestamp_ptr__3_0(); | |||
| 2340 | ident_t *loc = team->t.t_ident; | |||
| 2341 | kmp_info_t **other_threads = team->t.t_threads; | |||
| 2342 | switch (__kmp_forkjoin_frames_mode) { | |||
| 2343 | case 1: | |||
| 2344 | __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, | |||
| 2345 | loc, nproc); | |||
| 2346 | break; | |||
| 2347 | case 2: | |||
| 2348 | __kmp_itt_frame_submit(gtid, this_thr->th.th_bar_min_time, cur_time, 1, | |||
| 2349 | loc, nproc); | |||
| 2350 | break; | |||
| 2351 | case 3: | |||
| 2352 | if (__itt_metadata_add_ptr__kmp_itt_metadata_add_ptr__3_0) { | |||
| 2353 | // Initialize with primary thread's wait time | |||
| 2354 | kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time; | |||
| 2355 | // Set arrive time to zero to be able to check it in | |||
| 2356 | // __kmp_invoke_task(); the same is done inside the loop below | |||
| 2357 | this_thr->th.th_bar_arrive_time = 0; | |||
| 2358 | for (int i = 1; i < nproc; ++i) { | |||
| 2359 | delta += (cur_time - other_threads[i]->th.th_bar_arrive_time); | |||
| 2360 | other_threads[i]->th.th_bar_arrive_time = 0; | |||
| 2361 | } | |||
| 2362 | __kmp_itt_metadata_imbalance(gtid, this_thr->th.th_frame_time, | |||
| 2363 | cur_time, delta, 0); | |||
| 2364 | } | |||
| 2365 | __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, | |||
| 2366 | loc, nproc); | |||
| 2367 | this_thr->th.th_frame_time = cur_time; | |||
| 2368 | break; | |||
| 2369 | } | |||
| 2370 | } | |||
| 2371 | #endif /* USE_ITT_BUILD */ | |||
| 2372 | } | |||
| 2373 | #if USE_ITT_BUILD1 | |||
| 2374 | else { | |||
| 2375 | if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0) | |||
| 2376 | __kmp_itt_barrier_middle(gtid, itt_sync_obj); | |||
| 2377 | } | |||
| 2378 | #endif /* USE_ITT_BUILD */ | |||
| 2379 | ||||
| 2380 | #if KMP_DEBUG1 | |||
| 2381 | if (KMP_MASTER_TID(tid)(0 == (tid))) { | |||
| 2382 | KA_TRACE(if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmp_join_barrier: T#%d(%d:%d) says all %d team threads arrived\n" , gtid, team_id, tid, nproc); } | |||
| 2383 | 15,if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmp_join_barrier: T#%d(%d:%d) says all %d team threads arrived\n" , gtid, team_id, tid, nproc); } | |||
| 2384 | ("__kmp_join_barrier: T#%d(%d:%d) says all %d team threads arrived\n",if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmp_join_barrier: T#%d(%d:%d) says all %d team threads arrived\n" , gtid, team_id, tid, nproc); } | |||
| 2385 | gtid, team_id, tid, nproc))if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmp_join_barrier: T#%d(%d:%d) says all %d team threads arrived\n" , gtid, team_id, tid, nproc); }; | |||
| 2386 | } | |||
| 2387 | #endif /* KMP_DEBUG */ | |||
| 2388 | ||||
| 2389 | // TODO now, mark worker threads as done so they may be disbanded | |||
| 2390 | KMP_MB(); // Flush all pending memory write invalidates. | |||
| 2391 | KA_TRACE(10,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_join_barrier: T#%d(%d:%d) leaving\n" , gtid, team_id, tid); } | |||
| 2392 | ("__kmp_join_barrier: T#%d(%d:%d) leaving\n", gtid, team_id, tid))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_join_barrier: T#%d(%d:%d) leaving\n" , gtid, team_id, tid); }; | |||
| 2393 | ||||
| 2394 | } | |||
| 2395 | ||||
| 2396 | // TODO release worker threads' fork barriers as we are ready instead of all at | |||
| 2397 | // once | |||
| 2398 | void __kmp_fork_barrier(int gtid, int tid) { | |||
| 2399 | KMP_TIME_PARTITIONED_BLOCK(OMP_fork_barrier)((void)0); | |||
| 2400 | KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER)((void)0); | |||
| 2401 | kmp_info_t *this_thr = __kmp_threads[gtid]; | |||
| 2402 | kmp_team_t *team = (tid == 0) ? this_thr->th.th_team : NULL__null; | |||
| 2403 | #if USE_ITT_BUILD1 | |||
| 2404 | void *itt_sync_obj = NULL__null; | |||
| 2405 | #endif /* USE_ITT_BUILD */ | |||
| 2406 | if (team) | |||
| 2407 | ||||
| 2408 | KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d:%d) has arrived\n", gtid,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_fork_barrier: T#%d(%d:%d) has arrived\n" , gtid, (team != __null) ? team->t.t_id : -1, tid); } | |||
| 2409 | (team != NULL) ? team->t.t_id : -1, tid))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_fork_barrier: T#%d(%d:%d) has arrived\n" , gtid, (team != __null) ? team->t.t_id : -1, tid); }; | |||
| 2410 | ||||
| 2411 | // th_team pointer only valid for primary thread here | |||
| 2412 | if (KMP_MASTER_TID(tid)(0 == (tid))) { | |||
| 2413 | #if USE_ITT_BUILD1 && USE_ITT_NOTIFY1 | |||
| 2414 | if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0) { | |||
| 2415 | // Create itt barrier object | |||
| 2416 | itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 1); | |||
| 2417 | __kmp_itt_barrier_middle(gtid, itt_sync_obj); // Call acquired/releasing | |||
| 2418 | } | |||
| 2419 | #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */ | |||
| 2420 | ||||
| 2421 | #ifdef KMP_DEBUG1 | |||
| 2422 | KMP_DEBUG_ASSERT(team)if (!(team)) { __kmp_debug_assert("team", "openmp/runtime/src/kmp_barrier.cpp" , 2422); }; | |||
| 2423 | kmp_info_t **other_threads = team->t.t_threads; | |||
| 2424 | int i; | |||
| 2425 | ||||
| 2426 | // Verify state | |||
| 2427 | KMP_MB(); | |||
| 2428 | ||||
| 2429 | for (i = 1; i < team->t.t_nproc; ++i) { | |||
| 2430 | KA_TRACE(500,if (kmp_a_debug >= 500) { __kmp_debug_printf ("__kmp_fork_barrier: T#%d(%d:0) checking T#%d(%d:%d) fork go " "== %u.\n", gtid, team->t.t_id, other_threads[i]->th.th_info .ds.ds_gtid, team->t.t_id, other_threads[i]->th.th_info .ds.ds_tid, other_threads[i]->th.th_bar[bs_forkjoin_barrier ].bb.b_go); } | |||
| 2431 | ("__kmp_fork_barrier: T#%d(%d:0) checking T#%d(%d:%d) fork go "if (kmp_a_debug >= 500) { __kmp_debug_printf ("__kmp_fork_barrier: T#%d(%d:0) checking T#%d(%d:%d) fork go " "== %u.\n", gtid, team->t.t_id, other_threads[i]->th.th_info .ds.ds_gtid, team->t.t_id, other_threads[i]->th.th_info .ds.ds_tid, other_threads[i]->th.th_bar[bs_forkjoin_barrier ].bb.b_go); } | |||
| 2432 | "== %u.\n",if (kmp_a_debug >= 500) { __kmp_debug_printf ("__kmp_fork_barrier: T#%d(%d:0) checking T#%d(%d:%d) fork go " "== %u.\n", gtid, team->t.t_id, other_threads[i]->th.th_info .ds.ds_gtid, team->t.t_id, other_threads[i]->th.th_info .ds.ds_tid, other_threads[i]->th.th_bar[bs_forkjoin_barrier ].bb.b_go); } | |||
| 2433 | gtid, team->t.t_id, other_threads[i]->th.th_info.ds.ds_gtid,if (kmp_a_debug >= 500) { __kmp_debug_printf ("__kmp_fork_barrier: T#%d(%d:0) checking T#%d(%d:%d) fork go " "== %u.\n", gtid, team->t.t_id, other_threads[i]->th.th_info .ds.ds_gtid, team->t.t_id, other_threads[i]->th.th_info .ds.ds_tid, other_threads[i]->th.th_bar[bs_forkjoin_barrier ].bb.b_go); } | |||
| 2434 | team->t.t_id, other_threads[i]->th.th_info.ds.ds_tid,if (kmp_a_debug >= 500) { __kmp_debug_printf ("__kmp_fork_barrier: T#%d(%d:0) checking T#%d(%d:%d) fork go " "== %u.\n", gtid, team->t.t_id, other_threads[i]->th.th_info .ds.ds_gtid, team->t.t_id, other_threads[i]->th.th_info .ds.ds_tid, other_threads[i]->th.th_bar[bs_forkjoin_barrier ].bb.b_go); } | |||
| 2435 | other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go))if (kmp_a_debug >= 500) { __kmp_debug_printf ("__kmp_fork_barrier: T#%d(%d:0) checking T#%d(%d:%d) fork go " "== %u.\n", gtid, team->t.t_id, other_threads[i]->th.th_info .ds.ds_gtid, team->t.t_id, other_threads[i]->th.th_info .ds.ds_tid, other_threads[i]->th.th_bar[bs_forkjoin_barrier ].bb.b_go); }; | |||
| 2436 | KMP_DEBUG_ASSERT(if (!(((other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb .b_go) & ~((1 << 0))) == 0)) { __kmp_debug_assert("((other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go) & ~((1 << 0))) == 0" , "openmp/runtime/src/kmp_barrier.cpp", 2438); } | |||
| 2437 | (TCR_4(other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go) &if (!(((other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb .b_go) & ~((1 << 0))) == 0)) { __kmp_debug_assert("((other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go) & ~((1 << 0))) == 0" , "openmp/runtime/src/kmp_barrier.cpp", 2438); } | |||
| 2438 | ~(KMP_BARRIER_SLEEP_STATE)) == KMP_INIT_BARRIER_STATE)if (!(((other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb .b_go) & ~((1 << 0))) == 0)) { __kmp_debug_assert("((other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go) & ~((1 << 0))) == 0" , "openmp/runtime/src/kmp_barrier.cpp", 2438); }; | |||
| 2439 | KMP_DEBUG_ASSERT(other_threads[i]->th.th_team == team)if (!(other_threads[i]->th.th_team == team)) { __kmp_debug_assert ("other_threads[i]->th.th_team == team", "openmp/runtime/src/kmp_barrier.cpp" , 2439); }; | |||
| 2440 | } | |||
| 2441 | #endif | |||
| 2442 | ||||
| 2443 | if (__kmp_tasking_mode != tskm_immediate_exec) { | |||
| 2444 | // 0 indicates setup current task team if nthreads > 1 | |||
| 2445 | __kmp_task_team_setup(this_thr, team, 0); | |||
| 2446 | } | |||
| 2447 | ||||
| 2448 | /* The primary thread may have changed its blocktime between join barrier | |||
| 2449 | and fork barrier. Copy the blocktime info to the thread, where | |||
| 2450 | __kmp_wait_template() can access it when the team struct is not | |||
| 2451 | guaranteed to exist. */ | |||
| 2452 | // See note about the corresponding code in __kmp_join_barrier() being | |||
| 2453 | // performance-critical | |||
| 2454 | if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME(2147483647)) { | |||
| 2455 | #if KMP_USE_MONITOR | |||
| 2456 | this_thr->th.th_team_bt_intervals = | |||
| 2457 | team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals; | |||
| 2458 | this_thr->th.th_team_bt_set = | |||
| 2459 | team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set; | |||
| 2460 | #else | |||
| 2461 | this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL(team, tid)((((team)->t.t_threads[(tid)]->th.th_current_task->td_icvs .bt_set) ? ((team)->t.t_threads[(tid)]->th.th_current_task ->td_icvs.blocktime) : __kmp_dflt_blocktime) * __kmp_ticks_per_msec ); | |||
| 2462 | #endif | |||
| 2463 | } | |||
| 2464 | } // primary thread | |||
| 2465 | ||||
| 2466 | switch (__kmp_barrier_release_pattern[bs_forkjoin_barrier]) { | |||
| 2467 | case bp_dist_bar: { | |||
| 2468 | __kmp_dist_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, | |||
| 2469 | TRUE(!0) USE_ITT_BUILD_ARG(NULL), __null); | |||
| 2470 | break; | |||
| 2471 | } | |||
| 2472 | case bp_hyper_bar: { | |||
| 2473 | KMP_ASSERT(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier])if (!(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]) ) { __kmp_debug_assert("__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]" , "openmp/runtime/src/kmp_barrier.cpp", 2473); }; | |||
| 2474 | __kmp_hyper_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, | |||
| 2475 | TRUE(!0) USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | |||
| 2476 | break; | |||
| 2477 | } | |||
| 2478 | case bp_hierarchical_bar: { | |||
| 2479 | __kmp_hierarchical_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, | |||
| 2480 | TRUE(!0) USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | |||
| 2481 | break; | |||
| 2482 | } | |||
| 2483 | case bp_tree_bar: { | |||
| 2484 | KMP_ASSERT(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier])if (!(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]) ) { __kmp_debug_assert("__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]" , "openmp/runtime/src/kmp_barrier.cpp", 2484); }; | |||
| 2485 | __kmp_tree_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, | |||
| 2486 | TRUE(!0) USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | |||
| 2487 | break; | |||
| 2488 | } | |||
| 2489 | default: { | |||
| 2490 | __kmp_linear_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, | |||
| 2491 | TRUE(!0) USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj); | |||
| 2492 | } | |||
| 2493 | } | |||
| 2494 | ||||
| 2495 | #if OMPT_SUPPORT1 | |||
| 2496 | if (ompt_enabled.enabled && | |||
| 2497 | this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) { | |||
| 2498 | int ds_tid = this_thr->th.th_info.ds.ds_tid; | |||
| 2499 | ompt_data_t *task_data = (team) | |||
| 2500 | ? OMPT_CUR_TASK_DATA(this_thr)(&(this_thr->th.th_current_task->ompt_task_info.task_data )) | |||
| 2501 | : &(this_thr->th.ompt_thread_info.task_data); | |||
| 2502 | this_thr->th.ompt_thread_info.state = ompt_state_overhead; | |||
| 2503 | #if OMPT_OPTIONAL1 | |||
| 2504 | void *codeptr = NULL__null; | |||
| 2505 | if (KMP_MASTER_TID(ds_tid)(0 == (ds_tid)) && | |||
| 2506 | (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)ompt_callback_sync_region_wait_callback || | |||
| 2507 | ompt_callbacks.ompt_callback(ompt_callback_sync_region)ompt_callback_sync_region_callback)) | |||
| 2508 | codeptr = team ? team->t.ompt_team_info.master_return_address : NULL__null; | |||
| 2509 | if (ompt_enabled.ompt_callback_sync_region_wait) { | |||
| 2510 | ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)ompt_callback_sync_region_wait_callback( | |||
| 2511 | ompt_sync_region_barrier_implicit, ompt_scope_end, NULL__null, task_data, | |||
| 2512 | codeptr); | |||
| 2513 | } | |||
| 2514 | if (ompt_enabled.ompt_callback_sync_region) { | |||
| 2515 | ompt_callbacks.ompt_callback(ompt_callback_sync_region)ompt_callback_sync_region_callback( | |||
| 2516 | ompt_sync_region_barrier_implicit, ompt_scope_end, NULL__null, task_data, | |||
| 2517 | codeptr); | |||
| 2518 | } | |||
| 2519 | #endif | |||
| 2520 | if (!KMP_MASTER_TID(ds_tid)(0 == (ds_tid)) && ompt_enabled.ompt_callback_implicit_task) { | |||
| 2521 | ompt_callbacks.ompt_callback(ompt_callback_implicit_task)ompt_callback_implicit_task_callback( | |||
| 2522 | ompt_scope_end, NULL__null, task_data, 0, ds_tid, | |||
| 2523 | ompt_task_implicit); // TODO: Can this be ompt_task_initial? | |||
| 2524 | } | |||
| 2525 | } | |||
| 2526 | #endif | |||
| 2527 | ||||
| 2528 | // Early exit for reaping threads releasing forkjoin barrier | |||
| 2529 | if (TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done)) { | |||
| 2530 | this_thr->th.th_task_team = NULL__null; | |||
| 2531 | ||||
| 2532 | #if USE_ITT_BUILD1 && USE_ITT_NOTIFY1 | |||
| 2533 | if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0) { | |||
| 2534 | if (!KMP_MASTER_TID(tid)(0 == (tid))) { | |||
| 2535 | itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier); | |||
| 2536 | if (itt_sync_obj) | |||
| 2537 | __kmp_itt_barrier_finished(gtid, itt_sync_obj); | |||
| 2538 | } | |||
| 2539 | } | |||
| 2540 | #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */ | |||
| 2541 | KA_TRACE(10, ("__kmp_fork_barrier: T#%d is leaving early\n", gtid))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_fork_barrier: T#%d is leaving early\n" , gtid); }; | |||
| 2542 | return; | |||
| 2543 | } | |||
| 2544 | ||||
| 2545 | /* We can now assume that a valid team structure has been allocated by the | |||
| 2546 | primary thread and propagated to all worker threads. The current thread, | |||
| 2547 | however, may not be part of the team, so we can't blindly assume that the | |||
| 2548 | team pointer is non-null. */ | |||
| 2549 | team = (kmp_team_t *)TCR_PTR(this_thr->th.th_team)((void *)(this_thr->th.th_team)); | |||
| 2550 | KMP_DEBUG_ASSERT(team != NULL)if (!(team != __null)) { __kmp_debug_assert("team != __null", "openmp/runtime/src/kmp_barrier.cpp", 2550); }; | |||
| 2551 | tid = __kmp_tid_from_gtid(gtid); | |||
| 2552 | ||||
| 2553 | #if KMP_BARRIER_ICV_PULL | |||
| 2554 | /* Primary thread's copy of the ICVs was set up on the implicit taskdata in | |||
| 2555 | __kmp_reinitialize_team. __kmp_fork_call() assumes the primary thread's | |||
| 2556 | implicit task has this data before this function is called. We cannot | |||
| 2557 | modify __kmp_fork_call() to look at the fixed ICVs in the primary thread's | |||
| 2558 | thread struct, because it is not always the case that the threads arrays | |||
| 2559 | have been allocated when __kmp_fork_call() is executed. */ | |||
| 2560 | { | |||
| 2561 | KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy)((void)0); | |||
| 2562 | if (!KMP_MASTER_TID(tid)(0 == (tid))) { // primary thread already has ICVs | |||
| 2563 | // Copy the initial ICVs from the primary thread's thread struct to the | |||
| 2564 | // implicit task for this tid. | |||
| 2565 | KA_TRACE(10,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_fork_barrier: T#%d(%d) is PULLing ICVs\n" , gtid, tid); } | |||
| 2566 | ("__kmp_fork_barrier: T#%d(%d) is PULLing ICVs\n", gtid, tid))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_fork_barrier: T#%d(%d) is PULLing ICVs\n" , gtid, tid); }; | |||
| 2567 | __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, | |||
| 2568 | tid, FALSE0); | |||
| 2569 | copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, | |||
| 2570 | &team->t.t_threads[0] | |||
| 2571 | ->th.th_bar[bs_forkjoin_barrier] | |||
| 2572 | .bb.th_fixed_icvs); | |||
| 2573 | } | |||
| 2574 | } | |||
| 2575 | #endif // KMP_BARRIER_ICV_PULL | |||
| 2576 | ||||
| 2577 | if (__kmp_tasking_mode != tskm_immediate_exec) { | |||
| 2578 | __kmp_task_team_sync(this_thr, team); | |||
| 2579 | } | |||
| 2580 | ||||
| 2581 | #if KMP_AFFINITY_SUPPORTED1 | |||
| 2582 | kmp_proc_bind_t proc_bind = team->t.t_proc_bind; | |||
| 2583 | if (proc_bind == proc_bind_intel) { | |||
| 2584 | // Call dynamic affinity settings | |||
| 2585 | if (__kmp_affinity.type == affinity_balanced && team->t.t_size_changed) { | |||
| 2586 | __kmp_balanced_affinity(this_thr, team->t.t_nproc); | |||
| 2587 | } | |||
| 2588 | } else if (proc_bind != proc_bind_false) { | |||
| 2589 | if (this_thr->th.th_new_place == this_thr->th.th_current_place) { | |||
| 2590 | KA_TRACE(100, ("__kmp_fork_barrier: T#%d already in correct place %d\n",if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_fork_barrier: T#%d already in correct place %d\n" , __kmp_gtid_from_thread(this_thr), this_thr->th.th_current_place ); } | |||
| 2591 | __kmp_gtid_from_thread(this_thr),if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_fork_barrier: T#%d already in correct place %d\n" , __kmp_gtid_from_thread(this_thr), this_thr->th.th_current_place ); } | |||
| 2592 | this_thr->th.th_current_place))if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_fork_barrier: T#%d already in correct place %d\n" , __kmp_gtid_from_thread(this_thr), this_thr->th.th_current_place ); }; | |||
| 2593 | } else { | |||
| 2594 | __kmp_affinity_set_place(gtid); | |||
| 2595 | } | |||
| 2596 | } | |||
| 2597 | #endif // KMP_AFFINITY_SUPPORTED | |||
| 2598 | // Perform the display affinity functionality | |||
| 2599 | if (__kmp_display_affinity) { | |||
| 2600 | if (team->t.t_display_affinity | |||
| 2601 | #if KMP_AFFINITY_SUPPORTED1 | |||
| 2602 | || (__kmp_affinity.type == affinity_balanced && team->t.t_size_changed) | |||
| 2603 | #endif | |||
| 2604 | ) { | |||
| 2605 | // NULL means use the affinity-format-var ICV | |||
| 2606 | __kmp_aux_display_affinity(gtid, NULL__null); | |||
| 2607 | this_thr->th.th_prev_num_threads = team->t.t_nproc; | |||
| 2608 | this_thr->th.th_prev_level = team->t.t_level; | |||
| 2609 | } | |||
| 2610 | } | |||
| 2611 | if (!KMP_MASTER_TID(tid)(0 == (tid))) | |||
| 2612 | KMP_CHECK_UPDATE(this_thr->th.th_def_allocator, team->t.t_def_allocator)if ((this_thr->th.th_def_allocator) != (team->t.t_def_allocator )) (this_thr->th.th_def_allocator) = (team->t.t_def_allocator ); | |||
| 2613 | ||||
| 2614 | #if USE_ITT_BUILD1 && USE_ITT_NOTIFY1 | |||
| 2615 | if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0) { | |||
| 2616 | if (!KMP_MASTER_TID(tid)(0 == (tid))) { | |||
| 2617 | // Get correct barrier object | |||
| 2618 | itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier); | |||
| 2619 | __kmp_itt_barrier_finished(gtid, itt_sync_obj); // Workers call acquired | |||
| 2620 | } // (prepare called inside barrier_release) | |||
| 2621 | } | |||
| 2622 | #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */ | |||
| 2623 | KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d:%d) is leaving\n", gtid,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_fork_barrier: T#%d(%d:%d) is leaving\n" , gtid, team->t.t_id, tid); } | |||
| 2624 | team->t.t_id, tid))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_fork_barrier: T#%d(%d:%d) is leaving\n" , gtid, team->t.t_id, tid); }; | |||
| 2625 | } | |||
| 2626 | ||||
// __kmp_setup_icv_copy: stage the ICV values in new_icvs so that the worker
// threads of a team end up with them. How that happens is chosen at compile
// time by KMP_BARRIER_ICV_PULL / KMP_BARRIER_ICV_PUSH (three branches below).
//
// Parameters:
//   team      - team whose threads receive the ICVs; team->t.t_threads[0] is
//               dereferenced by the PULL branch and by every trace call.
//   new_nproc - must be non-zero; used as the loop bound in the linear
//               (#else) branch, where slots 1 .. new_nproc-1 are written.
//   new_icvs  - source kmp_internal_control_t that is copied from.
//   loc       - source location; only forwarded to __kmp_init_implicit_task
//               in the linear (#else) branch, unused by the other branches.
// Returns nothing.
//
// NOTE(review): in this listing the text that follows a macro invocation on
// the same row appears to be the expansion rendered by the analyzer report,
// not additional source code - confirm against the real source file.
| 2627 | void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc, | |||
| 2628 | kmp_internal_control_t *new_icvs, ident_t *loc) { | |||
| 2629 | KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_setup_icv_copy)((void)0); | |||
| 2630 | ||||
// Debug-only preconditions: all three of team, new_nproc and new_icvs must be
// non-zero, and once __kmp_init_parallel is set new_icvs->nproc must be
// non-zero as well.
| 2631 | KMP_DEBUG_ASSERT(team && new_nproc && new_icvs)if (!(team && new_nproc && new_icvs)) { __kmp_debug_assert ("team && new_nproc && new_icvs", "openmp/runtime/src/kmp_barrier.cpp" , 2631); }; | |||
| 2632 | KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc)if (!((!(__kmp_init_parallel)) || new_icvs->nproc)) { __kmp_debug_assert ("(!(__kmp_init_parallel)) || new_icvs->nproc", "openmp/runtime/src/kmp_barrier.cpp" , 2632); }; | |||
| 2633 | ||||
| 2634 | /* Primary thread's copy of the ICVs was set up on the implicit taskdata in | |||
| 2635 | __kmp_reinitialize_team. __kmp_fork_call() assumes the primary thread's | |||
| 2636 | implicit task has this data before this function is called. */ | |||
| 2637 | #if KMP_BARRIER_ICV_PULL | |||
// PULL branch: a single copy of new_icvs into thread 0's fork/join barrier
// slot (th_fixed_icvs); no per-worker work is done in this function.
| 2638 | /* Copy ICVs to primary thread's thread structure into th_fixed_icvs (which | |||
| 2639 | remains untouched), where all of the worker threads can access them and | |||
| 2640 | make their own copies after the barrier. */ | |||
| 2641 | KMP_DEBUG_ASSERT(team->t.t_threads[0])if (!(team->t.t_threads[0])) { __kmp_debug_assert("team->t.t_threads[0]" , "openmp/runtime/src/kmp_barrier.cpp", 2641); }; // The threads arrays should be | |||
| 2642 | // allocated at this point | |||
| 2643 | copy_icvs( | |||
| 2644 | &team->t.t_threads[0]->th.th_bar[bs_forkjoin_barrier].bb.th_fixed_icvs, | |||
| 2645 | new_icvs); | |||
| 2646 | KF_TRACE(10, ("__kmp_setup_icv_copy: PULL: T#%d this_thread=%p team=%p\n", 0,if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_setup_icv_copy: PULL: T#%d this_thread=%p team=%p\n" , 0, team->t.t_threads[0], team); } | |||
| 2647 | team->t.t_threads[0], team))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_setup_icv_copy: PULL: T#%d this_thread=%p team=%p\n" , 0, team->t.t_threads[0], team); }; | |||
| 2648 | #elif KMP_BARRIER_ICV_PUSH1 | |||
// PUSH branch: nothing is copied here, only a trace line is emitted.
// NOTE(review): the listing renders KMP_BARRIER_ICV_PUSH with the value 1,
// which suggests this is the branch compiled in the analyzed build - confirm.
| 2649 | // The ICVs will be propagated in the fork barrier, so nothing needs to be | |||
| 2650 | // done here. | |||
| 2651 | KF_TRACE(10, ("__kmp_setup_icv_copy: PUSH: T#%d this_thread=%p team=%p\n", 0,if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_setup_icv_copy: PUSH: T#%d this_thread=%p team=%p\n" , 0, team->t.t_threads[0], team); } | |||
| 2652 | team->t.t_threads[0], team))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_setup_icv_copy: PUSH: T#%d this_thread=%p team=%p\n" , 0, team->t.t_threads[0], team); }; | |||
| 2653 | #else | |||
// Linear branch: for every non-primary slot f, (re)initialize its implicit
// task and then copy new_icvs into team->t.t_implicit_task_taskdata[f].td_icvs.
// ngo_load / ngo_store_icvs / ngo_sync are the macros defined at the top of
// the file; in this listing they expand to a no-op, copy_icvs and a no-op.
| 2654 | // Copy the ICVs to each of the non-primary threads. This takes O(nthreads) | |||
| 2655 | // time. | |||
| 2656 | ngo_load(new_icvs)((void)0); | |||
| 2657 | KMP_DEBUG_ASSERT(team->t.t_threads[0])if (!(team->t.t_threads[0])) { __kmp_debug_assert("team->t.t_threads[0]" , "openmp/runtime/src/kmp_barrier.cpp", 2657); }; // The threads arrays should be | |||
| 2658 | // allocated at this point | |||
| 2659 | for (int f = 1; f < new_nproc; ++f) { // Skip the primary thread | |||
| 2660 | // TODO: GEH - pass in better source location info since usually NULL here | |||
// The same trace message is emitted both before and after the per-thread copy.
| 2661 | KF_TRACE(10, ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n",if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n" , f, team->t.t_threads[f], team); } | |||
| 2662 | f, team->t.t_threads[f], team))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n" , f, team->t.t_threads[f], team); }; | |||
| 2663 | __kmp_init_implicit_task(loc, team->t.t_threads[f], team, f, FALSE0); | |||
| 2664 | ngo_store_icvs(&team->t.t_implicit_task_taskdata[f].td_icvs, new_icvs)copy_icvs((&team->t.t_implicit_task_taskdata[f].td_icvs ), (new_icvs)); | |||
| 2665 | KF_TRACE(10, ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n",if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n" , f, team->t.t_threads[f], team); } | |||
| 2666 | f, team->t.t_threads[f], team))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n" , f, team->t.t_threads[f], team); }; | |||
| 2667 | } | |||
| 2668 | ngo_sync()((void)0); | |||
| 2669 | #endif // KMP_BARRIER_ICV_PULL | |||
| 2670 | } | |||