LLVM OpenMP* Runtime Library
kmp.h
1 
2 /*
3  * kmp.h -- KPTS runtime header file.
4  */
5 
6 //===----------------------------------------------------------------------===//
7 //
8 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
9 // See https://llvm.org/LICENSE.txt for license information.
10 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef KMP_H
15 #define KMP_H
16 
17 #include "kmp_config.h"
18 
19 /* #define BUILD_PARALLEL_ORDERED 1 */
20 
21 /* This fix replaces gettimeofday with clock_gettime for better scalability on
22  the Altix. Requires user code to be linked with -lrt. */
23 //#define FIX_SGI_CLOCK
24 
25 /* Defines for OpenMP 3.0 tasking and auto scheduling */
26 
27 #ifndef KMP_STATIC_STEAL_ENABLED
28 #define KMP_STATIC_STEAL_ENABLED 1
29 #endif
30 
31 #define TASK_CURRENT_NOT_QUEUED 0
32 #define TASK_CURRENT_QUEUED 1
33 
34 #ifdef BUILD_TIED_TASK_STACK
35 #define TASK_STACK_EMPTY 0 // entries when the stack is empty
36 #define TASK_STACK_BLOCK_BITS 5 // Used in TASK_STACK_SIZE and TASK_STACK_MASK
37 // Number of entries in each task stack array
38 #define TASK_STACK_BLOCK_SIZE (1 << TASK_STACK_BLOCK_BITS)
39 // Mask for determining index into stack block
40 #define TASK_STACK_INDEX_MASK (TASK_STACK_BLOCK_SIZE - 1)
41 #endif // BUILD_TIED_TASK_STACK
42 
43 #define TASK_NOT_PUSHED 1
44 #define TASK_SUCCESSFULLY_PUSHED 0
45 #define TASK_TIED 1
46 #define TASK_UNTIED 0
47 #define TASK_EXPLICIT 1
48 #define TASK_IMPLICIT 0
49 #define TASK_PROXY 1
50 #define TASK_FULL 0
51 
52 #define KMP_CANCEL_THREADS
53 #define KMP_THREAD_ATTR
54 
55 // Android does not have pthread_cancel. Undefine KMP_CANCEL_THREADS if being
56 // built on Android
57 #if defined(__ANDROID__)
58 #undef KMP_CANCEL_THREADS
59 #endif
60 
61 #include <signal.h>
62 #include <stdarg.h>
63 #include <stddef.h>
64 #include <stdio.h>
65 #include <stdlib.h>
66 #include <string.h>
67 /* Do not include <ctype.h>; it causes problems with /MD on Windows* OS due to a
68  broken Microsoft library. Some macros are provided below to replace those functions. */
69 #ifndef __ABSOFT_WIN
70 #include <sys/types.h>
71 #endif
72 #include <limits.h>
73 #include <time.h>
74 
75 #include <errno.h>
76 
77 #include "kmp_os.h"
78 
79 #include "kmp_safe_c_api.h"
80 
81 #if KMP_STATS_ENABLED
82 class kmp_stats_list;
83 #endif
84 
85 #if KMP_USE_HIER_SCHED
86 // Only include hierarchical scheduling if affinity is supported
87 #undef KMP_USE_HIER_SCHED
88 #define KMP_USE_HIER_SCHED KMP_AFFINITY_SUPPORTED
89 #endif
90 
91 #if KMP_USE_HWLOC && KMP_AFFINITY_SUPPORTED
92 #include "hwloc.h"
93 #ifndef HWLOC_OBJ_NUMANODE
94 #define HWLOC_OBJ_NUMANODE HWLOC_OBJ_NODE
95 #endif
96 #ifndef HWLOC_OBJ_PACKAGE
97 #define HWLOC_OBJ_PACKAGE HWLOC_OBJ_SOCKET
98 #endif
99 #if HWLOC_API_VERSION >= 0x00020000
100 // hwloc 2.0 changed the type of an object's depth from unsigned to int
101 typedef int kmp_hwloc_depth_t;
102 #else
103 typedef unsigned int kmp_hwloc_depth_t;
104 #endif
105 #endif
106 
107 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
108 #include <xmmintrin.h>
109 #endif
110 
111 #include "kmp_debug.h"
112 #include "kmp_lock.h"
113 #include "kmp_version.h"
114 #if USE_DEBUGGER
115 #include "kmp_debugger.h"
116 #endif
117 #include "kmp_i18n.h"
118 
119 #define KMP_HANDLE_SIGNALS (KMP_OS_UNIX || KMP_OS_WINDOWS)
120 
121 #include "kmp_wrapper_malloc.h"
122 #if KMP_OS_UNIX
123 #include <unistd.h>
124 #if !defined NSIG && defined _NSIG
125 #define NSIG _NSIG
126 #endif
127 #endif
128 
129 #if KMP_OS_LINUX
130 #pragma weak clock_gettime
131 #endif
132 
133 #if OMPT_SUPPORT
134 #include "ompt-internal.h"
135 #endif
136 
137 #if OMP_50_ENABLED
138 // Affinity format function
139 #include "kmp_str.h"
140 #endif
141 
142 // 0 - no fast memory allocation, alignment: 8-byte on x86, 16-byte on x64.
143 // 3 - fast allocation using sync, non-sync free lists of any size, non-self
144 // free lists of limited size.
145 #ifndef USE_FAST_MEMORY
146 #define USE_FAST_MEMORY 3
147 #endif
148 
149 #ifndef KMP_NESTED_HOT_TEAMS
150 #define KMP_NESTED_HOT_TEAMS 0
151 #define USE_NESTED_HOT_ARG(x)
152 #else
153 #if KMP_NESTED_HOT_TEAMS
154 #if OMP_40_ENABLED
155 #define USE_NESTED_HOT_ARG(x) , x
156 #else
157 // Nested hot teams feature depends on omp 4.0, disable it for earlier versions
158 #undef KMP_NESTED_HOT_TEAMS
159 #define KMP_NESTED_HOT_TEAMS 0
160 #define USE_NESTED_HOT_ARG(x)
161 #endif
162 #else
163 #define USE_NESTED_HOT_ARG(x)
164 #endif
165 #endif
166 
167 // Assume BGET uses a compare_exchange instruction instead of a lock by default.
168 #ifndef USE_CMP_XCHG_FOR_BGET
169 #define USE_CMP_XCHG_FOR_BGET 1
170 #endif
171 
172 // Test to see if queuing lock is better than bootstrap lock for bget
173 // #ifndef USE_QUEUING_LOCK_FOR_BGET
174 // #define USE_QUEUING_LOCK_FOR_BGET
175 // #endif
176 
177 #define KMP_NSEC_PER_SEC 1000000000L
178 #define KMP_USEC_PER_SEC 1000000L
179 
188 enum {
193  /* 0x04 is no longer used */
202  KMP_IDENT_BARRIER_IMPL_MASK = 0x01C0,
203  KMP_IDENT_BARRIER_IMPL_FOR = 0x0040,
204  KMP_IDENT_BARRIER_IMPL_SECTIONS = 0x00C0,
205 
206  KMP_IDENT_BARRIER_IMPL_SINGLE = 0x0140,
207  KMP_IDENT_BARRIER_IMPL_WORKSHARE = 0x01C0,
208 
220  KMP_IDENT_ATOMIC_HINT_UNCONTENDED = 0x010000,
221  KMP_IDENT_ATOMIC_HINT_CONTENDED = 0x020000,
222  KMP_IDENT_ATOMIC_HINT_NONSPECULATIVE = 0x040000,
223  KMP_IDENT_ATOMIC_HINT_SPECULATIVE = 0x080000,
224 };
225 
229 typedef struct ident {
230  kmp_int32 reserved_1;
231  kmp_int32 flags;
233  kmp_int32 reserved_2;
234 #if USE_ITT_BUILD
235 /* but currently used for storing region-specific ITT */
236 /* contextual information. */
237 #endif /* USE_ITT_BUILD */
238  kmp_int32 reserved_3;
239  char const *psource;
243 } ident_t;
248 // Some forward declarations.
249 typedef union kmp_team kmp_team_t;
250 typedef struct kmp_taskdata kmp_taskdata_t;
251 typedef union kmp_task_team kmp_task_team_t;
252 typedef union kmp_team kmp_team_p;
253 typedef union kmp_info kmp_info_p;
254 typedef union kmp_root kmp_root_p;
255 
256 #ifdef __cplusplus
257 extern "C" {
258 #endif
259 
260 /* ------------------------------------------------------------------------ */
261 
262 /* Pack two 32-bit signed integers into a 64-bit signed integer */
263 /* ToDo: Fix word ordering for big-endian machines. */
264 #define KMP_PACK_64(HIGH_32, LOW_32) \
265  ((kmp_int64)((((kmp_uint64)(HIGH_32)) << 32) | (kmp_uint64)(LOW_32)))
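// Illustrative sketch (not part of kmp.h): how KMP_PACK_64 combines two 32-bit
// halves and how they can be recovered. The names sample_pack64_demo, hi, lo,
// hi_back and lo_back are hypothetical and exist only for this example.
static inline void sample_pack64_demo(void) {
  kmp_int32 hi = 0x12345678;
  kmp_int32 lo = 0x0ABCDEF0;
  kmp_int64 packed = KMP_PACK_64(hi, lo); // 0x123456780ABCDEF0
  kmp_int32 hi_back = (kmp_int32)(((kmp_uint64)packed) >> 32);
  kmp_int32 lo_back = (kmp_int32)((kmp_uint64)packed & 0xFFFFFFFFu);
  // hi_back == hi and lo_back == lo here. Note that a negative LOW_32 would
  // sign-extend through the (kmp_uint64) conversion and clobber the high half,
  // so the macro as written assumes the low word's top bit is clear (or that
  // the caller intends that sign-extension).
  (void)hi_back;
  (void)lo_back;
}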
266 
267 // Generic string manipulation macros. Assume that _x is of type char *
268 #define SKIP_WS(_x) \
269  { \
270  while (*(_x) == ' ' || *(_x) == '\t') \
271  (_x)++; \
272  }
273 #define SKIP_DIGITS(_x) \
274  { \
275  while (*(_x) >= '0' && *(_x) <= '9') \
276  (_x)++; \
277  }
278 #define SKIP_TOKEN(_x) \
279  { \
280  while ((*(_x) >= '0' && *(_x) <= '9') || (*(_x) >= 'a' && *(_x) <= 'z') || \
281  (*(_x) >= 'A' && *(_x) <= 'Z') || *(_x) == '_') \
282  (_x)++; \
283  }
284 #define SKIP_TO(_x, _c) \
285  { \
286  while (*(_x) != '\0' && *(_x) != (_c)) \
287  (_x)++; \
288  }
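// Illustrative sketch (not part of kmp.h): the SKIP_* macros are meant to be
// chained while scanning a settings string. The buffer contents and the name
// sample_skip_demo are hypothetical.
static inline void sample_skip_demo(void) {
  char buf[] = "  512, fine ";
  char *scan = buf;
  SKIP_WS(scan);     // skip leading blanks; scan now points at "512, fine "
  SKIP_DIGITS(scan); // consume "512"; scan now points at ", fine "
  if (*scan == ',')
    ++scan;          // step over the separator by hand
  SKIP_WS(scan);     // scan now points at "fine "
  SKIP_TOKEN(scan);  // consume the identifier "fine"; scan now points at " "
  (void)scan;
}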
289 
290 /* ------------------------------------------------------------------------ */
291 
292 #define KMP_MAX(x, y) ((x) > (y) ? (x) : (y))
293 #define KMP_MIN(x, y) ((x) < (y) ? (x) : (y))
294 
295 /* ------------------------------------------------------------------------ */
296 /* Enumeration types */
297 
298 enum kmp_state_timer {
299  ts_stop,
300  ts_start,
301  ts_pause,
302 
303  ts_last_state
304 };
305 
306 enum dynamic_mode {
307  dynamic_default,
308 #ifdef USE_LOAD_BALANCE
309  dynamic_load_balance,
310 #endif /* USE_LOAD_BALANCE */
311  dynamic_random,
312  dynamic_thread_limit,
313  dynamic_max
314 };
315 
316 /* external schedule constants, duplicate enum omp_sched in omp.h in order to
317  * not include it here */
318 #ifndef KMP_SCHED_TYPE_DEFINED
319 #define KMP_SCHED_TYPE_DEFINED
320 typedef enum kmp_sched {
321  kmp_sched_lower = 0, // lower and upper bounds are for routine parameter check
322  // Note: need to adjust __kmp_sch_map global array in case enum is changed
323  kmp_sched_static = 1, // mapped to kmp_sch_static_chunked (33)
324  kmp_sched_dynamic = 2, // mapped to kmp_sch_dynamic_chunked (35)
325  kmp_sched_guided = 3, // mapped to kmp_sch_guided_chunked (36)
326  kmp_sched_auto = 4, // mapped to kmp_sch_auto (38)
327  kmp_sched_upper_std = 5, // upper bound for standard schedules
328  kmp_sched_lower_ext = 100, // lower bound of Intel extension schedules
329  kmp_sched_trapezoidal = 101, // mapped to kmp_sch_trapezoidal (39)
330 #if KMP_STATIC_STEAL_ENABLED
331  kmp_sched_static_steal = 102, // mapped to kmp_sch_static_steal (44)
332 #endif
333  kmp_sched_upper,
334  kmp_sched_default = kmp_sched_static, // default scheduling
335  kmp_sched_monotonic = 0x80000000
336 } kmp_sched_t;
337 #endif
338 
343 enum sched_type : kmp_int32 {
345  kmp_sch_static_chunked = 33,
347  kmp_sch_dynamic_chunked = 35,
349  kmp_sch_runtime = 37,
351  kmp_sch_trapezoidal = 39,
352 
353  /* accessible only through KMP_SCHEDULE environment variable */
354  kmp_sch_static_greedy = 40,
355  kmp_sch_static_balanced = 41,
356  /* accessible only through KMP_SCHEDULE environment variable */
357  kmp_sch_guided_iterative_chunked = 42,
358  kmp_sch_guided_analytical_chunked = 43,
359  /* accessible only through KMP_SCHEDULE environment variable */
360  kmp_sch_static_steal = 44,
361 
362 #if OMP_45_ENABLED
363  /* static with chunk adjustment (e.g., simd) */
364  kmp_sch_static_balanced_chunked = 45,
365  kmp_sch_guided_simd = 46,
366  kmp_sch_runtime_simd = 47,
367 #endif
368 
369  /* accessible only through KMP_SCHEDULE environment variable */
373  kmp_ord_static_chunked = 65,
375  kmp_ord_dynamic_chunked = 67,
376  kmp_ord_guided_chunked = 68,
377  kmp_ord_runtime = 69,
379  kmp_ord_trapezoidal = 71,
382 #if OMP_40_ENABLED
383  /* Schedules for Distribute construct */
386 #endif
387 
388  /* For the "nomerge" versions, kmp_dispatch_next*() will always return a
389  single iteration/chunk, even if the loop is serialized. For the schedule
390  types listed above, the entire iteration vector is returned if the loop is
391  serialized. This doesn't work for gcc/gcomp sections. */
392  kmp_nm_lower = 160,
394  kmp_nm_static_chunked =
395  (kmp_sch_static_chunked - kmp_sch_lower + kmp_nm_lower),
397  kmp_nm_dynamic_chunked = 163,
399  kmp_nm_runtime = 165,
400  kmp_nm_auto = 166,
401  kmp_nm_trapezoidal = 167,
402 
403  /* accessible only through KMP_SCHEDULE environment variable */
404  kmp_nm_static_greedy = 168,
405  kmp_nm_static_balanced = 169,
406  /* accessible only through KMP_SCHEDULE environment variable */
407  kmp_nm_guided_iterative_chunked = 170,
408  kmp_nm_guided_analytical_chunked = 171,
409  kmp_nm_static_steal =
410  172, /* accessible only through OMP_SCHEDULE environment variable */
411 
412  kmp_nm_ord_static_chunked = 193,
414  kmp_nm_ord_dynamic_chunked = 195,
415  kmp_nm_ord_guided_chunked = 196,
416  kmp_nm_ord_runtime = 197,
418  kmp_nm_ord_trapezoidal = 199,
421 #if OMP_45_ENABLED
422  /* Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. Since
423  we need to distinguish the three possible cases (no modifier, monotonic
424  modifier, nonmonotonic modifier), we need separate bits for each modifier.
425  The absence of monotonic does not imply nonmonotonic; the 4.5 specification
426  says that the behaviour of the "no modifier" case is implementation defined,
427  but it will become "nonmonotonic" in 5.0.
428 
429  Since we're passing a full 32 bit value, we can use a couple of high bits
430  for these flags; out of paranoia we avoid the sign bit.
431 
432  These modifiers can be or-ed into non-static schedules by the compiler to
433  pass the additional information. They will be stripped early in the
434  processing in __kmp_dispatch_init when setting up schedules, so most of the
435  code won't ever see schedules with these bits set. */
436  kmp_sch_modifier_monotonic =
437  (1 << 29),
438  kmp_sch_modifier_nonmonotonic =
439  (1 << 30),
441 #define SCHEDULE_WITHOUT_MODIFIERS(s) \
442  (enum sched_type)( \
443  (s) & ~(kmp_sch_modifier_nonmonotonic | kmp_sch_modifier_monotonic))
444 #define SCHEDULE_HAS_MONOTONIC(s) (((s)&kmp_sch_modifier_monotonic) != 0)
445 #define SCHEDULE_HAS_NONMONOTONIC(s) (((s)&kmp_sch_modifier_nonmonotonic) != 0)
446 #define SCHEDULE_HAS_NO_MODIFIERS(s) \
447  (((s) & (kmp_sch_modifier_nonmonotonic | kmp_sch_modifier_monotonic)) == 0)
448 #define SCHEDULE_GET_MODIFIERS(s) \
449  ((enum sched_type)( \
450  (s) & (kmp_sch_modifier_nonmonotonic | kmp_sch_modifier_monotonic)))
451 #define SCHEDULE_SET_MODIFIERS(s, m) \
452  (s = (enum sched_type)((kmp_int32)s | (kmp_int32)m))
453 #else
454 /* By doing this we hope to avoid multiple tests on OMP_45_ENABLED. Compilers
455  can now eliminate tests on compile time constants and dead code that results
456  from them, so we can leave code guarded by such an if in place. */
457 #define SCHEDULE_WITHOUT_MODIFIERS(s) (s)
458 #define SCHEDULE_HAS_MONOTONIC(s) false
459 #define SCHEDULE_HAS_NONMONOTONIC(s) false
460 #define SCHEDULE_HAS_NO_MODIFIERS(s) true
461 #define SCHEDULE_GET_MODIFIERS(s) ((enum sched_type)0)
462 #define SCHEDULE_SET_MODIFIERS(s, m) /* Nothing */
463 #endif
464 #define SCHEDULE_NONMONOTONIC 0
465 #define SCHEDULE_MONOTONIC 1
466 
468 };
469 
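// Illustrative sketch (not part of kmp.h): decomposing a schedule word that
// carries a nonmonotonic modifier with the macros above. The name
// sample_modifier_demo is hypothetical.
static inline void sample_modifier_demo(void) {
#if OMP_45_ENABLED
  // A compiler could encode schedule(nonmonotonic:dynamic) as the base kind
  // with bit 30 set.
  enum sched_type s = (enum sched_type)(kmp_sch_dynamic_chunked |
                                        kmp_sch_modifier_nonmonotonic);
  enum sched_type base = SCHEDULE_WITHOUT_MODIFIERS(s); // kmp_sch_dynamic_chunked
  int nonmono = SCHEDULE_HAS_NONMONOTONIC(s); // != 0
  int plain = SCHEDULE_HAS_NO_MODIFIERS(s); // 0
  (void)base;
  (void)nonmono;
  (void)plain;
#endif
}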
470 // Apply modifiers on internal kind to standard kind
471 static inline void
472 __kmp_sched_apply_mods_stdkind(kmp_sched_t *kind,
473  enum sched_type internal_kind) {
474 #if OMP_50_ENABLED
475  if (SCHEDULE_HAS_MONOTONIC(internal_kind)) {
476  *kind = (kmp_sched_t)((int)*kind | (int)kmp_sched_monotonic);
477  }
478 #endif
479 }
480 
481 // Apply modifiers on standard kind to internal kind
482 static inline void
483 __kmp_sched_apply_mods_intkind(kmp_sched_t kind,
484  enum sched_type *internal_kind) {
485 #if OMP_50_ENABLED
486  if ((int)kind & (int)kmp_sched_monotonic) {
487  *internal_kind = (enum sched_type)((int)*internal_kind |
488  (int)kmp_sch_modifier_monotonic);
489  }
490 #endif
491 }
492 
493 // Get standard schedule without modifiers
494 static inline kmp_sched_t __kmp_sched_without_mods(kmp_sched_t kind) {
495 #if OMP_50_ENABLED
496  return (kmp_sched_t)((int)kind & ~((int)kmp_sched_monotonic));
497 #else
498  return kind;
499 #endif
500 }
501 
502 /* Type to keep runtime schedule set via OMP_SCHEDULE or omp_set_schedule() */
503 typedef union kmp_r_sched {
504  struct {
505  enum sched_type r_sched_type;
506  int chunk;
507  };
508  kmp_int64 sched;
509 } kmp_r_sched_t;
510 
511 extern enum sched_type __kmp_sch_map[]; // maps OMP 3.0 schedule types to our
512 // internal schedule types
513 
514 enum library_type {
515  library_none,
516  library_serial,
517  library_turnaround,
518  library_throughput
519 };
520 
521 #if KMP_OS_LINUX
522 enum clock_function_type {
523  clock_function_gettimeofday,
524  clock_function_clock_gettime
525 };
526 #endif /* KMP_OS_LINUX */
527 
528 #if KMP_MIC_SUPPORTED
529 enum mic_type { non_mic, mic1, mic2, mic3, dummy };
530 #endif
531 
532 /* -- fast reduction stuff ------------------------------------------------ */
533 
534 #undef KMP_FAST_REDUCTION_BARRIER
535 #define KMP_FAST_REDUCTION_BARRIER 1
536 
537 #undef KMP_FAST_REDUCTION_CORE_DUO
538 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
539 #define KMP_FAST_REDUCTION_CORE_DUO 1
540 #endif
541 
542 enum _reduction_method {
543  reduction_method_not_defined = 0,
544  critical_reduce_block = (1 << 8),
545  atomic_reduce_block = (2 << 8),
546  tree_reduce_block = (3 << 8),
547  empty_reduce_block = (4 << 8)
548 };
549 
550 // Description of the packed_reduction_method variable:
551 // The packed_reduction_method variable packs two enum values into byte 0 and
552 // byte 1 of an int:
553 // byte 0: (packed_reduction_method & 0x000000FF) is the 'enum barrier_type'
554 // value of the barrier that will be used in fast reduction: bs_plain_barrier
555 // or bs_reduction_barrier;
556 // byte 1: (packed_reduction_method & 0x0000FF00) is the reduction method that
557 // will be used in fast reduction.
558 // The reduction method is of type 'enum _reduction_method' and is defined so
559 // that its byte-0 bits are all zero; hence no shift instruction is needed
560 // while packing/unpacking.
561 
562 #if KMP_FAST_REDUCTION_BARRIER
563 #define PACK_REDUCTION_METHOD_AND_BARRIER(reduction_method, barrier_type) \
564  ((reduction_method) | (barrier_type))
565 
566 #define UNPACK_REDUCTION_METHOD(packed_reduction_method) \
567  ((enum _reduction_method)((packed_reduction_method) & (0x0000FF00)))
568 
569 #define UNPACK_REDUCTION_BARRIER(packed_reduction_method) \
570  ((enum barrier_type)((packed_reduction_method) & (0x000000FF)))
571 #else
572 #define PACK_REDUCTION_METHOD_AND_BARRIER(reduction_method, barrier_type) \
573  (reduction_method)
574 
575 #define UNPACK_REDUCTION_METHOD(packed_reduction_method) \
576  (packed_reduction_method)
577 
578 #define UNPACK_REDUCTION_BARRIER(packed_reduction_method) (bs_plain_barrier)
579 #endif
580 
581 #define TEST_REDUCTION_METHOD(packed_reduction_method, which_reduction_block) \
582  ((UNPACK_REDUCTION_METHOD(packed_reduction_method)) == \
583  (which_reduction_block))
584 
585 #if KMP_FAST_REDUCTION_BARRIER
586 #define TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER \
587  (PACK_REDUCTION_METHOD_AND_BARRIER(tree_reduce_block, bs_reduction_barrier))
588 
589 #define TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER \
590  (PACK_REDUCTION_METHOD_AND_BARRIER(tree_reduce_block, bs_plain_barrier))
591 #endif
592 
593 typedef int PACKED_REDUCTION_METHOD_T;
594 
595 /* -- end of fast reduction stuff ----------------------------------------- */
596 
597 #if KMP_OS_WINDOWS
598 #define USE_CBLKDATA
599 #if KMP_MSVC_COMPAT
600 #pragma warning(push)
601 #pragma warning(disable : 271 310)
602 #endif
603 #include <windows.h>
604 #if KMP_MSVC_COMPAT
605 #pragma warning(pop)
606 #endif
607 #endif
608 
609 #if KMP_OS_UNIX
610 #include <dlfcn.h>
611 #include <pthread.h>
612 #endif
613 
614 /* Only Linux* OS and Windows* OS support thread affinity. */
615 #if KMP_AFFINITY_SUPPORTED
616 
617 // GROUP_AFFINITY is already defined for _MSC_VER>=1600 (VS2010 and later).
618 #if KMP_OS_WINDOWS
619 #if _MSC_VER < 1600 && KMP_MSVC_COMPAT
620 typedef struct GROUP_AFFINITY {
621  KAFFINITY Mask;
622  WORD Group;
623  WORD Reserved[3];
624 } GROUP_AFFINITY;
625 #endif /* _MSC_VER < 1600 */
626 #if KMP_GROUP_AFFINITY
627 extern int __kmp_num_proc_groups;
628 #else
629 static const int __kmp_num_proc_groups = 1;
630 #endif /* KMP_GROUP_AFFINITY */
631 typedef DWORD (*kmp_GetActiveProcessorCount_t)(WORD);
632 extern kmp_GetActiveProcessorCount_t __kmp_GetActiveProcessorCount;
633 
634 typedef WORD (*kmp_GetActiveProcessorGroupCount_t)(void);
635 extern kmp_GetActiveProcessorGroupCount_t __kmp_GetActiveProcessorGroupCount;
636 
637 typedef BOOL (*kmp_GetThreadGroupAffinity_t)(HANDLE, GROUP_AFFINITY *);
638 extern kmp_GetThreadGroupAffinity_t __kmp_GetThreadGroupAffinity;
639 
640 typedef BOOL (*kmp_SetThreadGroupAffinity_t)(HANDLE, const GROUP_AFFINITY *,
641  GROUP_AFFINITY *);
642 extern kmp_SetThreadGroupAffinity_t __kmp_SetThreadGroupAffinity;
643 #endif /* KMP_OS_WINDOWS */
644 
645 #if KMP_USE_HWLOC
646 extern hwloc_topology_t __kmp_hwloc_topology;
647 extern int __kmp_hwloc_error;
648 extern int __kmp_numa_detected;
649 extern int __kmp_tile_depth;
650 #endif
651 
652 extern size_t __kmp_affin_mask_size;
653 #define KMP_AFFINITY_CAPABLE() (__kmp_affin_mask_size > 0)
654 #define KMP_AFFINITY_DISABLE() (__kmp_affin_mask_size = 0)
655 #define KMP_AFFINITY_ENABLE(mask_size) (__kmp_affin_mask_size = mask_size)
656 #define KMP_CPU_SET_ITERATE(i, mask) \
657  for (i = (mask)->begin(); (int)i != (mask)->end(); i = (mask)->next(i))
658 #define KMP_CPU_SET(i, mask) (mask)->set(i)
659 #define KMP_CPU_ISSET(i, mask) (mask)->is_set(i)
660 #define KMP_CPU_CLR(i, mask) (mask)->clear(i)
661 #define KMP_CPU_ZERO(mask) (mask)->zero()
662 #define KMP_CPU_COPY(dest, src) (dest)->copy(src)
663 #define KMP_CPU_AND(dest, src) (dest)->bitwise_and(src)
664 #define KMP_CPU_COMPLEMENT(max_bit_number, mask) (mask)->bitwise_not()
665 #define KMP_CPU_UNION(dest, src) (dest)->bitwise_or(src)
666 #define KMP_CPU_ALLOC(ptr) (ptr = __kmp_affinity_dispatch->allocate_mask())
667 #define KMP_CPU_FREE(ptr) __kmp_affinity_dispatch->deallocate_mask(ptr)
668 #define KMP_CPU_ALLOC_ON_STACK(ptr) KMP_CPU_ALLOC(ptr)
669 #define KMP_CPU_FREE_FROM_STACK(ptr) KMP_CPU_FREE(ptr)
670 #define KMP_CPU_INTERNAL_ALLOC(ptr) KMP_CPU_ALLOC(ptr)
671 #define KMP_CPU_INTERNAL_FREE(ptr) KMP_CPU_FREE(ptr)
672 #define KMP_CPU_INDEX(arr, i) __kmp_affinity_dispatch->index_mask_array(arr, i)
673 #define KMP_CPU_ALLOC_ARRAY(arr, n) \
674  (arr = __kmp_affinity_dispatch->allocate_mask_array(n))
675 #define KMP_CPU_FREE_ARRAY(arr, n) \
676  __kmp_affinity_dispatch->deallocate_mask_array(arr)
677 #define KMP_CPU_INTERNAL_ALLOC_ARRAY(arr, n) KMP_CPU_ALLOC_ARRAY(arr, n)
678 #define KMP_CPU_INTERNAL_FREE_ARRAY(arr, n) KMP_CPU_FREE_ARRAY(arr, n)
679 #define __kmp_get_system_affinity(mask, abort_bool) \
680  (mask)->get_system_affinity(abort_bool)
681 #define __kmp_set_system_affinity(mask, abort_bool) \
682  (mask)->set_system_affinity(abort_bool)
683 #define __kmp_get_proc_group(mask) (mask)->get_proc_group()
684 
685 class KMPAffinity {
686 public:
687  class Mask {
688  public:
689  void *operator new(size_t n);
690  void operator delete(void *p);
691  void *operator new[](size_t n);
692  void operator delete[](void *p);
693  virtual ~Mask() {}
694  // Set bit i to 1
695  virtual void set(int i) {}
696  // Return bit i
697  virtual bool is_set(int i) const { return false; }
698  // Set bit i to 0
699  virtual void clear(int i) {}
700  // Zero out entire mask
701  virtual void zero() {}
702  // Copy src into this mask
703  virtual void copy(const Mask *src) {}
704  // this &= rhs
705  virtual void bitwise_and(const Mask *rhs) {}
706  // this |= rhs
707  virtual void bitwise_or(const Mask *rhs) {}
708  // this = ~this
709  virtual void bitwise_not() {}
710  // API for iterating over an affinity mask
711  // for (int i = mask->begin(); i != mask->end(); i = mask->next(i))
712  virtual int begin() const { return 0; }
713  virtual int end() const { return 0; }
714  virtual int next(int previous) const { return 0; }
715  // Set the system's affinity to this affinity mask's value
716  virtual int set_system_affinity(bool abort_on_error) const { return -1; }
717  // Set this affinity mask to the current system affinity
718  virtual int get_system_affinity(bool abort_on_error) { return -1; }
719  // Only 1 DWORD in the mask should have any procs set.
720  // Return the appropriate index, or -1 for an invalid mask.
721  virtual int get_proc_group() const { return -1; }
722  };
723  void *operator new(size_t n);
724  void operator delete(void *p);
725  // Need virtual destructor
726  virtual ~KMPAffinity() = default;
727  // Determine if affinity is capable
728  virtual void determine_capable(const char *env_var) {}
729  // Bind the current thread to os proc
730  virtual void bind_thread(int proc) {}
731  // Factory functions to allocate/deallocate a mask
732  virtual Mask *allocate_mask() { return nullptr; }
733  virtual void deallocate_mask(Mask *m) {}
734  virtual Mask *allocate_mask_array(int num) { return nullptr; }
735  virtual void deallocate_mask_array(Mask *m) {}
736  virtual Mask *index_mask_array(Mask *m, int index) { return nullptr; }
737  static void pick_api();
738  static void destroy_api();
739  enum api_type {
740  NATIVE_OS
741 #if KMP_USE_HWLOC
742  ,
743  HWLOC
744 #endif
745  };
746  virtual api_type get_api_type() const {
747  KMP_ASSERT(0);
748  return NATIVE_OS;
749  }
750 
751 private:
752  static bool picked_api;
753 };
754 
755 typedef KMPAffinity::Mask kmp_affin_mask_t;
756 extern KMPAffinity *__kmp_affinity_dispatch;
757 
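// Illustrative sketch (not part of kmp.h): the typical allocate / populate /
// iterate pattern for the mask macros above. The name sample_mask_demo and the
// proc numbers are hypothetical; __kmp_affinity_dispatch must already have
// been set up via KMPAffinity::pick_api() for the allocation to do anything.
static inline void sample_mask_demo(void) {
  kmp_affin_mask_t *mask;
  KMP_CPU_ALLOC(mask); // mask = __kmp_affinity_dispatch->allocate_mask()
  KMP_CPU_ZERO(mask);
  KMP_CPU_SET(0, mask); // add OS proc 0
  KMP_CPU_SET(2, mask); // add OS proc 2
  int i;
  KMP_CPU_SET_ITERATE(i, mask) {
    // visits each set bit in turn: 0, then 2
    if (KMP_CPU_ISSET(i, mask)) {
      /* bind, print, etc. */
    }
  }
  KMP_CPU_FREE(mask);
}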
758 // Declare local char buffers with this size for printing debug and info
759 // messages, using __kmp_affinity_print_mask().
760 #define KMP_AFFIN_MASK_PRINT_LEN 1024
761 
762 enum affinity_type {
763  affinity_none = 0,
764  affinity_physical,
765  affinity_logical,
766  affinity_compact,
767  affinity_scatter,
768  affinity_explicit,
769  affinity_balanced,
770  affinity_disabled, // not used outside the env var parser
771  affinity_default
772 };
773 
774 enum affinity_gran {
775  affinity_gran_fine = 0,
776  affinity_gran_thread,
777  affinity_gran_core,
778  affinity_gran_tile,
779  affinity_gran_numa,
780  affinity_gran_package,
781  affinity_gran_node,
782 #if KMP_GROUP_AFFINITY
783  // The "group" granularity isn't necessarily coarser than all of the
784  // other levels, but we put it last in the enum.
785  affinity_gran_group,
786 #endif /* KMP_GROUP_AFFINITY */
787  affinity_gran_default
788 };
789 
790 enum affinity_top_method {
791  affinity_top_method_all = 0, // try all (supported) methods, in order
792 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
793  affinity_top_method_apicid,
794  affinity_top_method_x2apicid,
795 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
796  affinity_top_method_cpuinfo, // KMP_CPUINFO_FILE is usable on Windows* OS, too
797 #if KMP_GROUP_AFFINITY
798  affinity_top_method_group,
799 #endif /* KMP_GROUP_AFFINITY */
800  affinity_top_method_flat,
801 #if KMP_USE_HWLOC
802  affinity_top_method_hwloc,
803 #endif
804  affinity_top_method_default
805 };
806 
807 #define affinity_respect_mask_default (-1)
808 
809 extern enum affinity_type __kmp_affinity_type; /* Affinity type */
810 extern enum affinity_gran __kmp_affinity_gran; /* Affinity granularity */
811 extern int __kmp_affinity_gran_levels; /* corresponding int value */
812 extern int __kmp_affinity_dups; /* Affinity duplicate masks */
813 extern enum affinity_top_method __kmp_affinity_top_method;
814 extern int __kmp_affinity_compact; /* Affinity 'compact' value */
815 extern int __kmp_affinity_offset; /* Affinity offset value */
816 extern int __kmp_affinity_verbose; /* Was verbose specified for KMP_AFFINITY? */
817 extern int __kmp_affinity_warnings; /* KMP_AFFINITY warnings enabled ? */
818 extern int __kmp_affinity_respect_mask; // Respect process' init affinity mask?
819 extern char *__kmp_affinity_proclist; /* proc ID list */
820 extern kmp_affin_mask_t *__kmp_affinity_masks;
821 extern unsigned __kmp_affinity_num_masks;
822 extern void __kmp_affinity_bind_thread(int which);
823 
824 extern kmp_affin_mask_t *__kmp_affin_fullMask;
825 extern char *__kmp_cpuinfo_file;
826 
827 #endif /* KMP_AFFINITY_SUPPORTED */
828 
829 #if OMP_40_ENABLED
830 
831 // This needs to be kept in sync with the values in omp.h !!!
832 typedef enum kmp_proc_bind_t {
833  proc_bind_false = 0,
834  proc_bind_true,
835  proc_bind_master,
836  proc_bind_close,
837  proc_bind_spread,
838  proc_bind_intel, // use KMP_AFFINITY interface
839  proc_bind_default
840 } kmp_proc_bind_t;
841 
842 typedef struct kmp_nested_proc_bind_t {
843  kmp_proc_bind_t *bind_types;
844  int size;
845  int used;
846 } kmp_nested_proc_bind_t;
847 
848 extern kmp_nested_proc_bind_t __kmp_nested_proc_bind;
849 
850 #endif /* OMP_40_ENABLED */
851 
852 #if OMP_50_ENABLED
853 extern int __kmp_display_affinity;
854 extern char *__kmp_affinity_format;
855 static const size_t KMP_AFFINITY_FORMAT_SIZE = 512;
856 #endif // OMP_50_ENABLED
857 
858 #if KMP_AFFINITY_SUPPORTED
859 #define KMP_PLACE_ALL (-1)
860 #define KMP_PLACE_UNDEFINED (-2)
861 // Is KMP_AFFINITY being used instead of OMP_PROC_BIND/OMP_PLACES?
862 #define KMP_AFFINITY_NON_PROC_BIND \
863  ((__kmp_nested_proc_bind.bind_types[0] == proc_bind_false || \
864  __kmp_nested_proc_bind.bind_types[0] == proc_bind_intel) && \
865  (__kmp_affinity_num_masks > 0 || __kmp_affinity_type == affinity_balanced))
866 #endif /* KMP_AFFINITY_SUPPORTED */
867 
868 extern int __kmp_affinity_num_places;
869 
870 #if OMP_40_ENABLED
871 typedef enum kmp_cancel_kind_t {
872  cancel_noreq = 0,
873  cancel_parallel = 1,
874  cancel_loop = 2,
875  cancel_sections = 3,
876  cancel_taskgroup = 4
877 } kmp_cancel_kind_t;
878 #endif // OMP_40_ENABLED
879 
880 // KMP_HW_SUBSET support:
881 typedef struct kmp_hws_item {
882  int num;
883  int offset;
884 } kmp_hws_item_t;
885 
886 extern kmp_hws_item_t __kmp_hws_socket;
887 extern kmp_hws_item_t __kmp_hws_node;
888 extern kmp_hws_item_t __kmp_hws_tile;
889 extern kmp_hws_item_t __kmp_hws_core;
890 extern kmp_hws_item_t __kmp_hws_proc;
891 extern int __kmp_hws_requested;
892 extern int __kmp_hws_abs_flag; // absolute or per-item number requested
893 
894 /* ------------------------------------------------------------------------ */
895 
896 #define KMP_PAD(type, sz) \
897  (sizeof(type) + (sz - ((sizeof(type) - 1) % (sz)) - 1))
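// Illustrative worked example (not part of kmp.h): KMP_PAD rounds sizeof(type)
// up to the next multiple of sz. For a hypothetical 40-byte struct padded to a
// 64-byte cache line:
//   KMP_PAD(struct foo, 64) = 40 + (64 - ((40 - 1) % 64) - 1)
//                           = 40 + (64 - 39 - 1) = 64
// A size that is already a multiple of sz is left unchanged:
//   KMP_PAD(kmp_int64, 8)   = 8 + (8 - ((8 - 1) % 8) - 1) = 8 + 0 = 8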
898 
899 // We need to avoid using -1 as a GTID as +1 is added to the gtid
900 // when storing it in a lock, and the value 0 is reserved.
901 #define KMP_GTID_DNE (-2) /* Does not exist */
902 #define KMP_GTID_SHUTDOWN (-3) /* Library is shutting down */
903 #define KMP_GTID_MONITOR (-4) /* Monitor thread ID */
904 #define KMP_GTID_UNKNOWN (-5) /* Is not known */
905 #define KMP_GTID_MIN (-6) /* Minimal gtid for low bound check in DEBUG */
906 
907 #if OMP_50_ENABLED
908 /* OpenMP 5.0 Memory Management support */
909 
910 #ifndef __OMP_H
911 // Duplicate type definitions from omp.h
912 typedef uintptr_t omp_uintptr_t;
913 
914 typedef enum {
915  OMP_ATK_THREADMODEL = 1,
916  OMP_ATK_ALIGNMENT = 2,
917  OMP_ATK_ACCESS = 3,
918  OMP_ATK_POOL_SIZE = 4,
919  OMP_ATK_FALLBACK = 5,
920  OMP_ATK_FB_DATA = 6,
921  OMP_ATK_PINNED = 7,
922  OMP_ATK_PARTITION = 8
923 } omp_alloctrait_key_t;
924 
925 typedef enum {
926  OMP_ATV_FALSE = 0,
927  OMP_ATV_TRUE = 1,
928  OMP_ATV_DEFAULT = 2,
929  OMP_ATV_CONTENDED = 3,
930  OMP_ATV_UNCONTENDED = 4,
931  OMP_ATV_SEQUENTIAL = 5,
932  OMP_ATV_PRIVATE = 6,
933  OMP_ATV_ALL = 7,
934  OMP_ATV_THREAD = 8,
935  OMP_ATV_PTEAM = 9,
936  OMP_ATV_CGROUP = 10,
937  OMP_ATV_DEFAULT_MEM_FB = 11,
938  OMP_ATV_NULL_FB = 12,
939  OMP_ATV_ABORT_FB = 13,
940  OMP_ATV_ALLOCATOR_FB = 14,
941  OMP_ATV_ENVIRONMENT = 15,
942  OMP_ATV_NEAREST = 16,
943  OMP_ATV_BLOCKED = 17,
944  OMP_ATV_INTERLEAVED = 18
945 } omp_alloctrait_value_t;
946 
947 typedef void *omp_memspace_handle_t;
948 extern omp_memspace_handle_t const omp_default_mem_space;
949 extern omp_memspace_handle_t const omp_large_cap_mem_space;
950 extern omp_memspace_handle_t const omp_const_mem_space;
951 extern omp_memspace_handle_t const omp_high_bw_mem_space;
952 extern omp_memspace_handle_t const omp_low_lat_mem_space;
953 
954 typedef struct {
955  omp_alloctrait_key_t key;
956  omp_uintptr_t value;
957 } omp_alloctrait_t;
958 
959 typedef void *omp_allocator_handle_t;
960 extern omp_allocator_handle_t const omp_null_allocator;
961 extern omp_allocator_handle_t const omp_default_mem_alloc;
962 extern omp_allocator_handle_t const omp_large_cap_mem_alloc;
963 extern omp_allocator_handle_t const omp_const_mem_alloc;
964 extern omp_allocator_handle_t const omp_high_bw_mem_alloc;
965 extern omp_allocator_handle_t const omp_low_lat_mem_alloc;
966 extern omp_allocator_handle_t const omp_cgroup_mem_alloc;
967 extern omp_allocator_handle_t const omp_pteam_mem_alloc;
968 extern omp_allocator_handle_t const omp_thread_mem_alloc;
969 extern omp_allocator_handle_t const kmp_max_mem_alloc;
970 extern omp_allocator_handle_t __kmp_def_allocator;
971 
972 // end of duplicate type definitions from omp.h
973 #endif
974 
975 extern int __kmp_memkind_available;
976 
977 typedef omp_memspace_handle_t kmp_memspace_t; // placeholder
978 
979 typedef struct kmp_allocator_t {
980  omp_memspace_handle_t memspace;
981  void **memkind; // pointer to memkind
982  int alignment;
983  omp_alloctrait_value_t fb;
984  kmp_allocator_t *fb_data;
985  kmp_uint64 pool_size;
986  kmp_uint64 pool_used;
987 } kmp_allocator_t;
988 
989 extern omp_allocator_handle_t __kmpc_init_allocator(int gtid,
990  omp_memspace_handle_t,
991  int ntraits,
992  omp_alloctrait_t traits[]);
993 extern void __kmpc_destroy_allocator(int gtid, omp_allocator_handle_t al);
994 extern void __kmpc_set_default_allocator(int gtid, omp_allocator_handle_t al);
995 extern omp_allocator_handle_t __kmpc_get_default_allocator(int gtid);
996 extern void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
997 extern void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
998 
999 extern void __kmp_init_memkind();
1000 extern void __kmp_fini_memkind();
1001 #endif // OMP_50_ENABLED
1002 
1003 /* ------------------------------------------------------------------------ */
1004 
1005 #define KMP_UINT64_MAX \
1006  (~((kmp_uint64)1 << ((sizeof(kmp_uint64) * (1 << 3)) - 1)))
1007 
1008 #define KMP_MIN_NTH 1
1009 
1010 #ifndef KMP_MAX_NTH
1011 #if defined(PTHREAD_THREADS_MAX) && PTHREAD_THREADS_MAX < INT_MAX
1012 #define KMP_MAX_NTH PTHREAD_THREADS_MAX
1013 #else
1014 #define KMP_MAX_NTH INT_MAX
1015 #endif
1016 #endif /* KMP_MAX_NTH */
1017 
1018 #ifdef PTHREAD_STACK_MIN
1019 #define KMP_MIN_STKSIZE PTHREAD_STACK_MIN
1020 #else
1021 #define KMP_MIN_STKSIZE ((size_t)(32 * 1024))
1022 #endif
1023 
1024 #define KMP_MAX_STKSIZE (~((size_t)1 << ((sizeof(size_t) * (1 << 3)) - 1)))
1025 
1026 #if KMP_ARCH_X86
1027 #define KMP_DEFAULT_STKSIZE ((size_t)(2 * 1024 * 1024))
1028 #elif KMP_ARCH_X86_64
1029 #define KMP_DEFAULT_STKSIZE ((size_t)(4 * 1024 * 1024))
1030 #define KMP_BACKUP_STKSIZE ((size_t)(2 * 1024 * 1024))
1031 #else
1032 #define KMP_DEFAULT_STKSIZE ((size_t)(1024 * 1024))
1033 #endif
1034 
1035 #define KMP_DEFAULT_MALLOC_POOL_INCR ((size_t)(1024 * 1024))
1036 #define KMP_MIN_MALLOC_POOL_INCR ((size_t)(4 * 1024))
1037 #define KMP_MAX_MALLOC_POOL_INCR \
1038  (~((size_t)1 << ((sizeof(size_t) * (1 << 3)) - 1)))
1039 
1040 #define KMP_MIN_STKOFFSET (0)
1041 #define KMP_MAX_STKOFFSET KMP_MAX_STKSIZE
1042 #if KMP_OS_DARWIN
1043 #define KMP_DEFAULT_STKOFFSET KMP_MIN_STKOFFSET
1044 #else
1045 #define KMP_DEFAULT_STKOFFSET CACHE_LINE
1046 #endif
1047 
1048 #define KMP_MIN_STKPADDING (0)
1049 #define KMP_MAX_STKPADDING (2 * 1024 * 1024)
1050 
1051 #define KMP_BLOCKTIME_MULTIPLIER \
1052  (1000) /* number of blocktime units per second */
1053 #define KMP_MIN_BLOCKTIME (0)
1054 #define KMP_MAX_BLOCKTIME \
1055  (INT_MAX) /* Must be this for the "infinite" setting to work */
1056 #define KMP_DEFAULT_BLOCKTIME (200) /* __kmp_blocktime is in milliseconds */
1057 
1058 #if KMP_USE_MONITOR
1059 #define KMP_DEFAULT_MONITOR_STKSIZE ((size_t)(64 * 1024))
1060 #define KMP_MIN_MONITOR_WAKEUPS (1) // min times monitor wakes up per second
1061 #define KMP_MAX_MONITOR_WAKEUPS (1000) // max times monitor can wake up per sec
1062 
1063 /* Calculate new number of monitor wakeups for a specific block time based on
1064  previous monitor_wakeups. Only allow increasing number of wakeups */
1065 #define KMP_WAKEUPS_FROM_BLOCKTIME(blocktime, monitor_wakeups) \
1066  (((blocktime) == KMP_MAX_BLOCKTIME) \
1067  ? (monitor_wakeups) \
1068  : ((blocktime) == KMP_MIN_BLOCKTIME) \
1069  ? KMP_MAX_MONITOR_WAKEUPS \
1070  : ((monitor_wakeups) > (KMP_BLOCKTIME_MULTIPLIER / (blocktime))) \
1071  ? (monitor_wakeups) \
1072  : (KMP_BLOCKTIME_MULTIPLIER) / (blocktime))
1073 
1074 /* Calculate number of intervals for a specific block time based on
1075  monitor_wakeups */
1076 #define KMP_INTERVALS_FROM_BLOCKTIME(blocktime, monitor_wakeups) \
1077  (((blocktime) + (KMP_BLOCKTIME_MULTIPLIER / (monitor_wakeups)) - 1) / \
1078  (KMP_BLOCKTIME_MULTIPLIER / (monitor_wakeups)))
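// Illustrative worked example (not part of kmp.h): with the default blocktime
// of 200 ms and a monitor currently waking once per second,
//   KMP_WAKEUPS_FROM_BLOCKTIME(200, 1)
//     -> 200 is neither KMP_MAX_BLOCKTIME nor KMP_MIN_BLOCKTIME, and
//        1 > (1000 / 200) == 5 is false, so the result is 1000 / 200 = 5
//        wakeups per second (the wakeup rate only ever increases).
//   KMP_INTERVALS_FROM_BLOCKTIME(200, 5)
//     -> (200 + (1000 / 5) - 1) / (1000 / 5) = 399 / 200 = 1 interval,
//        i.e. a single 200 ms monitor period covers the whole blocktime.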
1079 #else
1080 #define KMP_BLOCKTIME(team, tid) \
1081  (get__bt_set(team, tid) ? get__blocktime(team, tid) : __kmp_dflt_blocktime)
1082 #if KMP_OS_UNIX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
1083 // HW TSC is used to reduce overhead (clock tick instead of nanosecond).
1084 extern kmp_uint64 __kmp_ticks_per_msec;
1085 #if KMP_COMPILER_ICC
1086 #define KMP_NOW() ((kmp_uint64)_rdtsc())
1087 #else
1088 #define KMP_NOW() __kmp_hardware_timestamp()
1089 #endif
1090 #define KMP_NOW_MSEC() (KMP_NOW() / __kmp_ticks_per_msec)
1091 #define KMP_BLOCKTIME_INTERVAL(team, tid) \
1092  (KMP_BLOCKTIME(team, tid) * __kmp_ticks_per_msec)
1093 #define KMP_BLOCKING(goal, count) ((goal) > KMP_NOW())
1094 #else
1095 // System time is retrieved sporadically while blocking.
1096 extern kmp_uint64 __kmp_now_nsec();
1097 #define KMP_NOW() __kmp_now_nsec()
1098 #define KMP_NOW_MSEC() (KMP_NOW() / KMP_USEC_PER_SEC)
1099 #define KMP_BLOCKTIME_INTERVAL(team, tid) \
1100  (KMP_BLOCKTIME(team, tid) * KMP_USEC_PER_SEC)
1101 #define KMP_BLOCKING(goal, count) ((count) % 1000 != 0 || (goal) > KMP_NOW())
1102 #endif
1103 #endif // KMP_USE_MONITOR
1104 
1105 #define KMP_MIN_STATSCOLS 40
1106 #define KMP_MAX_STATSCOLS 4096
1107 #define KMP_DEFAULT_STATSCOLS 80
1108 
1109 #define KMP_MIN_INTERVAL 0
1110 #define KMP_MAX_INTERVAL (INT_MAX - 1)
1111 #define KMP_DEFAULT_INTERVAL 0
1112 
1113 #define KMP_MIN_CHUNK 1
1114 #define KMP_MAX_CHUNK (INT_MAX - 1)
1115 #define KMP_DEFAULT_CHUNK 1
1116 
1117 #define KMP_DFLT_DISP_NUM_BUFF 7
1118 #define KMP_MAX_ORDERED 8
1119 
1120 #define KMP_MAX_FIELDS 32
1121 
1122 #define KMP_MAX_BRANCH_BITS 31
1123 
1124 #define KMP_MAX_ACTIVE_LEVELS_LIMIT INT_MAX
1125 
1126 #define KMP_MAX_DEFAULT_DEVICE_LIMIT INT_MAX
1127 
1128 #define KMP_MAX_TASK_PRIORITY_LIMIT INT_MAX
1129 
1130 /* Minimum number of threads before switch to TLS gtid (experimentally
1131  determined) */
1132 /* josh TODO: what about OS X* tuning? */
1133 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1134 #define KMP_TLS_GTID_MIN 5
1135 #else
1136 #define KMP_TLS_GTID_MIN INT_MAX
1137 #endif
1138 
1139 #define KMP_MASTER_TID(tid) ((tid) == 0)
1140 #define KMP_WORKER_TID(tid) ((tid) != 0)
1141 
1142 #define KMP_MASTER_GTID(gtid) (__kmp_tid_from_gtid((gtid)) == 0)
1143 #define KMP_WORKER_GTID(gtid) (__kmp_tid_from_gtid((gtid)) != 0)
1144 #define KMP_INITIAL_GTID(gtid) ((gtid) == 0)
1145 
1146 #ifndef TRUE
1147 #define FALSE 0
1148 #define TRUE (!FALSE)
1149 #endif
1150 
1151 /* NOTE: all of the following constants must be even */
1152 
1153 #if KMP_OS_WINDOWS
1154 #define KMP_INIT_WAIT 64U /* initial number of spin-tests */
1155 #define KMP_NEXT_WAIT 32U /* subsequent number of spin-tests */
1156 #elif KMP_OS_CNK
1157 #define KMP_INIT_WAIT 16U /* initial number of spin-tests */
1158 #define KMP_NEXT_WAIT 8U /* subsequent number of spin-tests */
1159 #elif KMP_OS_LINUX
1160 #define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
1161 #define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
1162 #elif KMP_OS_DARWIN
1163 /* TODO: tune for KMP_OS_DARWIN */
1164 #define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
1165 #define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
1166 #elif KMP_OS_DRAGONFLY
1167 /* TODO: tune for KMP_OS_DRAGONFLY */
1168 #define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
1169 #define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
1170 #elif KMP_OS_FREEBSD
1171 /* TODO: tune for KMP_OS_FREEBSD */
1172 #define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
1173 #define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
1174 #elif KMP_OS_NETBSD
1175 /* TODO: tune for KMP_OS_NETBSD */
1176 #define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
1177 #define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
1178 #elif KMP_OS_HURD
1179 /* TODO: tune for KMP_OS_HURD */
1180 #define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
1181 #define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
1182 #elif KMP_OS_OPENBSD
1183 /* TODO: tune for KMP_OS_OPENBSD */
1184 #define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
1185 #define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
1186 #elif KMP_OS_KFREEBSD
1187 /* TODO: tune for KMP_OS_KFREEBSD */
1188 #define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
1189 #define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
1190 #endif
1191 
1192 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1193 typedef struct kmp_cpuid {
1194  kmp_uint32 eax;
1195  kmp_uint32 ebx;
1196  kmp_uint32 ecx;
1197  kmp_uint32 edx;
1198 } kmp_cpuid_t;
1199 
1200 typedef struct kmp_cpuinfo {
1201  int initialized; // If 0, other fields are not initialized.
1202  int signature; // CPUID(1).EAX
1203  int family; // CPUID(1).EAX[27:20]+CPUID(1).EAX[11:8] (Extended Family+Family)
1204  int model; // ( CPUID(1).EAX[19:16] << 4 ) + CPUID(1).EAX[7:4] ( ( Extended
1205  // Model << 4 ) + Model)
1206  int stepping; // CPUID(1).EAX[3:0] ( Stepping )
1207  int sse2; // 0 if SSE2 instructions are not supported, 1 otherwise.
1208  int rtm; // 0 if RTM instructions are not supported, 1 otherwise.
1209  int cpu_stackoffset;
1210  int apic_id;
1211  int physical_id;
1212  int logical_id;
1213  kmp_uint64 frequency; // Nominal CPU frequency in Hz.
1214  char name[3 * sizeof(kmp_cpuid_t)]; // CPUID(0x80000002,0x80000003,0x80000004)
1215 } kmp_cpuinfo_t;
1216 
1217 extern void __kmp_query_cpuid(kmp_cpuinfo_t *p);
1218 
1219 #if KMP_OS_UNIX
1220 // subleaf is only needed for cache and topology discovery and can be set to
1221 // zero in most cases
1222 static inline void __kmp_x86_cpuid(int leaf, int subleaf, struct kmp_cpuid *p) {
1223  __asm__ __volatile__("cpuid"
1224  : "=a"(p->eax), "=b"(p->ebx), "=c"(p->ecx), "=d"(p->edx)
1225  : "a"(leaf), "c"(subleaf));
1226 }
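// Illustrative sketch (not part of kmp.h): querying CPUID leaf 1 and decoding
// the signature fields the way the kmp_cpuinfo comments above describe. The
// name sample_cpuid_demo is hypothetical.
static inline void sample_cpuid_demo(void) {
  struct kmp_cpuid buf;
  __kmp_x86_cpuid(1, 0, &buf); // leaf 1, subleaf 0
  int signature = (int)buf.eax;
  int family = ((signature >> 20) & 0xff) + ((signature >> 8) & 0x0f);
  int model = ((signature >> 12) & 0xf0) + ((signature >> 4) & 0x0f);
  int stepping = signature & 0x0f;
  (void)family;
  (void)model;
  (void)stepping;
}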
1227 // Load p into FPU control word
1228 static inline void __kmp_load_x87_fpu_control_word(const kmp_int16 *p) {
1229  __asm__ __volatile__("fldcw %0" : : "m"(*p));
1230 }
1231 // Store FPU control word into p
1232 static inline void __kmp_store_x87_fpu_control_word(kmp_int16 *p) {
1233  __asm__ __volatile__("fstcw %0" : "=m"(*p));
1234 }
1235 static inline void __kmp_clear_x87_fpu_status_word() {
1236 #if KMP_MIC
1237  // 32-bit protected mode x87 FPU state
1238  struct x87_fpu_state {
1239  unsigned cw;
1240  unsigned sw;
1241  unsigned tw;
1242  unsigned fip;
1243  unsigned fips;
1244  unsigned fdp;
1245  unsigned fds;
1246  };
1247  struct x87_fpu_state fpu_state = {0, 0, 0, 0, 0, 0, 0};
1248  __asm__ __volatile__("fstenv %0\n\t" // store FP env
1249  "andw $0x7f00, %1\n\t" // clear 0-7,15 bits of FP SW
1250  "fldenv %0\n\t" // load FP env back
1251  : "+m"(fpu_state), "+m"(fpu_state.sw));
1252 #else
1253  __asm__ __volatile__("fnclex");
1254 #endif // KMP_MIC
1255 }
1256 #if __SSE__
1257 static inline void __kmp_load_mxcsr(const kmp_uint32 *p) { _mm_setcsr(*p); }
1258 static inline void __kmp_store_mxcsr(kmp_uint32 *p) { *p = _mm_getcsr(); }
1259 #else
1260 static inline void __kmp_load_mxcsr(const kmp_uint32 *p) {}
1261 static inline void __kmp_store_mxcsr(kmp_uint32 *p) { *p = 0; }
1262 #endif
1263 #else
1264 // Windows still has these as external functions in assembly file
1265 extern void __kmp_x86_cpuid(int mode, int mode2, struct kmp_cpuid *p);
1266 extern void __kmp_load_x87_fpu_control_word(const kmp_int16 *p);
1267 extern void __kmp_store_x87_fpu_control_word(kmp_int16 *p);
1268 extern void __kmp_clear_x87_fpu_status_word();
1269 static inline void __kmp_load_mxcsr(const kmp_uint32 *p) { _mm_setcsr(*p); }
1270 static inline void __kmp_store_mxcsr(kmp_uint32 *p) { *p = _mm_getcsr(); }
1271 #endif // KMP_OS_UNIX
1272 
1273 #define KMP_X86_MXCSR_MASK 0xffffffc0 /* ignore status flags (6 lsb) */
1274 
1275 #if KMP_ARCH_X86
1276 extern void __kmp_x86_pause(void);
1277 #elif KMP_MIC
1278 // Performance testing on KNC (C0QS-7120 P/A/X/D, 61-core, 16 GB Memory) showed
1279 // regression after removal of extra PAUSE from spin loops. Changing
1280 // the delay from 100 to 300 showed even better performance than double PAUSE
1281 // on Spec OMP2001 and LCPC tasking tests, no regressions on EPCC.
1282 static inline void __kmp_x86_pause(void) { _mm_delay_32(300); }
1283 #else
1284 static inline void __kmp_x86_pause(void) { _mm_pause(); }
1285 #endif
1286 #define KMP_CPU_PAUSE() __kmp_x86_pause()
1287 #elif KMP_ARCH_PPC64
1288 #define KMP_PPC64_PRI_LOW() __asm__ volatile("or 1, 1, 1")
1289 #define KMP_PPC64_PRI_MED() __asm__ volatile("or 2, 2, 2")
1290 #define KMP_PPC64_PRI_LOC_MB() __asm__ volatile("" : : : "memory")
1291 #define KMP_CPU_PAUSE() \
1292  do { \
1293  KMP_PPC64_PRI_LOW(); \
1294  KMP_PPC64_PRI_MED(); \
1295  KMP_PPC64_PRI_LOC_MB(); \
1296  } while (0)
1297 #else
1298 #define KMP_CPU_PAUSE() /* nothing to do */
1299 #endif
1300 
1301 #define KMP_INIT_YIELD(count) \
1302  { (count) = __kmp_yield_init; }
1303 
1304 #define KMP_OVERSUBSCRIBED \
1305  (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc))
1306 
1307 #define KMP_TRY_YIELD \
1308  ((__kmp_use_yield == 1) || (__kmp_use_yield == 2 && (KMP_OVERSUBSCRIBED)))
1309 
1310 #define KMP_TRY_YIELD_OVERSUB \
1311  ((__kmp_use_yield == 1 || __kmp_use_yield == 2) && (KMP_OVERSUBSCRIBED))
1312 
1313 #define KMP_YIELD(cond) \
1314  { \
1315  KMP_CPU_PAUSE(); \
1316  if ((cond) && (KMP_TRY_YIELD)) \
1317  __kmp_yield(); \
1318  }
1319 
1320 #define KMP_YIELD_OVERSUB() \
1321  { \
1322  KMP_CPU_PAUSE(); \
1323  if ((KMP_TRY_YIELD_OVERSUB)) \
1324  __kmp_yield(); \
1325  }
1326 
1327 // Note the decrement of 2 in the following macros. With KMP_LIBRARY=turnaround,
1328 // there should be no yielding, since the initial value from KMP_INIT_YIELD() is odd.
1329 #define KMP_YIELD_SPIN(count) \
1330  { \
1331  KMP_CPU_PAUSE(); \
1332  if (KMP_TRY_YIELD) { \
1333  (count) -= 2; \
1334  if (!(count)) { \
1335  __kmp_yield(); \
1336  (count) = __kmp_yield_next; \
1337  } \
1338  } \
1339  }
1340 
1341 #define KMP_YIELD_OVERSUB_ELSE_SPIN(count) \
1342  { \
1343  KMP_CPU_PAUSE(); \
1344  if ((KMP_TRY_YIELD_OVERSUB)) \
1345  __kmp_yield(); \
1346  else if (__kmp_use_yield == 1) { \
1347  (count) -= 2; \
1348  if (!(count)) { \
1349  __kmp_yield(); \
1350  (count) = __kmp_yield_next; \
1351  } \
1352  } \
1353  }
1354 
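// Illustrative sketch (not part of kmp.h): the intended spin-wait pattern for
// the yield macros above. The flag parameter and the name
// sample_spin_wait_demo are hypothetical; __kmp_yield_init, __kmp_yield_next
// and __kmp_use_yield, which the macros expand to, are declared further down
// in this header.
static inline void sample_spin_wait_demo(volatile int *flag) {
  kmp_uint32 spins;
  KMP_INIT_YIELD(spins); // seed the countdown from __kmp_yield_init
  while (*flag == 0) {
    // Pauses every iteration; yields immediately when oversubscribed,
    // otherwise roughly every (__kmp_yield_next / 2) passes when yielding is
    // enabled.
    KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
  }
}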
1355 /* ------------------------------------------------------------------------ */
1356 /* Support datatypes for the orphaned construct nesting checks. */
1357 /* ------------------------------------------------------------------------ */
1358 
1359 enum cons_type {
1360  ct_none,
1361  ct_parallel,
1362  ct_pdo,
1363  ct_pdo_ordered,
1364  ct_psections,
1365  ct_psingle,
1366  ct_critical,
1367  ct_ordered_in_parallel,
1368  ct_ordered_in_pdo,
1369  ct_master,
1370  ct_reduce,
1371  ct_barrier
1372 };
1373 
1374 #define IS_CONS_TYPE_ORDERED(ct) ((ct) == ct_pdo_ordered)
1375 
1376 struct cons_data {
1377  ident_t const *ident;
1378  enum cons_type type;
1379  int prev;
1380  kmp_user_lock_p
1381  name; /* address exclusively for critical section name comparison */
1382 };
1383 
1384 struct cons_header {
1385  int p_top, w_top, s_top;
1386  int stack_size, stack_top;
1387  struct cons_data *stack_data;
1388 };
1389 
1390 struct kmp_region_info {
1391  char *text;
1392  int offset[KMP_MAX_FIELDS];
1393  int length[KMP_MAX_FIELDS];
1394 };
1395 
1396 /* ---------------------------------------------------------------------- */
1397 /* ---------------------------------------------------------------------- */
1398 
1399 #if KMP_OS_WINDOWS
1400 typedef HANDLE kmp_thread_t;
1401 typedef DWORD kmp_key_t;
1402 #endif /* KMP_OS_WINDOWS */
1403 
1404 #if KMP_OS_UNIX
1405 typedef pthread_t kmp_thread_t;
1406 typedef pthread_key_t kmp_key_t;
1407 #endif
1408 
1409 extern kmp_key_t __kmp_gtid_threadprivate_key;
1410 
1411 typedef struct kmp_sys_info {
1412  long maxrss; /* the maximum resident set size utilized (in kilobytes) */
1413  long minflt; /* the number of page faults serviced without any I/O */
1414  long majflt; /* the number of page faults serviced that required I/O */
1415  long nswap; /* the number of times a process was "swapped" out of memory */
1416  long inblock; /* the number of times the file system had to perform input */
1417  long oublock; /* the number of times the file system had to perform output */
1418  long nvcsw; /* the number of times a context switch was performed voluntarily */
1419  long nivcsw; /* the number of times a context switch was forced (involuntary) */
1420 } kmp_sys_info_t;
1421 
1422 #if USE_ITT_BUILD
1423 // We cannot include "kmp_itt.h" due to circular dependency. Declare the only
1424 // required type here. Later we will check that the type meets the requirements.
1425 typedef int kmp_itt_mark_t;
1426 #define KMP_ITT_DEBUG 0
1427 #endif /* USE_ITT_BUILD */
1428 
1429 typedef kmp_int32 kmp_critical_name[8];
1430 
1440 typedef void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid, ...);
1441 typedef void (*kmpc_micro_bound)(kmp_int32 *bound_tid, kmp_int32 *bound_nth,
1442  ...);
1443 
1448 /* ---------------------------------------------------------------------------
1449  */
1450 /* Threadprivate initialization/finalization function declarations */
1451 
1452 /* for non-array objects: __kmpc_threadprivate_register() */
1453 
1458 typedef void *(*kmpc_ctor)(void *);
1459 
1464 typedef void (*kmpc_dtor)(
1465  void * /*, size_t */); /* 2nd arg: magic number for KCC unused by Intel
1466  compiler */
1471 typedef void *(*kmpc_cctor)(void *, void *);
1472 
1473 /* for array objects: __kmpc_threadprivate_register_vec() */
1474 /* First arg: "this" pointer */
1475 /* Last arg: number of array elements */
1481 typedef void *(*kmpc_ctor_vec)(void *, size_t);
1487 typedef void (*kmpc_dtor_vec)(void *, size_t);
1493 typedef void *(*kmpc_cctor_vec)(void *, void *,
1494  size_t); /* function unused by compiler */
1495 
1500 /* keeps track of threadprivate cache allocations for cleanup later */
1501 typedef struct kmp_cached_addr {
1502  void **addr; /* address of allocated cache */
1503  void ***compiler_cache; /* pointer to compiler's cache */
1504  void *data; /* pointer to global data */
1505  struct kmp_cached_addr *next; /* pointer to next cached address */
1506 } kmp_cached_addr_t;
1507 
1508 struct private_data {
1509  struct private_data *next; /* The next descriptor in the list */
1510  void *data; /* The data buffer for this descriptor */
1511  int more; /* The repeat count for this descriptor */
1512  size_t size; /* The data size for this descriptor */
1513 };
1514 
1515 struct private_common {
1516  struct private_common *next;
1517  struct private_common *link;
1518  void *gbl_addr;
1519  void *par_addr; /* par_addr == gbl_addr for MASTER thread */
1520  size_t cmn_size;
1521 };
1522 
1523 struct shared_common {
1524  struct shared_common *next;
1525  struct private_data *pod_init;
1526  void *obj_init;
1527  void *gbl_addr;
1528  union {
1529  kmpc_ctor ctor;
1530  kmpc_ctor_vec ctorv;
1531  } ct;
1532  union {
1533  kmpc_cctor cctor;
1534  kmpc_cctor_vec cctorv;
1535  } cct;
1536  union {
1537  kmpc_dtor dtor;
1538  kmpc_dtor_vec dtorv;
1539  } dt;
1540  size_t vec_len;
1541  int is_vec;
1542  size_t cmn_size;
1543 };
1544 
1545 #define KMP_HASH_TABLE_LOG2 9 /* log2 of the hash table size */
1546 #define KMP_HASH_TABLE_SIZE \
1547  (1 << KMP_HASH_TABLE_LOG2) /* size of the hash table */
1548 #define KMP_HASH_SHIFT 3 /* throw away this many low bits from the address */
1549 #define KMP_HASH(x) \
1550  ((((kmp_uintptr_t)x) >> KMP_HASH_SHIFT) & (KMP_HASH_TABLE_SIZE - 1))
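// Illustrative worked example (not part of kmp.h): hashing the (hypothetical)
// address of a threadprivate global into one of the KMP_HASH_TABLE_SIZE (512)
// buckets:
//   KMP_HASH((void *)0x7f3a12c8) -> (0x7f3a12c8 >> 3) & 511
//                                 = 0x0FE74259 & 0x1FF = 0x59 = bucket 89
// Dropping the low KMP_HASH_SHIFT (3) bits discards alignment zeros so that
// neighbouring 8-byte-aligned globals spread across different buckets.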
1551 
1552 struct common_table {
1553  struct private_common *data[KMP_HASH_TABLE_SIZE];
1554 };
1555 
1556 struct shared_table {
1557  struct shared_common *data[KMP_HASH_TABLE_SIZE];
1558 };
1559 
1560 /* ------------------------------------------------------------------------ */
1561 
1562 #if KMP_USE_HIER_SCHED
1563 // Shared barrier data that exists inside a single unit of the scheduling
1564 // hierarchy
1565 typedef struct kmp_hier_private_bdata_t {
1566  kmp_int32 num_active;
1567  kmp_uint64 index;
1568  kmp_uint64 wait_val[2];
1569 } kmp_hier_private_bdata_t;
1570 #endif
1571 
1572 typedef struct kmp_sched_flags {
1573  unsigned ordered : 1;
1574  unsigned nomerge : 1;
1575  unsigned contains_last : 1;
1576 #if KMP_USE_HIER_SCHED
1577  unsigned use_hier : 1;
1578  unsigned unused : 28;
1579 #else
1580  unsigned unused : 29;
1581 #endif
1582 } kmp_sched_flags_t;
1583 
1584 KMP_BUILD_ASSERT(sizeof(kmp_sched_flags_t) == 4);
1585 
1586 #if KMP_STATIC_STEAL_ENABLED
1587 typedef struct KMP_ALIGN_CACHE dispatch_private_info32 {
1588  kmp_int32 count;
1589  kmp_int32 ub;
1590  /* Adding KMP_ALIGN_CACHE here doesn't help / can hurt performance */
1591  kmp_int32 lb;
1592  kmp_int32 st;
1593  kmp_int32 tc;
1594  kmp_int32 static_steal_counter; /* for static_steal only; maybe better to put
1595  after ub */
1596 
1597  // KMP_ALIGN( 16 ) ensures ( if the KMP_ALIGN macro is turned on )
1598  // a) parm3 is properly aligned and
1599  // b) all parm1-4 are in the same cache line.
1600  // Because parm1-4 are used together, performance seems to be better
1601  // if they are in the same cache line (not measured, though).
1602 
1603  struct KMP_ALIGN(32) { // AC: changed 16 to 32 in order to simplify template
1604  kmp_int32 parm1; // structures in kmp_dispatch.cpp. This should
1605  kmp_int32 parm2; // make no real change at least while padding is off.
1606  kmp_int32 parm3;
1607  kmp_int32 parm4;
1608  };
1609 
1610  kmp_uint32 ordered_lower;
1611  kmp_uint32 ordered_upper;
1612 #if KMP_OS_WINDOWS
1613  // This var can be placed in the hole between 'tc' and 'parm1', instead of
1614  // 'static_steal_counter'. It would be nice to measure execution times.
1615  // Conditional if/endif can be removed at all.
1616  kmp_int32 last_upper;
1617 #endif /* KMP_OS_WINDOWS */
1618 } dispatch_private_info32_t;
1619 
1620 typedef struct KMP_ALIGN_CACHE dispatch_private_info64 {
1621  kmp_int64 count; // current chunk number for static & static-steal scheduling
1622  kmp_int64 ub; /* upper-bound */
1623  /* Adding KMP_ALIGN_CACHE here doesn't help / can hurt performance */
1624  kmp_int64 lb; /* lower-bound */
1625  kmp_int64 st; /* stride */
1626  kmp_int64 tc; /* trip count (number of iterations) */
1627  kmp_int64 static_steal_counter; /* for static_steal only; maybe better to put
1628  after ub */
1629 
1630  /* parm[1-4] are used in different ways by different scheduling algorithms */
1631 
1632  // KMP_ALIGN( 32 ) ensures ( if the KMP_ALIGN macro is turned on )
1633  // a) parm3 is properly aligned and
1634  // b) all parm1-4 are in the same cache line.
1635  // Because parm1-4 are used together, performance seems to be better
1636  // if they are in the same cache line (not measured, though).
1637 
1638  struct KMP_ALIGN(32) {
1639  kmp_int64 parm1;
1640  kmp_int64 parm2;
1641  kmp_int64 parm3;
1642  kmp_int64 parm4;
1643  };
1644 
1645  kmp_uint64 ordered_lower;
1646  kmp_uint64 ordered_upper;
1647 #if KMP_OS_WINDOWS
1648  // This var can be placed in the hole between 'tc' and 'parm1', instead of
1649  // 'static_steal_counter'. It would be nice to measure execution times;
1650  // the conditional if/endif could then be removed entirely.
1651  kmp_int64 last_upper;
1652 #endif /* KMP_OS_WINDOWS */
1653 } dispatch_private_info64_t;
1654 #else /* KMP_STATIC_STEAL_ENABLED */
1655 typedef struct KMP_ALIGN_CACHE dispatch_private_info32 {
1656  kmp_int32 lb;
1657  kmp_int32 ub;
1658  kmp_int32 st;
1659  kmp_int32 tc;
1660 
1661  kmp_int32 parm1;
1662  kmp_int32 parm2;
1663  kmp_int32 parm3;
1664  kmp_int32 parm4;
1665 
1666  kmp_int32 count;
1667 
1668  kmp_uint32 ordered_lower;
1669  kmp_uint32 ordered_upper;
1670 #if KMP_OS_WINDOWS
1671  kmp_int32 last_upper;
1672 #endif /* KMP_OS_WINDOWS */
1673 } dispatch_private_info32_t;
1674 
1675 typedef struct KMP_ALIGN_CACHE dispatch_private_info64 {
1676  kmp_int64 lb; /* lower-bound */
1677  kmp_int64 ub; /* upper-bound */
1678  kmp_int64 st; /* stride */
1679  kmp_int64 tc; /* trip count (number of iterations) */
1680 
1681  /* parm[1-4] are used in different ways by different scheduling algorithms */
1682  kmp_int64 parm1;
1683  kmp_int64 parm2;
1684  kmp_int64 parm3;
1685  kmp_int64 parm4;
1686 
1687  kmp_int64 count; /* current chunk number for static scheduling */
1688 
1689  kmp_uint64 ordered_lower;
1690  kmp_uint64 ordered_upper;
1691 #if KMP_OS_WINDOWS
1692  kmp_int64 last_upper;
1693 #endif /* KMP_OS_WINDOWS */
1694 } dispatch_private_info64_t;
1695 #endif /* KMP_STATIC_STEAL_ENABLED */
1696 
1697 typedef struct KMP_ALIGN_CACHE dispatch_private_info {
1698  union private_info {
1699  dispatch_private_info32_t p32;
1700  dispatch_private_info64_t p64;
1701  } u;
1702  enum sched_type schedule; /* scheduling algorithm */
1703  kmp_sched_flags_t flags; /* flags (e.g., ordered, nomerge, etc.) */
1704  kmp_int32 ordered_bumped;
1705  // To retain the structure size after making ordered_iteration scalar
1706  kmp_int32 ordered_dummy[KMP_MAX_ORDERED - 3];
1707  // Stack of buffers for nest of serial regions
1708  struct dispatch_private_info *next;
1709  kmp_int32 type_size; /* the size of types in private_info */
1710 #if KMP_USE_HIER_SCHED
1711  kmp_int32 hier_id;
1712  void *parent; /* hierarchical scheduling parent pointer */
1713 #endif
1714  enum cons_type pushed_ws;
1715 } dispatch_private_info_t;
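// Illustrative sketch (hypothetical helper, not part of this header): a
// consumer of dispatch_private_info_t selects the union view that matches the
// recorded type_size, e.g.:
//
//   static void example_print_bounds(dispatch_private_info_t *pr) {
//     if (pr->type_size == 4) // 32-bit loop types
//       printf("lb=%d ub=%d\n", pr->u.p32.lb, pr->u.p32.ub);
//     else // 64-bit loop types
//       printf("lb=%lld ub=%lld\n", (long long)pr->u.p64.lb,
//              (long long)pr->u.p64.ub);
//   }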
1716 
1717 typedef struct dispatch_shared_info32 {
1718  /* chunk index under dynamic, number of idle threads under static-steal;
1719  iteration index otherwise */
1720  volatile kmp_uint32 iteration;
1721  volatile kmp_uint32 num_done;
1722  volatile kmp_uint32 ordered_iteration;
1723  // Dummy to retain the structure size after making ordered_iteration scalar
1724  kmp_int32 ordered_dummy[KMP_MAX_ORDERED - 1];
1725 } dispatch_shared_info32_t;
1726 
1727 typedef struct dispatch_shared_info64 {
1728  /* chunk index under dynamic, number of idle threads under static-steal;
1729  iteration index otherwise */
1730  volatile kmp_uint64 iteration;
1731  volatile kmp_uint64 num_done;
1732  volatile kmp_uint64 ordered_iteration;
1733  // Dummy to retain the structure size after making ordered_iteration scalar
1734  kmp_int64 ordered_dummy[KMP_MAX_ORDERED - 3];
1735 } dispatch_shared_info64_t;
1736 
1737 typedef struct dispatch_shared_info {
1738  union shared_info {
1739  dispatch_shared_info32_t s32;
1740  dispatch_shared_info64_t s64;
1741  } u;
1742  volatile kmp_uint32 buffer_index;
1743 #if OMP_45_ENABLED
1744  volatile kmp_int32 doacross_buf_idx; // teamwise index
1745  volatile kmp_uint32 *doacross_flags; // shared array of iteration flags (0/1)
1746  kmp_int32 doacross_num_done; // count finished threads
1747 #endif
1748 #if KMP_USE_HIER_SCHED
1749  void *hier;
1750 #endif
1751 #if KMP_USE_HWLOC
1752  // When linking with libhwloc, the ORDERED EPCC test slows down on big
1753  // machines (> 48 cores). Performance analysis showed that a cache thrash
1754  // was occurring and this padding helps alleviate the problem.
1755  char padding[64];
1756 #endif
1757 } dispatch_shared_info_t;
1758 
1759 typedef struct kmp_disp {
1760  /* Vector for ORDERED SECTION */
1761  void (*th_deo_fcn)(int *gtid, int *cid, ident_t *);
1762  /* Vector for END ORDERED SECTION */
1763  void (*th_dxo_fcn)(int *gtid, int *cid, ident_t *);
1764 
1765  dispatch_shared_info_t *th_dispatch_sh_current;
1766  dispatch_private_info_t *th_dispatch_pr_current;
1767 
1768  dispatch_private_info_t *th_disp_buffer;
1769  kmp_int32 th_disp_index;
1770 #if OMP_45_ENABLED
1771  kmp_int32 th_doacross_buf_idx; // thread's doacross buffer index
1772  volatile kmp_uint32 *th_doacross_flags; // pointer to shared array of flags
1773  union { // we can use union here because doacross cannot be used in
1774  // nonmonotonic loops
1775  kmp_int64 *th_doacross_info; // info on loop bounds
1776  kmp_lock_t *th_steal_lock; // lock used for chunk stealing (8-byte variable)
1777  };
1778 #else
1779 #if KMP_STATIC_STEAL_ENABLED
1780  kmp_lock_t *th_steal_lock; // lock used for chunk stealing (8-byte variable)
1781  void *dummy_padding[1]; // make it 64 bytes on Intel(R) 64
1782 #else
1783  void *dummy_padding[2]; // make it 64 bytes on Intel(R) 64
1784 #endif
1785 #endif
1786 #if KMP_USE_INTERNODE_ALIGNMENT
1787  char more_padding[INTERNODE_CACHE_LINE];
1788 #endif
1789 } kmp_disp_t;
1790 
1791 /* ------------------------------------------------------------------------ */
1792 /* Barrier stuff */
1793 
1794 /* constants for barrier state update */
1795 #define KMP_INIT_BARRIER_STATE 0 /* should probably start from zero */
1796 #define KMP_BARRIER_SLEEP_BIT 0 /* bit used for suspend/sleep part of state */
1797 #define KMP_BARRIER_UNUSED_BIT 1 // bit that must never be set for valid state
1798 #define KMP_BARRIER_BUMP_BIT 2 /* lsb used for bump of go/arrived state */
1799 
1800 #define KMP_BARRIER_SLEEP_STATE (1 << KMP_BARRIER_SLEEP_BIT)
1801 #define KMP_BARRIER_UNUSED_STATE (1 << KMP_BARRIER_UNUSED_BIT)
1802 #define KMP_BARRIER_STATE_BUMP (1 << KMP_BARRIER_BUMP_BIT)
1803 
1804 #if (KMP_BARRIER_SLEEP_BIT >= KMP_BARRIER_BUMP_BIT)
1805 #error "Barrier sleep bit must be smaller than barrier bump bit"
1806 #endif
1807 #if (KMP_BARRIER_UNUSED_BIT >= KMP_BARRIER_BUMP_BIT)
1808 #error "Barrier unused bit must be smaller than barrier bump bit"
1809 #endif
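// Illustrative sketch of how these bits compose (assumed usage, not code from
// this header): arrivals bump the counter portion of a barrier state by
// KMP_BARRIER_STATE_BUMP, leaving the low flag bits intact, and the sleep flag
// is tested with a mask, e.g.:
//
//   kmp_uint64 state = KMP_INIT_BARRIER_STATE;
//   state += KMP_BARRIER_STATE_BUMP;                       // one barrier episode
//   int sleeping = (state & KMP_BARRIER_SLEEP_STATE) != 0; // flag-bit check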
1810 
1811 // Constants for release barrier wait state: currently, hierarchical only
1812 #define KMP_BARRIER_NOT_WAITING 0 // Normal state; worker not in wait_sleep
1813 #define KMP_BARRIER_OWN_FLAG \
1814  1 // Normal state; worker waiting on own b_go flag in release
1815 #define KMP_BARRIER_PARENT_FLAG \
1816  2 // Special state; worker waiting on parent's b_go flag in release
1817 #define KMP_BARRIER_SWITCH_TO_OWN_FLAG \
1818  3 // Special state; tells worker to shift from parent to own b_go
1819 #define KMP_BARRIER_SWITCHING \
1820  4 // Special state; worker resets appropriate flag on wake-up
1821 
1822 #define KMP_NOT_SAFE_TO_REAP \
1823  0 // Thread th_reap_state: not safe to reap (tasking)
1824 #define KMP_SAFE_TO_REAP 1 // Thread th_reap_state: safe to reap (not tasking)
1825 
1826 enum barrier_type {
1827  bs_plain_barrier = 0, /* 0, All non-fork/join barriers (except reduction
1828  barriers if enabled) */
1829  bs_forkjoin_barrier, /* 1, All fork/join (parallel region) barriers */
1830 #if KMP_FAST_REDUCTION_BARRIER
1831  bs_reduction_barrier, /* 2, All barriers that are used in reduction */
1832 #endif // KMP_FAST_REDUCTION_BARRIER
1833  bs_last_barrier /* Just a placeholder to mark the end */
1834 };
1835 
1836 // to work with reduction barriers just like with plain barriers
1837 #if !KMP_FAST_REDUCTION_BARRIER
1838 #define bs_reduction_barrier bs_plain_barrier
1839 #endif // KMP_FAST_REDUCTION_BARRIER
1840 
1841 typedef enum kmp_bar_pat { /* Barrier communication patterns */
1842  bp_linear_bar =
1843  0, /* Single level (degenerate) tree */
1844  bp_tree_bar =
1845  1, /* Balanced tree with branching factor 2^n */
1846  bp_hyper_bar =
1847  2, /* Hypercube-embedded tree with min branching
1848  factor 2^n */
1849  bp_hierarchical_bar = 3, /* Machine hierarchy tree */
1850  bp_last_bar /* Placeholder to mark the end */
1851 } kmp_bar_pat_e;
1852 
1853 #define KMP_BARRIER_ICV_PUSH 1
1854 
1855 /* Record for holding the values of the internal controls stack records */
1856 typedef struct kmp_internal_control {
1857  int serial_nesting_level; /* corresponds to the value of the
1858  th_team_serialized field */
1859  kmp_int8 dynamic; /* internal control for dynamic adjustment of threads (per
1860  thread) */
1861  kmp_int8
1862  bt_set; /* internal control for whether blocktime is explicitly set */
1863  int blocktime; /* internal control for blocktime */
1864 #if KMP_USE_MONITOR
1865  int bt_intervals; /* internal control for blocktime intervals */
1866 #endif
1867  int nproc; /* internal control for #threads for next parallel region (per
1868  thread) */
1869  int thread_limit; /* internal control for thread-limit-var */
1870  int max_active_levels; /* internal control for max_active_levels */
1871  kmp_r_sched_t
1872  sched; /* internal control for runtime schedule {sched,chunk} pair */
1873 #if OMP_40_ENABLED
1874  kmp_proc_bind_t proc_bind; /* internal control for affinity */
1875  kmp_int32 default_device; /* internal control for default device */
1876 #endif // OMP_40_ENABLED
1877  struct kmp_internal_control *next;
1878 } kmp_internal_control_t;
1879 
1880 static inline void copy_icvs(kmp_internal_control_t *dst,
1881  kmp_internal_control_t *src) {
1882  *dst = *src;
1883 }
1884 
1885 /* Thread barrier needs volatile barrier fields */
1886 typedef struct KMP_ALIGN_CACHE kmp_bstate {
1887  // th_fixed_icvs is aligned by virtue of kmp_bstate being aligned (and all
1888  // uses of it). It is not explicitly aligned below, because we *don't* want
1889  // it to be padded -- instead, we fit b_go into the same cache line with
1890  // th_fixed_icvs, enabling NGO cache-line stores in the hierarchical barrier.
1891  kmp_internal_control_t th_fixed_icvs; // Initial ICVs for the thread
1892  // Tuck b_go into end of th_fixed_icvs cache line, so it can be stored with
1893  // same NGO store
1894  volatile kmp_uint64 b_go; // STATE => task should proceed (hierarchical)
1895  KMP_ALIGN_CACHE volatile kmp_uint64
1896  b_arrived; // STATE => task reached synch point.
1897  kmp_uint32 *skip_per_level;
1898  kmp_uint32 my_level;
1899  kmp_int32 parent_tid;
1900  kmp_int32 old_tid;
1901  kmp_uint32 depth;
1902  struct kmp_bstate *parent_bar;
1903  kmp_team_t *team;
1904  kmp_uint64 leaf_state;
1905  kmp_uint32 nproc;
1906  kmp_uint8 base_leaf_kids;
1907  kmp_uint8 leaf_kids;
1908  kmp_uint8 offset;
1909  kmp_uint8 wait_flag;
1910  kmp_uint8 use_oncore_barrier;
1911 #if USE_DEBUGGER
1912  // The following field is intended solely for the debugger. Only the worker
1913  // thread itself accesses this field: the worker increments it by 1 when it
1914  // arrives at a barrier.
1915  KMP_ALIGN_CACHE kmp_uint b_worker_arrived;
1916 #endif /* USE_DEBUGGER */
1917 } kmp_bstate_t;
1918 
1919 union KMP_ALIGN_CACHE kmp_barrier_union {
1920  double b_align; /* use worst case alignment */
1921  char b_pad[KMP_PAD(kmp_bstate_t, CACHE_LINE)];
1922  kmp_bstate_t bb;
1923 };
1924 
1925 typedef union kmp_barrier_union kmp_balign_t;
1926 
1927 /* Team barrier needs only non-volatile arrived counter */
1928 union KMP_ALIGN_CACHE kmp_barrier_team_union {
1929  double b_align; /* use worst case alignment */
1930  char b_pad[CACHE_LINE];
1931  struct {
1932  kmp_uint64 b_arrived; /* STATE => task reached synch point. */
1933 #if USE_DEBUGGER
1934  // The following two fields are intended solely for the debugger. Only the
1935  // master of the team accesses these fields: the first one is incremented by
1936  // 1 when the master arrives at a barrier, the second one is incremented
1937  // when all the threads have arrived.
1938  kmp_uint b_master_arrived;
1939  kmp_uint b_team_arrived;
1940 #endif
1941  };
1942 };
1943 
1944 typedef union kmp_barrier_team_union kmp_balign_team_t;
1945 
1946 /* Padding for Linux* OS pthreads condition variables and mutexes used to signal
1947  threads when a condition changes. This works around an NPTL bug where
1948  padding was added to pthread_cond_t which caused the initialization routine
1949  to write outside of the structure if compiled on pre-NPTL threads. */
1950 #if KMP_OS_WINDOWS
1951 typedef struct kmp_win32_mutex {
1952  /* The Lock */
1953  CRITICAL_SECTION cs;
1954 } kmp_win32_mutex_t;
1955 
1956 typedef struct kmp_win32_cond {
1957  /* Count of the number of waiters. */
1958  int waiters_count_;
1959 
1960  /* Serialize access to <waiters_count_> */
1961  kmp_win32_mutex_t waiters_count_lock_;
1962 
1963  /* Number of threads to release via a <cond_broadcast> or a <cond_signal> */
1964  int release_count_;
1965 
1966  /* Keeps track of the current "generation" so that we don't allow */
1967  /* one thread to steal all the "releases" from the broadcast. */
1968  int wait_generation_count_;
1969 
1970  /* A manual-reset event that's used to block and release waiting threads. */
1971  HANDLE event_;
1972 } kmp_win32_cond_t;
1973 #endif
1974 
1975 #if KMP_OS_UNIX
1976 
1977 union KMP_ALIGN_CACHE kmp_cond_union {
1978  double c_align;
1979  char c_pad[CACHE_LINE];
1980  pthread_cond_t c_cond;
1981 };
1982 
1983 typedef union kmp_cond_union kmp_cond_align_t;
1984 
1985 union KMP_ALIGN_CACHE kmp_mutex_union {
1986  double m_align;
1987  char m_pad[CACHE_LINE];
1988  pthread_mutex_t m_mutex;
1989 };
1990 
1991 typedef union kmp_mutex_union kmp_mutex_align_t;
1992 
1993 #endif /* KMP_OS_UNIX */
1994 
1995 typedef struct kmp_desc_base {
1996  void *ds_stackbase;
1997  size_t ds_stacksize;
1998  int ds_stackgrow;
1999  kmp_thread_t ds_thread;
2000  volatile int ds_tid;
2001  int ds_gtid;
2002 #if KMP_OS_WINDOWS
2003  volatile int ds_alive;
2004  DWORD ds_thread_id;
2005 /* ds_thread keeps the thread handle on Windows* OS. It is enough for RTL
2006  purposes. However, debugger support (libomp_db) cannot work with handles,
2007  because they are not comparable. For example, the debugger asks for info on
2008  the thread with handle h. h is valid within the debugger process but
2009  meaningless within the debuggee process. Even if h is duplicated via
2010  DuplicateHandle() so that the result h' is valid within the debuggee
2011  process, h' is a *new* handle which does *not* equal any other handle in
2012  the debuggee... The only way to compare handles is to convert them to
2013  system-wide ids. GetThreadId() is available only in Longhorn and Server
2014  2003. :-( In contrast, GetCurrentThreadId() is available on all Windows* OS
2015  flavours (including Windows* 95). Thus, we get the thread id by calling
2016  GetCurrentThreadId() from within the thread and save it for libomp_db. */
2017 #endif /* KMP_OS_WINDOWS */
2018 } kmp_desc_base_t;
2019 
2020 typedef union KMP_ALIGN_CACHE kmp_desc {
2021  double ds_align; /* use worst case alignment */
2022  char ds_pad[KMP_PAD(kmp_desc_base_t, CACHE_LINE)];
2023  kmp_desc_base_t ds;
2024 } kmp_desc_t;
2025 
2026 typedef struct kmp_local {
2027  volatile int this_construct; /* count of single's encountered by thread */
2028  void *reduce_data;
2029 #if KMP_USE_BGET
2030  void *bget_data;
2031  void *bget_list;
2032 #if !USE_CMP_XCHG_FOR_BGET
2033 #ifdef USE_QUEUING_LOCK_FOR_BGET
2034  kmp_lock_t bget_lock; /* Lock for accessing bget free list */
2035 #else
2036  kmp_bootstrap_lock_t bget_lock; // Lock for accessing bget free list. Must be
2037 // bootstrap lock so we can use it at library
2038 // shutdown.
2039 #endif /* USE_LOCK_FOR_BGET */
2040 #endif /* ! USE_CMP_XCHG_FOR_BGET */
2041 #endif /* KMP_USE_BGET */
2042 
2043  PACKED_REDUCTION_METHOD_T
2044  packed_reduction_method; /* stored by __kmpc_reduce*(), used by
2045  __kmpc_end_reduce*() */
2046 
2047 } kmp_local_t;
2048 
2049 #define KMP_CHECK_UPDATE(a, b) \
2050  if ((a) != (b)) \
2051  (a) = (b)
2052 #define KMP_CHECK_UPDATE_SYNC(a, b) \
2053  if ((a) != (b)) \
2054  TCW_SYNC_PTR((a), (b))
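// Illustrative usage sketch (hypothetical team/loc variables): the conditional
// store avoids dirtying a possibly shared cache line when the value is already
// current, e.g.:
//
//   KMP_CHECK_UPDATE(team->t.t_ident, loc); // writes only if the value differs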
2055 
2056 #define get__blocktime(xteam, xtid) \
2057  ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.blocktime)
2058 #define get__bt_set(xteam, xtid) \
2059  ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_set)
2060 #if KMP_USE_MONITOR
2061 #define get__bt_intervals(xteam, xtid) \
2062  ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_intervals)
2063 #endif
2064 
2065 #define get__dynamic_2(xteam, xtid) \
2066  ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.dynamic)
2067 #define get__nproc_2(xteam, xtid) \
2068  ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.nproc)
2069 #define get__sched_2(xteam, xtid) \
2070  ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.sched)
2071 
2072 #define set__blocktime_team(xteam, xtid, xval) \
2073  (((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.blocktime) = \
2074  (xval))
2075 
2076 #if KMP_USE_MONITOR
2077 #define set__bt_intervals_team(xteam, xtid, xval) \
2078  (((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_intervals) = \
2079  (xval))
2080 #endif
2081 
2082 #define set__bt_set_team(xteam, xtid, xval) \
2083  (((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_set) = (xval))
2084 
2085 #define set__dynamic(xthread, xval) \
2086  (((xthread)->th.th_current_task->td_icvs.dynamic) = (xval))
2087 #define get__dynamic(xthread) \
2088  (((xthread)->th.th_current_task->td_icvs.dynamic) ? (FTN_TRUE) : (FTN_FALSE))
2089 
2090 #define set__nproc(xthread, xval) \
2091  (((xthread)->th.th_current_task->td_icvs.nproc) = (xval))
2092 
2093 #define set__thread_limit(xthread, xval) \
2094  (((xthread)->th.th_current_task->td_icvs.thread_limit) = (xval))
2095 
2096 #define set__max_active_levels(xthread, xval) \
2097  (((xthread)->th.th_current_task->td_icvs.max_active_levels) = (xval))
2098 
2099 #define get__max_active_levels(xthread) \
2100  ((xthread)->th.th_current_task->td_icvs.max_active_levels)
2101 
2102 #define set__sched(xthread, xval) \
2103  (((xthread)->th.th_current_task->td_icvs.sched) = (xval))
2104 
2105 #if OMP_40_ENABLED
2106 
2107 #define set__proc_bind(xthread, xval) \
2108  (((xthread)->th.th_current_task->td_icvs.proc_bind) = (xval))
2109 #define get__proc_bind(xthread) \
2110  ((xthread)->th.th_current_task->td_icvs.proc_bind)
2111 
2112 #endif /* OMP_40_ENABLED */
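// Illustrative sketch (hypothetical gtid value): these accessors read and
// write ICVs stored in the thread's current implicit task, e.g.:
//
//   kmp_info_t *thr = __kmp_threads[gtid]; // __kmp_threads is declared below
//   set__nproc(thr, 8);          // request 8 threads for the next fork
//   int dyn = get__dynamic(thr); // FTN_TRUE or FTN_FALSE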
2113 
2114 // OpenMP tasking data structures
2115 
2116 typedef enum kmp_tasking_mode {
2117  tskm_immediate_exec = 0,
2118  tskm_extra_barrier = 1,
2119  tskm_task_teams = 2,
2120  tskm_max = 2
2121 } kmp_tasking_mode_t;
2122 
2123 extern kmp_tasking_mode_t
2124  __kmp_tasking_mode; /* determines how/when to execute tasks */
2125 extern int __kmp_task_stealing_constraint;
2126 #if OMP_40_ENABLED
2127 extern kmp_int32 __kmp_default_device; // Set via OMP_DEFAULT_DEVICE if
2128 // specified, defaults to 0 otherwise
2129 #endif
2130 #if OMP_45_ENABLED
2131 // Set via OMP_MAX_TASK_PRIORITY if specified, defaults to 0 otherwise
2132 extern kmp_int32 __kmp_max_task_priority;
2133 // Set via KMP_TASKLOOP_MIN_TASKS if specified, defaults to 0 otherwise
2134 extern kmp_uint64 __kmp_taskloop_min_tasks;
2135 #endif
2136 
2137 /* NOTE: kmp_taskdata_t and kmp_task_t structures are allocated in a single
2138  block, with the taskdata first */
2139 #define KMP_TASK_TO_TASKDATA(task) (((kmp_taskdata_t *)task) - 1)
2140 #define KMP_TASKDATA_TO_TASK(taskdata) (kmp_task_t *)(taskdata + 1)
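// Illustrative sketch: because both structures live in one allocation with the
// taskdata first, the conversions are plain pointer arithmetic, e.g.:
//
//   kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(task); // step back one taskdata
//   kmp_task_t *back = KMP_TASKDATA_TO_TASK(td);     // back == task again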
2141 
2142 // The tt_found_tasks flag is a signal to all threads in the team that tasks
2143 // were spawned and queued since the previous barrier release.
2144 #define KMP_TASKING_ENABLED(task_team) \
2145  (TCR_SYNC_4((task_team)->tt.tt_found_tasks) == TRUE)
2146 
2153 typedef kmp_int32 (*kmp_routine_entry_t)(kmp_int32, void *);
2154 
2155 #if OMP_40_ENABLED || OMP_45_ENABLED
2156 typedef union kmp_cmplrdata {
2157 #if OMP_45_ENABLED
2158  kmp_int32 priority;
2159 #endif // OMP_45_ENABLED
2160 #if OMP_40_ENABLED
2161  kmp_routine_entry_t
2162  destructors; /* pointer to function to invoke destructors of
2163  firstprivate C++ objects */
2164 #endif // OMP_40_ENABLED
2165  /* future data */
2166 } kmp_cmplrdata_t;
2167 #endif
2168 
2169 /* sizeof_kmp_task_t passed as arg to kmpc_omp_task call */
2172 typedef struct kmp_task { /* GEH: Shouldn't this be aligned somehow? */
2173  void *shareds;
2174  kmp_routine_entry_t
2175  routine;
2176  kmp_int32 part_id;
2177 #if OMP_40_ENABLED || OMP_45_ENABLED
2178  kmp_cmplrdata_t
2179  data1; /* Two known optional additions: destructors and priority */
2180  kmp_cmplrdata_t data2; /* Process destructors first, priority second */
2181 /* future data */
2182 #endif
2183  /* private vars */
2184 } kmp_task_t;
2185 
2190 #if OMP_40_ENABLED
2191 typedef struct kmp_taskgroup {
2192  std::atomic<kmp_int32> count; // number of allocated and incomplete tasks
2193  std::atomic<kmp_int32>
2194  cancel_request; // request for cancellation of this taskgroup
2195  struct kmp_taskgroup *parent; // parent taskgroup
2196 #if OMP_50_ENABLED
2197  // Block of data to perform task reduction
2198  void *reduce_data; // reduction related info
2199  kmp_int32 reduce_num_data; // number of data items to reduce
2200 #endif
2201 } kmp_taskgroup_t;
2202 
2203 // forward declarations
2204 typedef union kmp_depnode kmp_depnode_t;
2205 typedef struct kmp_depnode_list kmp_depnode_list_t;
2206 typedef struct kmp_dephash_entry kmp_dephash_entry_t;
2207 
2208 // Compiler sends us this info:
2209 typedef struct kmp_depend_info {
2210  kmp_intptr_t base_addr;
2211  size_t len;
2212  struct {
2213  bool in : 1;
2214  bool out : 1;
2215  bool mtx : 1;
2216  } flags;
2217 } kmp_depend_info_t;
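// Illustrative sketch (hypothetical variable x): for a "depend(inout: x)"
// clause the compiler would fill a descriptor with the address, the length,
// and both direction bits, e.g.:
//
//   kmp_depend_info_t dep;
//   dep.base_addr = (kmp_intptr_t)&x; // hypothetical dependence object
//   dep.len = sizeof(x);
//   dep.flags.in = 1;  // inout sets both in and out
//   dep.flags.out = 1;
//   dep.flags.mtx = 0; // only set for mutexinoutset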
2218 
2219 // Internal structures to work with task dependencies:
2220 struct kmp_depnode_list {
2221  kmp_depnode_t *node;
2222  kmp_depnode_list_t *next;
2223 };
2224 
2225 // Max number of mutexinoutset dependencies per node
2226 #define MAX_MTX_DEPS 4
2227 
2228 typedef struct kmp_base_depnode {
2229  kmp_depnode_list_t *successors; /* used under lock */
2230  kmp_task_t *task; /* non-NULL if depnode is active, used under lock */
2231  kmp_lock_t *mtx_locks[MAX_MTX_DEPS]; /* lock mutexinoutset dependent tasks */
2232  kmp_int32 mtx_num_locks; /* number of locks in mtx_locks array */
2233  kmp_lock_t lock; /* guards shared fields: task, successors */
2234 #if KMP_SUPPORT_GRAPH_OUTPUT
2235  kmp_uint32 id;
2236 #endif
2237  std::atomic<kmp_int32> npredecessors;
2238  std::atomic<kmp_int32> nrefs;
2239 } kmp_base_depnode_t;
2240 
2241 union KMP_ALIGN_CACHE kmp_depnode {
2242  double dn_align; /* use worst case alignment */
2243  char dn_pad[KMP_PAD(kmp_base_depnode_t, CACHE_LINE)];
2244  kmp_base_depnode_t dn;
2245 };
2246 
2247 struct kmp_dephash_entry {
2248  kmp_intptr_t addr;
2249  kmp_depnode_t *last_out;
2250  kmp_depnode_list_t *last_ins;
2251  kmp_depnode_list_t *last_mtxs;
2252  kmp_int32 last_flag;
2253  kmp_lock_t *mtx_lock; /* is referenced by depnodes w/mutexinoutset dep */
2254  kmp_dephash_entry_t *next_in_bucket;
2255 };
2256 
2257 typedef struct kmp_dephash {
2258  kmp_dephash_entry_t **buckets;
2259  size_t size;
2260 #ifdef KMP_DEBUG
2261  kmp_uint32 nelements;
2262  kmp_uint32 nconflicts;
2263 #endif
2264 } kmp_dephash_t;
2265 
2266 #if OMP_50_ENABLED
2267 typedef struct kmp_task_affinity_info {
2268  kmp_intptr_t base_addr;
2269  size_t len;
2270  struct {
2271  bool flag1 : 1;
2272  bool flag2 : 1;
2273  kmp_int32 reserved : 30;
2274  } flags;
2275 } kmp_task_affinity_info_t;
2276 #endif
2277 
2278 #endif
2279 
2280 #ifdef BUILD_TIED_TASK_STACK
2281 
2282 /* Tied Task stack definitions */
2283 typedef struct kmp_stack_block {
2284  kmp_taskdata_t *sb_block[TASK_STACK_BLOCK_SIZE];
2285  struct kmp_stack_block *sb_next;
2286  struct kmp_stack_block *sb_prev;
2287 } kmp_stack_block_t;
2288 
2289 typedef struct kmp_task_stack {
2290  kmp_stack_block_t ts_first_block; // first block of stack entries
2291  kmp_taskdata_t **ts_top; // pointer to the top of stack
2292  kmp_int32 ts_entries; // number of entries on the stack
2293 } kmp_task_stack_t;
2294 
2295 #endif // BUILD_TIED_TASK_STACK
2296 
2297 typedef struct kmp_tasking_flags { /* Total struct must be exactly 32 bits */
2298  /* Compiler flags */ /* Total compiler flags must be 16 bits */
2299  unsigned tiedness : 1; /* task is either tied (1) or untied (0) */
2300  unsigned final : 1; /* task is final(1) so execute immediately */
2301  unsigned merged_if0 : 1; /* no __kmpc_task_{begin/complete}_if0 calls in if0
2302  code path */
2303 #if OMP_40_ENABLED
2304  unsigned destructors_thunk : 1; /* set if the compiler creates a thunk to
2305  invoke destructors from the runtime */
2306 #if OMP_45_ENABLED
2307  unsigned proxy : 1; /* task is a proxy task (it will be executed outside the
2308  context of the RTL) */
2309  unsigned priority_specified : 1; /* set if the compiler provides priority
2310  setting for the task */
2311  unsigned reserved : 10; /* reserved for compiler use */
2312 #else
2313  unsigned reserved : 12; /* reserved for compiler use */
2314 #endif
2315 #else // OMP_40_ENABLED
2316  unsigned reserved : 13; /* reserved for compiler use */
2317 #endif // OMP_40_ENABLED
2318 
2319  /* Library flags */ /* Total library flags must be 16 bits */
2320  unsigned tasktype : 1; /* task is either explicit(1) or implicit (0) */
2321  unsigned task_serial : 1; // task is executed immediately (1) or deferred (0)
2322  unsigned tasking_ser : 1; // all tasks in team are either executed immediately
2323  // (1) or may be deferred (0)
2324  unsigned team_serial : 1; // entire team is serial (1) [1 thread] or parallel
2325  // (0) [>= 2 threads]
2326  /* If either team_serial or tasking_ser is set, task team may be NULL */
2327  /* Task State Flags: */
2328  unsigned started : 1; /* 1==started, 0==not started */
2329  unsigned executing : 1; /* 1==executing, 0==not executing */
2330  unsigned complete : 1; /* 1==complete, 0==not complete */
2331  unsigned freed : 1; /* 1==freed, 0==allocated */
2332  unsigned native : 1; /* 1==gcc-compiled task, 0==intel */
2333  unsigned reserved31 : 7; /* reserved for library use */
2334 
2335 } kmp_tasking_flags_t;
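// Illustrative compile-time check (an assumption; not present in this header):
// the bit-field widths above are intended to total exactly 32 bits, which
// could be verified with, e.g.:
//
//   KMP_BUILD_ASSERT(sizeof(kmp_tasking_flags_t) == 4);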
2336 
2337 struct kmp_taskdata { /* aligned during dynamic allocation */
2338  kmp_int32 td_task_id; /* id, assigned by debugger */
2339  kmp_tasking_flags_t td_flags; /* task flags */
2340  kmp_team_t *td_team; /* team for this task */
2341  kmp_info_p *td_alloc_thread; /* thread that allocated data structures */
2342  /* Currently not used except for perhaps IDB */
2343  kmp_taskdata_t *td_parent; /* parent task */
2344  kmp_int32 td_level; /* task nesting level */
2345  std::atomic<kmp_int32> td_untied_count; // untied task active parts counter
2346  ident_t *td_ident; /* task identifier */
2347  // Taskwait data.
2348  ident_t *td_taskwait_ident;
2349  kmp_uint32 td_taskwait_counter;
2350  kmp_int32 td_taskwait_thread; /* gtid + 1 of thread that encountered taskwait */
2351  KMP_ALIGN_CACHE kmp_internal_control_t
2352  td_icvs; /* Internal control variables for the task */
2353  KMP_ALIGN_CACHE std::atomic<kmp_int32>
2354  td_allocated_child_tasks; /* Child tasks (+ current task) not yet
2355  deallocated */
2356  std::atomic<kmp_int32>
2357  td_incomplete_child_tasks; /* Child tasks not yet complete */
2358 #if OMP_40_ENABLED
2359  kmp_taskgroup_t
2360  *td_taskgroup; // Each task keeps pointer to its current taskgroup
2361  kmp_dephash_t
2362  *td_dephash; // Dependencies for children tasks are tracked from here
2363  kmp_depnode_t
2364  *td_depnode; // Pointer to graph node if this task has dependencies
2365 #endif // OMP_40_ENABLED
2366 #if OMP_45_ENABLED
2367  kmp_task_team_t *td_task_team;
2368  kmp_int32 td_size_alloc; // The size of task structure, including shareds etc.
2369 #if defined(KMP_GOMP_COMPAT)
2370  // 4 or 8 byte integers for the loop bounds in GOMP_taskloop
2371  kmp_int32 td_size_loop_bounds;
2372 #endif
2373 #endif // OMP_45_ENABLED
2374  kmp_taskdata_t *td_last_tied; // keep tied task for task scheduling constraint
2375 #if defined(KMP_GOMP_COMPAT) && OMP_45_ENABLED
2376  // GOMP sends in a copy function for copy constructors
2377  void (*td_copy_func)(void *, void *);
2378 #endif
2379 #if OMPT_SUPPORT
2380  ompt_task_info_t ompt_task_info;
2381 #endif
2382 }; // struct kmp_taskdata
2383 
2384 // Make sure padding above worked
2385 KMP_BUILD_ASSERT(sizeof(kmp_taskdata_t) % sizeof(void *) == 0);
2386 
2387 // Per-thread data belonging to the task team
2388 typedef struct kmp_base_thread_data {
2389  kmp_info_p *td_thr; // Pointer back to thread info
2390  // Used only in __kmp_execute_tasks_template, maybe not avail until task is
2391  // queued?
2392  kmp_bootstrap_lock_t td_deque_lock; // Lock for accessing deque
2393  kmp_taskdata_t *
2394  *td_deque; // Deque of tasks encountered by td_thr, dynamically allocated
2395  kmp_int32 td_deque_size; // Size of deque
2396  kmp_uint32 td_deque_head; // Head of deque (will wrap)
2397  kmp_uint32 td_deque_tail; // Tail of deque (will wrap)
2398  kmp_int32 td_deque_ntasks; // Number of tasks in deque
2399  // GEH: shouldn't this be volatile since used in while-spin?
2400  kmp_int32 td_deque_last_stolen; // Thread number of last successful steal
2401 #ifdef BUILD_TIED_TASK_STACK
2402  kmp_task_stack_t td_susp_tied_tasks; // Stack of suspended tied tasks for task
2403 // scheduling constraint
2404 #endif // BUILD_TIED_TASK_STACK
2405 } kmp_base_thread_data_t;
2406 
2407 #define TASK_DEQUE_BITS 8 // Used solely to define INITIAL_TASK_DEQUE_SIZE
2408 #define INITIAL_TASK_DEQUE_SIZE (1 << TASK_DEQUE_BITS)
2409 
2410 #define TASK_DEQUE_SIZE(td) ((td).td_deque_size)
2411 #define TASK_DEQUE_MASK(td) ((td).td_deque_size - 1)
2412 
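// Illustrative sketch (hypothetical thread_data/taskdata variables): keeping
// the deque size a power of two lets the head/tail indices wrap with a mask
// instead of a modulo, e.g.:
//
//   kmp_uint32 tail = thread_data->td.td_deque_tail;
//   thread_data->td.td_deque[tail] = taskdata;         // hypothetical push
//   thread_data->td.td_deque_tail =
//       (tail + 1) & TASK_DEQUE_MASK(thread_data->td); // wrap around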
2413 typedef union KMP_ALIGN_CACHE kmp_thread_data {
2414  kmp_base_thread_data_t td;
2415  double td_align; /* use worst case alignment */
2416  char td_pad[KMP_PAD(kmp_base_thread_data_t, CACHE_LINE)];
2417 } kmp_thread_data_t;
2418 
2419 // Data for task teams which are used when tasking is enabled for the team
2420 typedef struct kmp_base_task_team {
2421  kmp_bootstrap_lock_t
2422  tt_threads_lock; /* Lock used to allocate per-thread part of task team */
2423  /* must be bootstrap lock since used at library shutdown*/
2424  kmp_task_team_t *tt_next; /* For linking the task team free list */
2425  kmp_thread_data_t
2426  *tt_threads_data; /* Array of per-thread structures for task team */
2427  /* Data survives task team deallocation */
2428  kmp_int32 tt_found_tasks; /* Have we found tasks and queued them while
2429  executing this team? */
2430  /* TRUE means tt_threads_data is set up and initialized */
2431  kmp_int32 tt_nproc; /* #threads in team */
2432  kmp_int32
2433  tt_max_threads; /* number of entries allocated for threads_data array */
2434 #if OMP_45_ENABLED
2435  kmp_int32
2436  tt_found_proxy_tasks; /* Have we found proxy tasks since last barrier */
2437 #endif
2438  kmp_int32 tt_untied_task_encountered;
2439 
2440  KMP_ALIGN_CACHE
2441  std::atomic<kmp_int32> tt_unfinished_threads; /* #threads still active */
2442 
2443  KMP_ALIGN_CACHE
2444  volatile kmp_uint32
2445  tt_active; /* is the team still actively executing tasks */
2446 } kmp_base_task_team_t;
2447 
2448 union KMP_ALIGN_CACHE kmp_task_team {
2449  kmp_base_task_team_t tt;
2450  double tt_align; /* use worst case alignment */
2451  char tt_pad[KMP_PAD(kmp_base_task_team_t, CACHE_LINE)];
2452 };
2453 
2454 #if (USE_FAST_MEMORY == 3) || (USE_FAST_MEMORY == 5)
2455 // Free lists keep same-size free memory slots for fast memory allocation
2456 // routines
2457 typedef struct kmp_free_list {
2458  void *th_free_list_self; // Self-allocated tasks free list
2459  void *th_free_list_sync; // Self-allocated tasks stolen/returned by other
2460  // threads
2461  void *th_free_list_other; // Non-self free list (to be returned to owner's
2462  // sync list)
2463 } kmp_free_list_t;
2464 #endif
2465 #if KMP_NESTED_HOT_TEAMS
2466 // Hot teams array keeps hot teams and their sizes for given thread. Hot teams
2467 // are not put in teams pool, and they don't put threads in threads pool.
2468 typedef struct kmp_hot_team_ptr {
2469  kmp_team_p *hot_team; // pointer to hot_team of given nesting level
2470  kmp_int32 hot_team_nth; // number of threads allocated for the hot_team
2471 } kmp_hot_team_ptr_t;
2472 #endif
2473 #if OMP_40_ENABLED
2474 typedef struct kmp_teams_size {
2475  kmp_int32 nteams; // number of teams in a league
2476  kmp_int32 nth; // number of threads in each team of the league
2477 } kmp_teams_size_t;
2478 #endif
2479 
2480 // This struct stores a thread that acts as a "root" for a contention
2481 // group. Contention groups are rooted at kmp_root threads, but also at
2482 // each master thread of each team created in the teams construct.
2483 // This struct therefore also stores a thread_limit associated with
2484 // that contention group, and a counter to track the number of threads
2485 // active in that contention group. Each thread has a list of these: CG
2486 // root threads have an entry in their list in which cg_root refers to
2487 // the thread itself, whereas other workers in the CG will have a
2488 // single entry whose cg_root points to their CG root thread.
2489 // When a thread encounters a teams construct, it will add a new
2490 // entry to the front of its list, because it now roots a new CG.
2491 typedef struct kmp_cg_root {
2492  kmp_info_p *cg_root; // "root" thread for a contention group
2493  // The CG root's limit comes from OMP_THREAD_LIMIT for root threads, or
2494  // thread_limit clause for teams masters
2495  kmp_int32 cg_thread_limit;
2496  kmp_int32 cg_nthreads; // Count of active threads in CG rooted at cg_root
2497  struct kmp_cg_root *up; // pointer to higher level CG root in list
2498 } kmp_cg_root_t;
2499 
2500 // OpenMP thread data structures
2501 
2502 typedef struct KMP_ALIGN_CACHE kmp_base_info {
2503  /* Start with the read-only data, which is cache aligned and padded. This is
2504  written by the master before the thread starts working. Uber masters may
2505  update themselves later. Usage does not consider serialized regions. */
2506  kmp_desc_t th_info;
2507  kmp_team_p *th_team; /* team we belong to */
2508  kmp_root_p *th_root; /* pointer to root of task hierarchy */
2509  kmp_info_p *th_next_pool; /* next available thread in the pool */
2510  kmp_disp_t *th_dispatch; /* thread's dispatch data */
2511  int th_in_pool; /* in thread pool (32 bits for TCR/TCW) */
2512 
2513  /* The following are cached from the team info structure */
2514  /* TODO use these in more places as determined to be needed via profiling */
2515  int th_team_nproc; /* number of threads in a team */
2516  kmp_info_p *th_team_master; /* the team's master thread */
2517  int th_team_serialized; /* team is serialized */
2518 #if OMP_40_ENABLED
2519  microtask_t th_teams_microtask; /* save entry address for teams construct */
2520  int th_teams_level; /* save initial level of teams construct */
2521 /* it is 0 on the device but may be anything on the host */
2522 #endif
2523 
2524 /* The blocktime info is copied from the team struct to the thread struct */
2525 /* at the start of a barrier, and the values stored in the team are used */
2526 /* at points in the code where the team struct is no longer guaranteed */
2527 /* to exist (from the POV of worker threads). */
2528 #if KMP_USE_MONITOR
2529  int th_team_bt_intervals;
2530  int th_team_bt_set;
2531 #else
2532  kmp_uint64 th_team_bt_intervals;
2533 #endif
2534 
2535 #if KMP_AFFINITY_SUPPORTED
2536  kmp_affin_mask_t *th_affin_mask; /* thread's current affinity mask */
2537 #endif
2538 #if OMP_50_ENABLED
2539  omp_allocator_handle_t th_def_allocator; /* default allocator */
2540 #endif
2541  /* The data set by the master at reinit, then R/W by the worker */
2542  KMP_ALIGN_CACHE int
2543  th_set_nproc; /* if > 0, then only use this request for the next fork */
2544 #if KMP_NESTED_HOT_TEAMS
2545  kmp_hot_team_ptr_t *th_hot_teams; /* array of hot teams */
2546 #endif
2547 #if OMP_40_ENABLED
2548  kmp_proc_bind_t
2549  th_set_proc_bind; /* if != proc_bind_default, use request for next fork */
2550  kmp_teams_size_t
2551  th_teams_size; /* number of teams/threads in teams construct */
2552 #if KMP_AFFINITY_SUPPORTED
2553  int th_current_place; /* place currently bound to */
2554  int th_new_place; /* place to bind to in par reg */
2555  int th_first_place; /* first place in partition */
2556  int th_last_place; /* last place in partition */
2557 #endif
2558 #endif
2559 #if OMP_50_ENABLED
2560  int th_prev_level; /* previous level for affinity format */
2561  int th_prev_num_threads; /* previous num_threads for affinity format */
2562 #endif
2563 #if USE_ITT_BUILD
2564  kmp_uint64 th_bar_arrive_time; /* arrival to barrier timestamp */
2565  kmp_uint64 th_bar_min_time; /* minimum arrival time at the barrier */
2566  kmp_uint64 th_frame_time; /* frame timestamp */
2567 #endif /* USE_ITT_BUILD */
2568  kmp_local_t th_local;
2569  struct private_common *th_pri_head;
2570 
2571  /* Now the data only used by the worker (after initial allocation) */
2572  /* TODO: the first serial team should actually be stored in the info_t
2573  structure. This will help reduce initial allocation overhead */
2574  KMP_ALIGN_CACHE kmp_team_p
2575  *th_serial_team; /*serialized team held in reserve*/
2576 
2577 #if OMPT_SUPPORT
2578  ompt_thread_info_t ompt_thread_info;
2579 #endif
2580 
2581  /* The following are also read by the master during reinit */
2582  struct common_table *th_pri_common;
2583 
2584  volatile kmp_uint32 th_spin_here; /* thread-local location for spinning */
2585  /* while awaiting queuing lock acquire */
2586 
2587  volatile void *th_sleep_loc; // this points at a kmp_flag<T>
2588 
2589  ident_t *th_ident;
2590  unsigned th_x; // Random number generator data
2591  unsigned th_a; // Random number generator data
2592 
2593  /* Tasking-related data for the thread */
2594  kmp_task_team_t *th_task_team; // Task team struct
2595  kmp_taskdata_t *th_current_task; // Innermost Task being executed
2596  kmp_uint8 th_task_state; // alternating 0/1 for task team identification
2597  kmp_uint8 *th_task_state_memo_stack; // Stack holding memos of th_task_state
2598  // at nested levels
2599  kmp_uint32 th_task_state_top; // Top element of th_task_state_memo_stack
2600  kmp_uint32 th_task_state_stack_sz; // Size of th_task_state_memo_stack
2601  kmp_uint32 th_reap_state; // Non-zero indicates thread is not
2602  // tasking, thus safe to reap
2603 
2604  /* More stuff for keeping track of active/sleeping threads (this part is
2605  written by the worker thread) */
2606  kmp_uint8 th_active_in_pool; // included in count of #active threads in pool
2607  int th_active; // ! sleeping; 32 bits for TCR/TCW
2608  struct cons_header *th_cons; // used for consistency check
2609 #if KMP_USE_HIER_SCHED
2610  // used for hierarchical scheduling
2611  kmp_hier_private_bdata_t *th_hier_bar_data;
2612 #endif
2613 
2614  /* Add the synchronizing data, which is cache aligned and padded. */
2615  KMP_ALIGN_CACHE kmp_balign_t th_bar[bs_last_barrier];
2616 
2617  KMP_ALIGN_CACHE volatile kmp_int32
2618  th_next_waiting; /* gtid+1 of next thread on lock wait queue, 0 if none */
2619 
2620 #if (USE_FAST_MEMORY == 3) || (USE_FAST_MEMORY == 5)
2621 #define NUM_LISTS 4
2622  kmp_free_list_t th_free_lists[NUM_LISTS]; // Free lists for fast memory
2623 // allocation routines
2624 #endif
2625 
2626 #if KMP_OS_WINDOWS
2627  kmp_win32_cond_t th_suspend_cv;
2628  kmp_win32_mutex_t th_suspend_mx;
2629  std::atomic<int> th_suspend_init;
2630 #endif
2631 #if KMP_OS_UNIX
2632  kmp_cond_align_t th_suspend_cv;
2633  kmp_mutex_align_t th_suspend_mx;
2634  std::atomic<int> th_suspend_init_count;
2635 #endif
2636 
2637 #if USE_ITT_BUILD
2638  kmp_itt_mark_t th_itt_mark_single;
2639 // alignment ???
2640 #endif /* USE_ITT_BUILD */
2641 #if KMP_STATS_ENABLED
2642  kmp_stats_list *th_stats;
2643 #endif
2644 #if KMP_OS_UNIX
2645  std::atomic<bool> th_blocking;
2646 #endif
2647  kmp_cg_root_t *th_cg_roots; // list of cg_roots associated with this thread
2648 } kmp_base_info_t;
2649 
2650 typedef union KMP_ALIGN_CACHE kmp_info {
2651  double th_align; /* use worst case alignment */
2652  char th_pad[KMP_PAD(kmp_base_info_t, CACHE_LINE)];
2653  kmp_base_info_t th;
2654 } kmp_info_t;
2655 
2656 // OpenMP thread team data structures
2657 
2658 typedef struct kmp_base_data { volatile kmp_uint32 t_value; } kmp_base_data_t;
2659 
2660 typedef union KMP_ALIGN_CACHE kmp_sleep_team {
2661  double dt_align; /* use worst case alignment */
2662  char dt_pad[KMP_PAD(kmp_base_data_t, CACHE_LINE)];
2663  kmp_base_data_t dt;
2664 } kmp_sleep_team_t;
2665 
2666 typedef union KMP_ALIGN_CACHE kmp_ordered_team {
2667  double dt_align; /* use worst case alignment */
2668  char dt_pad[KMP_PAD(kmp_base_data_t, CACHE_LINE)];
2669  kmp_base_data_t dt;
2670 } kmp_ordered_team_t;
2671 
2672 typedef int (*launch_t)(int gtid);
2673 
2674 /* Minimum number of ARGV entries to malloc if necessary */
2675 #define KMP_MIN_MALLOC_ARGV_ENTRIES 100
2676 
2677 // Set up how many argv pointers will fit in cache lines containing
2678 // t_inline_argv. Historically, we have supported at least 96 bytes. Using a
2679 // larger value for more space between the master-write/worker-read section
2680 // and the read/write-by-all section seems to buy more performance on EPCC PARALLEL.
2681 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
2682 #define KMP_INLINE_ARGV_BYTES \
2683  (4 * CACHE_LINE - \
2684  ((3 * KMP_PTR_SKIP + 2 * sizeof(int) + 2 * sizeof(kmp_int8) + \
2685  sizeof(kmp_int16) + sizeof(kmp_uint32)) % \
2686  CACHE_LINE))
2687 #else
2688 #define KMP_INLINE_ARGV_BYTES \
2689  (2 * CACHE_LINE - ((3 * KMP_PTR_SKIP + 2 * sizeof(int)) % CACHE_LINE))
2690 #endif
2691 #define KMP_INLINE_ARGV_ENTRIES (int)(KMP_INLINE_ARGV_BYTES / KMP_PTR_SKIP)
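// Worked example (assuming CACHE_LINE == 64 and 8-byte pointers, i.e.
// KMP_PTR_SKIP == 8, on Intel(R) 64): the fields preceding t_inline_argv take
// 3*8 + 2*4 + 2*1 + 2 + 4 = 40 bytes, so
// KMP_INLINE_ARGV_BYTES = 4*64 - (40 % 64) = 216 and
// KMP_INLINE_ARGV_ENTRIES = 216 / 8 = 27.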
2692 
2693 typedef struct KMP_ALIGN_CACHE kmp_base_team {
2694  // Synchronization Data
2695  // ---------------------------------------------------------------------------
2696  KMP_ALIGN_CACHE kmp_ordered_team_t t_ordered;
2697  kmp_balign_team_t t_bar[bs_last_barrier];
2698  std::atomic<int> t_construct; // count of single directive encountered by team
2699  char pad[sizeof(kmp_lock_t)]; // padding to maintain performance on big iron
2700 
2701  // [0] - parallel / [1] - worksharing task reduction data shared by taskgroups
2702  std::atomic<void *> t_tg_reduce_data[2]; // to support task modifier
2703  std::atomic<int> t_tg_fini_counter[2]; // sync end of task reductions
2704 
2705  // Master only
2706  // ---------------------------------------------------------------------------
2707  KMP_ALIGN_CACHE int t_master_tid; // tid of master in parent team
2708  int t_master_this_cons; // "this_construct" single counter of master in parent
2709  // team
2710  ident_t *t_ident; // if volatile, have to change too much other crud to
2711  // volatile too
2712  kmp_team_p *t_parent; // parent team
2713  kmp_team_p *t_next_pool; // next free team in the team pool
2714  kmp_disp_t *t_dispatch; // thread's dispatch data
2715  kmp_task_team_t *t_task_team[2]; // Task team struct; switch between 2
2716 #if OMP_40_ENABLED
2717  kmp_proc_bind_t t_proc_bind; // bind type for par region
2718 #endif // OMP_40_ENABLED
2719 #if USE_ITT_BUILD
2720  kmp_uint64 t_region_time; // region begin timestamp
2721 #endif /* USE_ITT_BUILD */
2722 
2723  // Master write, workers read
2724  // --------------------------------------------------------------------------
2725  KMP_ALIGN_CACHE void **t_argv;
2726  int t_argc;
2727  int t_nproc; // number of threads in team
2728  microtask_t t_pkfn;
2729  launch_t t_invoke; // procedure to launch the microtask
2730 
2731 #if OMPT_SUPPORT
2732  ompt_team_info_t ompt_team_info;
2733  ompt_lw_taskteam_t *ompt_serialized_team_info;
2734 #endif
2735 
2736 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
2737  kmp_int8 t_fp_control_saved;
2738  kmp_int8 t_pad2b;
2739  kmp_int16 t_x87_fpu_control_word; // FP control regs
2740  kmp_uint32 t_mxcsr;
2741 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
2742 
2743  void *t_inline_argv[KMP_INLINE_ARGV_ENTRIES];
2744 
2745  KMP_ALIGN_CACHE kmp_info_t **t_threads;
2746  kmp_taskdata_t
2747  *t_implicit_task_taskdata; // Taskdata for the thread's implicit task
2748  int t_level; // nested parallel level
2749 
2750  KMP_ALIGN_CACHE int t_max_argc;
2751  int t_max_nproc; // max threads this team can handle (dynamically expandable)
2752  int t_serialized; // levels deep of serialized teams
2753  dispatch_shared_info_t *t_disp_buffer; // buffers for dispatch system
2754  int t_id; // team's id, assigned by debugger.
2755  int t_active_level; // nested active parallel level
2756  kmp_r_sched_t t_sched; // run-time schedule for the team
2757 #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
2758  int t_first_place; // first & last place in parent thread's partition.
2759  int t_last_place; // Restore these values to master after par region.
2760 #endif // OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
2761 #if OMP_50_ENABLED
2762  int t_display_affinity;
2763 #endif
2764  int t_size_changed; // team size was changed?: 0: no, 1: yes, -1: changed via
2765 // omp_set_num_threads() call
2766 #if OMP_50_ENABLED
2767  omp_allocator_handle_t t_def_allocator; /* default allocator */
2768 #endif
2769 
2770 // Read/write by workers as well
2771 #if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
2772  // Using CACHE_LINE=64 reduces memory footprint, but causes a big perf
2773  // regression of epcc 'parallel' and 'barrier' on fxe256lin01. This extra
2774  // padding serves to fix the performance of epcc 'parallel' and 'barrier' when
2775  // CACHE_LINE=64. TODO: investigate more and get rid of this padding.
2776  char dummy_padding[1024];
2777 #endif
2778  // Internal control stack for additional nested teams.
2779  KMP_ALIGN_CACHE kmp_internal_control_t *t_control_stack_top;
2780 // for SERIALIZED teams nested 2 or more levels deep
2781 #if OMP_40_ENABLED
2782  // typed flag to store request state of cancellation
2783  std::atomic<kmp_int32> t_cancel_request;
2784 #endif
2785  int t_master_active; // save on fork, restore on join
2786  void *t_copypriv_data; // team specific pointer to copyprivate data array
2787 #if KMP_OS_WINDOWS
2788  std::atomic<kmp_uint32> t_copyin_counter;
2789 #endif
2790 #if USE_ITT_BUILD
2791  void *t_stack_id; // team specific stack stitching id (for ittnotify)
2792 #endif /* USE_ITT_BUILD */
2793 } kmp_base_team_t;
2794 
2795 union KMP_ALIGN_CACHE kmp_team {
2796  kmp_base_team_t t;
2797  double t_align; /* use worst case alignment */
2798  char t_pad[KMP_PAD(kmp_base_team_t, CACHE_LINE)];
2799 };
2800 
2801 typedef union KMP_ALIGN_CACHE kmp_time_global {
2802  double dt_align; /* use worst case alignment */
2803  char dt_pad[KMP_PAD(kmp_base_data_t, CACHE_LINE)];
2804  kmp_base_data_t dt;
2805 } kmp_time_global_t;
2806 
2807 typedef struct kmp_base_global {
2808  /* cache-aligned */
2809  kmp_time_global_t g_time;
2810 
2811  /* non cache-aligned */
2812  volatile int g_abort;
2813  volatile int g_done;
2814 
2815  int g_dynamic;
2816  enum dynamic_mode g_dynamic_mode;
2817 } kmp_base_global_t;
2818 
2819 typedef union KMP_ALIGN_CACHE kmp_global {
2820  kmp_base_global_t g;
2821  double g_align; /* use worst case alignment */
2822  char g_pad[KMP_PAD(kmp_base_global_t, CACHE_LINE)];
2823 } kmp_global_t;
2824 
2825 typedef struct kmp_base_root {
2826  // TODO: GEH - combine r_active with r_in_parallel then r_active ==
2827  // (r_in_parallel>= 0)
2828  // TODO: GEH - then replace r_active with t_active_levels if we can to reduce
2829  // the synch overhead or keeping r_active
2830  volatile int r_active; /* TRUE if some region in a nest has > 1 thread */
2831  // keeps a count of active parallel regions per root
2832  std::atomic<int> r_in_parallel;
2833  // GEH: This is misnamed, should be r_active_levels
2834  kmp_team_t *r_root_team;
2835  kmp_team_t *r_hot_team;
2836  kmp_info_t *r_uber_thread;
2837  kmp_lock_t r_begin_lock;
2838  volatile int r_begin;
2839  int r_blocktime; /* blocktime for this root and descendants */
2840 } kmp_base_root_t;
2841 
2842 typedef union KMP_ALIGN_CACHE kmp_root {
2843  kmp_base_root_t r;
2844  double r_align; /* use worst case alignment */
2845  char r_pad[KMP_PAD(kmp_base_root_t, CACHE_LINE)];
2846 } kmp_root_t;
2847 
2848 struct fortran_inx_info {
2849  kmp_int32 data;
2850 };
2851 
2852 /* ------------------------------------------------------------------------ */
2853 
2854 extern int __kmp_settings;
2855 extern int __kmp_duplicate_library_ok;
2856 #if USE_ITT_BUILD
2857 extern int __kmp_forkjoin_frames;
2858 extern int __kmp_forkjoin_frames_mode;
2859 #endif
2860 extern PACKED_REDUCTION_METHOD_T __kmp_force_reduction_method;
2861 extern int __kmp_determ_red;
2862 
2863 #ifdef KMP_DEBUG
2864 extern int kmp_a_debug;
2865 extern int kmp_b_debug;
2866 extern int kmp_c_debug;
2867 extern int kmp_d_debug;
2868 extern int kmp_e_debug;
2869 extern int kmp_f_debug;
2870 #endif /* KMP_DEBUG */
2871 
2872 /* For debug information logging using rotating buffer */
2873 #define KMP_DEBUG_BUF_LINES_INIT 512
2874 #define KMP_DEBUG_BUF_LINES_MIN 1
2875 
2876 #define KMP_DEBUG_BUF_CHARS_INIT 128
2877 #define KMP_DEBUG_BUF_CHARS_MIN 2
2878 
2879 extern int
2880  __kmp_debug_buf; /* TRUE means use buffer, FALSE means print to stderr */
2881 extern int __kmp_debug_buf_lines; /* How many lines of debug stored in buffer */
2882 extern int
2883  __kmp_debug_buf_chars; /* How many characters allowed per line in buffer */
2884 extern int __kmp_debug_buf_atomic; /* TRUE means use atomic update of buffer
2885  entry pointer */
2886 
2887 extern char *__kmp_debug_buffer; /* Debug buffer itself */
2888 extern std::atomic<int> __kmp_debug_count; /* Counter for number of lines
2889  printed in buffer so far */
2890 extern int __kmp_debug_buf_warn_chars; /* Keep track of char increase
2891  recommended in warnings */
2892 /* end rotating debug buffer */
2893 
2894 #ifdef KMP_DEBUG
2895 extern int __kmp_par_range; /* +1 => only go par for constructs in range */
2896 
2897 #define KMP_PAR_RANGE_ROUTINE_LEN 1024
2898 extern char __kmp_par_range_routine[KMP_PAR_RANGE_ROUTINE_LEN];
2899 #define KMP_PAR_RANGE_FILENAME_LEN 1024
2900 extern char __kmp_par_range_filename[KMP_PAR_RANGE_FILENAME_LEN];
2901 extern int __kmp_par_range_lb;
2902 extern int __kmp_par_range_ub;
2903 #endif
2904 
2905 /* For printing out dynamic storage map for threads and teams */
2906 extern int
2907  __kmp_storage_map; /* True means print storage map for threads and teams */
2908 extern int __kmp_storage_map_verbose; /* True means storage map includes
2909  placement info */
2910 extern int __kmp_storage_map_verbose_specified;
2911 
2912 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
2913 extern kmp_cpuinfo_t __kmp_cpuinfo;
2914 #endif
2915 
2916 extern volatile int __kmp_init_serial;
2917 extern volatile int __kmp_init_gtid;
2918 extern volatile int __kmp_init_common;
2919 extern volatile int __kmp_init_middle;
2920 extern volatile int __kmp_init_parallel;
2921 #if KMP_USE_MONITOR
2922 extern volatile int __kmp_init_monitor;
2923 #endif
2924 extern volatile int __kmp_init_user_locks;
2925 extern int __kmp_init_counter;
2926 extern int __kmp_root_counter;
2927 extern int __kmp_version;
2928 
2929 /* list of address of allocated caches for commons */
2930 extern kmp_cached_addr_t *__kmp_threadpriv_cache_list;
2931 
2932 /* Barrier algorithm types and options */
2933 extern kmp_uint32 __kmp_barrier_gather_bb_dflt;
2934 extern kmp_uint32 __kmp_barrier_release_bb_dflt;
2935 extern kmp_bar_pat_e __kmp_barrier_gather_pat_dflt;
2936 extern kmp_bar_pat_e __kmp_barrier_release_pat_dflt;
2937 extern kmp_uint32 __kmp_barrier_gather_branch_bits[bs_last_barrier];
2938 extern kmp_uint32 __kmp_barrier_release_branch_bits[bs_last_barrier];
2939 extern kmp_bar_pat_e __kmp_barrier_gather_pattern[bs_last_barrier];
2940 extern kmp_bar_pat_e __kmp_barrier_release_pattern[bs_last_barrier];
2941 extern char const *__kmp_barrier_branch_bit_env_name[bs_last_barrier];
2942 extern char const *__kmp_barrier_pattern_env_name[bs_last_barrier];
2943 extern char const *__kmp_barrier_type_name[bs_last_barrier];
2944 extern char const *__kmp_barrier_pattern_name[bp_last_bar];
2945 
2946 /* Global Locks */
2947 extern kmp_bootstrap_lock_t __kmp_initz_lock; /* control initialization */
2948 extern kmp_bootstrap_lock_t __kmp_forkjoin_lock; /* control fork/join access */
2949 extern kmp_bootstrap_lock_t __kmp_task_team_lock;
2950 extern kmp_bootstrap_lock_t
2951  __kmp_exit_lock; /* exit() is not always thread-safe */
2952 #if KMP_USE_MONITOR
2953 extern kmp_bootstrap_lock_t
2954  __kmp_monitor_lock; /* control monitor thread creation */
2955 #endif
2956 extern kmp_bootstrap_lock_t
2957  __kmp_tp_cached_lock; /* used for the hack to allow threadprivate cache and
2958  __kmp_threads expansion to co-exist */
2959 
2960 extern kmp_lock_t __kmp_global_lock; /* control OS/global access */
2961 extern kmp_queuing_lock_t __kmp_dispatch_lock; /* control dispatch access */
2962 extern kmp_lock_t __kmp_debug_lock; /* control I/O access for KMP_DEBUG */
2963 
2964 extern enum library_type __kmp_library;
2965 
2966 extern enum sched_type __kmp_sched; /* default runtime scheduling */
2967 extern enum sched_type __kmp_static; /* default static scheduling method */
2968 extern enum sched_type __kmp_guided; /* default guided scheduling method */
2969 extern enum sched_type __kmp_auto; /* default auto scheduling method */
2970 extern int __kmp_chunk; /* default runtime chunk size */
2971 
2972 extern size_t __kmp_stksize; /* stack size per thread */
2973 #if KMP_USE_MONITOR
2974 extern size_t __kmp_monitor_stksize; /* stack size for monitor thread */
2975 #endif
2976 extern size_t __kmp_stkoffset; /* stack offset per thread */
2977 extern int __kmp_stkpadding; /* Should we pad root thread(s) stack */
2978 
2979 extern size_t
2980  __kmp_malloc_pool_incr; /* incremental size of pool for kmp_malloc() */
2981 extern int __kmp_env_stksize; /* was KMP_STACKSIZE specified? */
2982 extern int __kmp_env_blocktime; /* was KMP_BLOCKTIME specified? */
2983 extern int __kmp_env_checks; /* was KMP_CHECKS specified? */
2984 extern int __kmp_env_consistency_check; // was KMP_CONSISTENCY_CHECK specified?
2985 extern int __kmp_generate_warnings; /* should we issue warnings? */
2986 extern int __kmp_reserve_warn; /* have we issued reserve_threads warning? */
2987 
2988 #ifdef DEBUG_SUSPEND
2989 extern int __kmp_suspend_count; /* count inside __kmp_suspend_template() */
2990 #endif
2991 
2992 extern kmp_int32 __kmp_use_yield;
2993 extern kmp_int32 __kmp_use_yield_exp_set;
2994 extern kmp_uint32 __kmp_yield_init;
2995 extern kmp_uint32 __kmp_yield_next;
2996 
2997 /* ------------------------------------------------------------------------- */
2998 extern int __kmp_allThreadsSpecified;
2999 
3000 extern size_t __kmp_align_alloc;
3001 /* following data protected by initialization routines */
3002 extern int __kmp_xproc; /* number of processors in the system */
3003 extern int __kmp_avail_proc; /* number of processors available to the process */
3004 extern size_t __kmp_sys_min_stksize; /* system-defined minimum stack size */
3005 extern int __kmp_sys_max_nth; /* system-imposed maximum number of threads */
3006 // maximum total number of concurrently-existing threads on device
3007 extern int __kmp_max_nth;
3008 // maximum total number of concurrently-existing threads in a contention group
3009 extern int __kmp_cg_max_nth;
3010 extern int __kmp_teams_max_nth; // max threads used in a teams construct
3011 extern int __kmp_threads_capacity; /* capacity of the arrays __kmp_threads and
3012  __kmp_root */
3013 extern int __kmp_dflt_team_nth; /* default number of threads in a parallel
3014  region a la OMP_NUM_THREADS */
3015 extern int __kmp_dflt_team_nth_ub; /* upper bound on "" determined at serial
3016  initialization */
3017 extern int __kmp_tp_capacity; /* capacity of __kmp_threads if threadprivate is
3018  used (fixed) */
3019 extern int __kmp_tp_cached; /* whether threadprivate cache has been created
3020  (__kmpc_threadprivate_cached()) */
3021 extern int __kmp_dflt_blocktime; /* number of milliseconds to wait before
3022  blocking (env setting) */
3023 #if KMP_USE_MONITOR
3024 extern int
3025  __kmp_monitor_wakeups; /* number of times monitor wakes up per second */
3026 extern int __kmp_bt_intervals; /* number of monitor timestamp intervals before
3027  blocking */
3028 #endif
3029 #ifdef KMP_ADJUST_BLOCKTIME
3030 extern int __kmp_zero_bt; /* whether blocktime has been forced to zero */
3031 #endif /* KMP_ADJUST_BLOCKTIME */
3032 #ifdef KMP_DFLT_NTH_CORES
3033 extern int __kmp_ncores; /* Total number of cores for threads placement */
3034 #endif
3035 /* Number of millisecs to delay on abort for Intel(R) VTune(TM) tools */
3036 extern int __kmp_abort_delay;
3037 
3038 extern int __kmp_need_register_atfork_specified;
3039 extern int
3040  __kmp_need_register_atfork; /* At initialization, call pthread_atfork to
3041  install fork handler */
3042 extern int __kmp_gtid_mode; /* Method of getting gtid, values:
3043  0 - not set, will be set at runtime
3044  1 - using stack search
3045  2 - dynamic TLS (pthread_getspecific(Linux* OS/OS
3046  X*) or TlsGetValue(Windows* OS))
3047  3 - static TLS (__declspec(thread) __kmp_gtid),
3048  Linux* OS .so only. */
3049 extern int
3050  __kmp_adjust_gtid_mode; /* If true, adjust method based on #threads */
3051 #ifdef KMP_TDATA_GTID
3052 extern KMP_THREAD_LOCAL int __kmp_gtid;
3053 #endif
3054 extern int __kmp_tls_gtid_min; /* #threads below which use sp search for gtid */
3055 extern int __kmp_foreign_tp; // If true, separate TP var for each foreign thread
3056 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
3057 extern int __kmp_inherit_fp_control; // copy fp creg(s) parent->workers at fork
3058 extern kmp_int16 __kmp_init_x87_fpu_control_word; // init thread's FP ctrl reg
3059 extern kmp_uint32 __kmp_init_mxcsr; /* init thread's mxcsr */
3060 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
3061 
3062 // max_active_levels for nested parallelism enabled by default via
3063 // OMP_MAX_ACTIVE_LEVELS, OMP_NESTED, OMP_NUM_THREADS, and OMP_PROC_BIND
3064 extern int __kmp_dflt_max_active_levels;
3065 // Indicates whether value of __kmp_dflt_max_active_levels was already
3066 // explicitly set by OMP_MAX_ACTIVE_LEVELS or OMP_NESTED=false
3067 extern bool __kmp_dflt_max_active_levels_set;
3068 extern int __kmp_dispatch_num_buffers; /* max possible dynamic loops in
3069  concurrent execution per team */
3070 #if KMP_NESTED_HOT_TEAMS
3071 extern int __kmp_hot_teams_mode;
3072 extern int __kmp_hot_teams_max_level;
3073 #endif
3074 
3075 #if KMP_OS_LINUX
3076 extern enum clock_function_type __kmp_clock_function;
3077 extern int __kmp_clock_function_param;
3078 #endif /* KMP_OS_LINUX */
3079 
3080 #if KMP_MIC_SUPPORTED
3081 extern enum mic_type __kmp_mic_type;
3082 #endif
3083 
3084 #ifdef USE_LOAD_BALANCE
3085 extern double __kmp_load_balance_interval; // load balance algorithm interval
3086 #endif /* USE_LOAD_BALANCE */
3087 
3088 // OpenMP 3.1 - Nested num threads array
3089 typedef struct kmp_nested_nthreads_t {
3090  int *nth;
3091  int size;
3092  int used;
3093 } kmp_nested_nthreads_t;
3094 
3095 extern kmp_nested_nthreads_t __kmp_nested_nth;
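// Illustrative sketch (not part of the original header): how a nested
// OMP_NUM_THREADS list such as "4,3,2" could populate kmp_nested_nthreads_t.
// The helper name and initializer are hypothetical; the real array is parsed
// and allocated by the settings code during library initialization.
static inline void __kmp_example_nested_nth_sketch(void) {
  static int example_nth[] = {4, 3, 2}; // one value per nesting level
  kmp_nested_nthreads_t example;
  example.nth = example_nth; // per-level thread counts
  example.size = 3;          // capacity of the nth array
  example.used = 3;          // entries actually specified by the user
  (void)example;
}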
3096 
3097 #if KMP_USE_ADAPTIVE_LOCKS
3098 
3099 // Parameters for the speculative lock backoff system.
3100 struct kmp_adaptive_backoff_params_t {
3101  // Number of soft retries before it counts as a hard retry.
3102  kmp_uint32 max_soft_retries;
3103  // Badness is a bit mask: 0,1,3,7,15,... On each hard failure we move one
3104  // step to the right in this sequence.
3105  kmp_uint32 max_badness;
3106 };
3107 
3108 extern kmp_adaptive_backoff_params_t __kmp_adaptive_backoff_params;
3109 
3110 #if KMP_DEBUG_ADAPTIVE_LOCKS
3111 extern const char *__kmp_speculative_statsfile;
3112 #endif
3113 
3114 #endif // KMP_USE_ADAPTIVE_LOCKS
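// Illustrative sketch (not part of the original header): the badness mask
// described above grows through 0, 1, 3, 7, 15, ... i.e. (1 << n) - 1. One
// plausible way to advance it on a hard failure, capped at max_badness, is
// shown below; the helper name is hypothetical.
#if KMP_USE_ADAPTIVE_LOCKS
static inline kmp_uint32
__kmp_example_step_badness(kmp_uint32 badness,
                           const kmp_adaptive_backoff_params_t *params) {
  kmp_uint32 next = (badness << 1) | 1; // 0 -> 1 -> 3 -> 7 -> 15 -> ...
  return next > params->max_badness ? params->max_badness : next;
}
#endif // KMP_USE_ADAPTIVE_LOCKS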
3115 
3116 #if OMP_40_ENABLED
3117 extern int __kmp_display_env; /* TRUE or FALSE */
3118 extern int __kmp_display_env_verbose; /* TRUE if OMP_DISPLAY_ENV=VERBOSE */
3119 extern int __kmp_omp_cancellation; /* TRUE or FALSE */
3120 #endif
3121 
3122 /* ------------------------------------------------------------------------- */
3123 
3124 /* the following are protected by the fork/join lock */
3125 /* write: lock read: anytime */
3126 extern kmp_info_t **__kmp_threads; /* Descriptors for the threads */
3127 /* read/write: lock */
3128 extern volatile kmp_team_t *__kmp_team_pool;
3129 extern volatile kmp_info_t *__kmp_thread_pool;
3130 extern kmp_info_t *__kmp_thread_pool_insert_pt;
3131 
3132 // total num threads reachable from some root thread including all root threads
3133 extern volatile int __kmp_nth;
3134 /* total number of threads reachable from some root thread including all root
3135  threads, and those in the thread pool */
3136 extern volatile int __kmp_all_nth;
3137 extern std::atomic<int> __kmp_thread_pool_active_nth;
3138 
3139 extern kmp_root_t **__kmp_root; /* root of thread hierarchy */
3140 /* end data protected by fork/join lock */
3141 /* ------------------------------------------------------------------------- */
3142 
3143 #define __kmp_get_gtid() __kmp_get_global_thread_id()
3144 #define __kmp_entry_gtid() __kmp_get_global_thread_id_reg()
3145 #define __kmp_get_tid() (__kmp_tid_from_gtid(__kmp_get_gtid()))
3146 #define __kmp_get_team() (__kmp_threads[(__kmp_get_gtid())]->th.th_team)
3147 #define __kmp_get_thread() (__kmp_thread_from_gtid(__kmp_get_gtid()))
3148 
3149 // AT: Which way is correct?
3150 // AT: 1. nproc = __kmp_threads[ ( gtid ) ] -> th.th_team -> t.t_nproc;
3151 // AT: 2. nproc = __kmp_threads[ ( gtid ) ] -> th.th_team_nproc;
3152 #define __kmp_get_team_num_threads(gtid) \
3153  (__kmp_threads[(gtid)]->th.th_team->t.t_nproc)
3154 
3155 static inline bool KMP_UBER_GTID(int gtid) {
3156  KMP_DEBUG_ASSERT(gtid >= KMP_GTID_MIN);
3157  KMP_DEBUG_ASSERT(gtid < __kmp_threads_capacity);
3158  return (gtid >= 0 && __kmp_root[gtid] && __kmp_threads[gtid] &&
3159  __kmp_threads[gtid] == __kmp_root[gtid]->r.r_uber_thread);
3160 }
3161 
3162 static inline int __kmp_tid_from_gtid(int gtid) {
3163  KMP_DEBUG_ASSERT(gtid >= 0);
3164  return __kmp_threads[gtid]->th.th_info.ds.ds_tid;
3165 }
3166 
3167 static inline int __kmp_gtid_from_tid(int tid, const kmp_team_t *team) {
3168  KMP_DEBUG_ASSERT(tid >= 0 && team);
3169  return team->t.t_threads[tid]->th.th_info.ds.ds_gtid;
3170 }
3171 
3172 static inline int __kmp_gtid_from_thread(const kmp_info_t *thr) {
3173  KMP_DEBUG_ASSERT(thr);
3174  return thr->th.th_info.ds.ds_gtid;
3175 }
3176 
3177 static inline kmp_info_t *__kmp_thread_from_gtid(int gtid) {
3178  KMP_DEBUG_ASSERT(gtid >= 0);
3179  return __kmp_threads[gtid];
3180 }
3181 
3182 static inline kmp_team_t *__kmp_team_from_gtid(int gtid) {
3183  KMP_DEBUG_ASSERT(gtid >= 0);
3184  return __kmp_threads[gtid]->th.th_team;
3185 }
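// Illustrative sketch (not part of the original header): typical use of the
// gtid accessors above from inside the runtime. __kmp_entry_gtid() registers
// the calling thread if needed; the other helpers only translate between
// gtid, tid, kmp_info_t and kmp_team_t. The function name is hypothetical.
static inline int __kmp_example_query_self(void) {
  int gtid = __kmp_entry_gtid();                  // global thread id
  kmp_info_t *thr = __kmp_thread_from_gtid(gtid); // this thread's descriptor
  kmp_team_t *team = __kmp_team_from_gtid(gtid);  // its current team
  int tid = __kmp_tid_from_gtid(gtid);            // id within that team
  KMP_DEBUG_ASSERT(thr == team->t.t_threads[tid]);
  (void)thr;
  (void)team;
  return __kmp_get_team_num_threads(gtid);        // team size
}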
3186 
3187 /* ------------------------------------------------------------------------- */
3188 
3189 extern kmp_global_t __kmp_global; /* global status */
3190 
3191 extern kmp_info_t __kmp_monitor;
3192 // For Debugging Support Library
3193 extern std::atomic<kmp_int32> __kmp_team_counter;
3194 // For Debugging Support Library
3195 extern std::atomic<kmp_int32> __kmp_task_counter;
3196 
3197 #if USE_DEBUGGER
3198 #define _KMP_GEN_ID(counter) \
3199  (__kmp_debugging ? KMP_ATOMIC_INC(&counter) + 1 : ~0)
3200 #else
3201 #define _KMP_GEN_ID(counter) (~0)
3202 #endif /* USE_DEBUGGER */
3203 
3204 #define KMP_GEN_TASK_ID() _KMP_GEN_ID(__kmp_task_counter)
3205 #define KMP_GEN_TEAM_ID() _KMP_GEN_ID(__kmp_team_counter)
3206 
3207 /* ------------------------------------------------------------------------ */
3208 
3209 extern void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2,
3210  size_t size, char const *format, ...);
3211 
3212 extern void __kmp_serial_initialize(void);
3213 extern void __kmp_middle_initialize(void);
3214 extern void __kmp_parallel_initialize(void);
3215 
3216 extern void __kmp_internal_begin(void);
3217 extern void __kmp_internal_end_library(int gtid);
3218 extern void __kmp_internal_end_thread(int gtid);
3219 extern void __kmp_internal_end_atexit(void);
3220 extern void __kmp_internal_end_fini(void);
3221 extern void __kmp_internal_end_dtor(void);
3222 extern void __kmp_internal_end_dest(void *);
3223 
3224 extern int __kmp_register_root(int initial_thread);
3225 extern void __kmp_unregister_root(int gtid);
3226 
3227 extern int __kmp_ignore_mppbeg(void);
3228 extern int __kmp_ignore_mppend(void);
3229 
3230 extern int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws);
3231 extern void __kmp_exit_single(int gtid);
3232 
3233 extern void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref);
3234 extern void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref);
3235 
3236 #ifdef USE_LOAD_BALANCE
3237 extern int __kmp_get_load_balance(int);
3238 #endif
3239 
3240 extern int __kmp_get_global_thread_id(void);
3241 extern int __kmp_get_global_thread_id_reg(void);
3242 extern void __kmp_exit_thread(int exit_status);
3243 extern void __kmp_abort(char const *format, ...);
3244 extern void __kmp_abort_thread(void);
3245 KMP_NORETURN extern void __kmp_abort_process(void);
3246 extern void __kmp_warn(char const *format, ...);
3247 
3248 extern void __kmp_set_num_threads(int new_nth, int gtid);
3249 
3250 // Returns current thread (pointer to kmp_info_t). Current thread *must* be
3251 // registered.
3252 static inline kmp_info_t *__kmp_entry_thread() {
3253  int gtid = __kmp_entry_gtid();
3254 
3255  return __kmp_threads[gtid];
3256 }
3257 
3258 extern void __kmp_set_max_active_levels(int gtid, int new_max_active_levels);
3259 extern int __kmp_get_max_active_levels(int gtid);
3260 extern int __kmp_get_ancestor_thread_num(int gtid, int level);
3261 extern int __kmp_get_team_size(int gtid, int level);
3262 extern void __kmp_set_schedule(int gtid, kmp_sched_t new_sched, int chunk);
3263 extern void __kmp_get_schedule(int gtid, kmp_sched_t *sched, int *chunk);
3264 
3265 extern unsigned short __kmp_get_random(kmp_info_t *thread);
3266 extern void __kmp_init_random(kmp_info_t *thread);
3267 
3268 extern kmp_r_sched_t __kmp_get_schedule_global(void);
3269 extern void __kmp_adjust_num_threads(int new_nproc);
3270 extern void __kmp_check_stksize(size_t *val);
3271 
3272 extern void *___kmp_allocate(size_t size KMP_SRC_LOC_DECL);
3273 extern void *___kmp_page_allocate(size_t size KMP_SRC_LOC_DECL);
3274 extern void ___kmp_free(void *ptr KMP_SRC_LOC_DECL);
3275 #define __kmp_allocate(size) ___kmp_allocate((size)KMP_SRC_LOC_CURR)
3276 #define __kmp_page_allocate(size) ___kmp_page_allocate((size)KMP_SRC_LOC_CURR)
3277 #define __kmp_free(ptr) ___kmp_free((ptr)KMP_SRC_LOC_CURR)
3278 
3279 #if USE_FAST_MEMORY
3280 extern void *___kmp_fast_allocate(kmp_info_t *this_thr,
3281  size_t size KMP_SRC_LOC_DECL);
3282 extern void ___kmp_fast_free(kmp_info_t *this_thr, void *ptr KMP_SRC_LOC_DECL);
3283 extern void __kmp_free_fast_memory(kmp_info_t *this_thr);
3284 extern void __kmp_initialize_fast_memory(kmp_info_t *this_thr);
3285 #define __kmp_fast_allocate(this_thr, size) \
3286  ___kmp_fast_allocate((this_thr), (size)KMP_SRC_LOC_CURR)
3287 #define __kmp_fast_free(this_thr, ptr) \
3288  ___kmp_fast_free((this_thr), (ptr)KMP_SRC_LOC_CURR)
3289 #endif
3290 
3291 extern void *___kmp_thread_malloc(kmp_info_t *th, size_t size KMP_SRC_LOC_DECL);
3292 extern void *___kmp_thread_calloc(kmp_info_t *th, size_t nelem,
3293  size_t elsize KMP_SRC_LOC_DECL);
3294 extern void *___kmp_thread_realloc(kmp_info_t *th, void *ptr,
3295  size_t size KMP_SRC_LOC_DECL);
3296 extern void ___kmp_thread_free(kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL);
3297 #define __kmp_thread_malloc(th, size) \
3298  ___kmp_thread_malloc((th), (size)KMP_SRC_LOC_CURR)
3299 #define __kmp_thread_calloc(th, nelem, elsize) \
3300  ___kmp_thread_calloc((th), (nelem), (elsize)KMP_SRC_LOC_CURR)
3301 #define __kmp_thread_realloc(th, ptr, size) \
3302  ___kmp_thread_realloc((th), (ptr), (size)KMP_SRC_LOC_CURR)
3303 #define __kmp_thread_free(th, ptr) \
3304  ___kmp_thread_free((th), (ptr)KMP_SRC_LOC_CURR)
3305 
3306 #define KMP_INTERNAL_MALLOC(sz) malloc(sz)
3307 #define KMP_INTERNAL_FREE(p) free(p)
3308 #define KMP_INTERNAL_REALLOC(p, sz) realloc((p), (sz))
3309 #define KMP_INTERNAL_CALLOC(n, sz) calloc((n), (sz))
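// Illustrative sketch (not part of the original header): the __kmp_allocate /
// __kmp_free wrappers above automatically append the caller's file and line
// through KMP_SRC_LOC_CURR so the debug allocator can attribute leaks and bad
// frees. A typical internal use looks like this (hypothetical helper name):
static inline void __kmp_example_scratch_buffer(size_t n) {
  int *buf = (int *)__kmp_allocate(sizeof(int) * n); // records caller location
  for (size_t i = 0; i < n; ++i)
    buf[i] = (int)i;
  __kmp_free(buf); // must pair with __kmp_allocate, not plain free()
}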
3310 
3311 extern void __kmp_push_num_threads(ident_t *loc, int gtid, int num_threads);
3312 
3313 #if OMP_40_ENABLED
3314 extern void __kmp_push_proc_bind(ident_t *loc, int gtid,
3315  kmp_proc_bind_t proc_bind);
3316 extern void __kmp_push_num_teams(ident_t *loc, int gtid, int num_teams,
3317  int num_threads);
3318 #endif
3319 
3320 extern void __kmp_yield();
3321 
3322 extern void __kmpc_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
3323  enum sched_type schedule, kmp_int32 lb,
3324  kmp_int32 ub, kmp_int32 st, kmp_int32 chunk);
3325 extern void __kmpc_dispatch_init_4u(ident_t *loc, kmp_int32 gtid,
3326  enum sched_type schedule, kmp_uint32 lb,
3327  kmp_uint32 ub, kmp_int32 st,
3328  kmp_int32 chunk);
3329 extern void __kmpc_dispatch_init_8(ident_t *loc, kmp_int32 gtid,
3330  enum sched_type schedule, kmp_int64 lb,
3331  kmp_int64 ub, kmp_int64 st, kmp_int64 chunk);
3332 extern void __kmpc_dispatch_init_8u(ident_t *loc, kmp_int32 gtid,
3333  enum sched_type schedule, kmp_uint64 lb,
3334  kmp_uint64 ub, kmp_int64 st,
3335  kmp_int64 chunk);
3336 
3337 extern int __kmpc_dispatch_next_4(ident_t *loc, kmp_int32 gtid,
3338  kmp_int32 *p_last, kmp_int32 *p_lb,
3339  kmp_int32 *p_ub, kmp_int32 *p_st);
3340 extern int __kmpc_dispatch_next_4u(ident_t *loc, kmp_int32 gtid,
3341  kmp_int32 *p_last, kmp_uint32 *p_lb,
3342  kmp_uint32 *p_ub, kmp_int32 *p_st);
3343 extern int __kmpc_dispatch_next_8(ident_t *loc, kmp_int32 gtid,
3344  kmp_int32 *p_last, kmp_int64 *p_lb,
3345  kmp_int64 *p_ub, kmp_int64 *p_st);
3346 extern int __kmpc_dispatch_next_8u(ident_t *loc, kmp_int32 gtid,
3347  kmp_int32 *p_last, kmp_uint64 *p_lb,
3348  kmp_uint64 *p_ub, kmp_int64 *p_st);
3349 
3350 extern void __kmpc_dispatch_fini_4(ident_t *loc, kmp_int32 gtid);
3351 extern void __kmpc_dispatch_fini_8(ident_t *loc, kmp_int32 gtid);
3352 extern void __kmpc_dispatch_fini_4u(ident_t *loc, kmp_int32 gtid);
3353 extern void __kmpc_dispatch_fini_8u(ident_t *loc, kmp_int32 gtid);
3354 
3355 #ifdef KMP_GOMP_COMPAT
3356 
3357 extern void __kmp_aux_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
3358  enum sched_type schedule, kmp_int32 lb,
3359  kmp_int32 ub, kmp_int32 st,
3360  kmp_int32 chunk, int push_ws);
3361 extern void __kmp_aux_dispatch_init_4u(ident_t *loc, kmp_int32 gtid,
3362  enum sched_type schedule, kmp_uint32 lb,
3363  kmp_uint32 ub, kmp_int32 st,
3364  kmp_int32 chunk, int push_ws);
3365 extern void __kmp_aux_dispatch_init_8(ident_t *loc, kmp_int32 gtid,
3366  enum sched_type schedule, kmp_int64 lb,
3367  kmp_int64 ub, kmp_int64 st,
3368  kmp_int64 chunk, int push_ws);
3369 extern void __kmp_aux_dispatch_init_8u(ident_t *loc, kmp_int32 gtid,
3370  enum sched_type schedule, kmp_uint64 lb,
3371  kmp_uint64 ub, kmp_int64 st,
3372  kmp_int64 chunk, int push_ws);
3373 extern void __kmp_aux_dispatch_fini_chunk_4(ident_t *loc, kmp_int32 gtid);
3374 extern void __kmp_aux_dispatch_fini_chunk_8(ident_t *loc, kmp_int32 gtid);
3375 extern void __kmp_aux_dispatch_fini_chunk_4u(ident_t *loc, kmp_int32 gtid);
3376 extern void __kmp_aux_dispatch_fini_chunk_8u(ident_t *loc, kmp_int32 gtid);
3377 
3378 #endif /* KMP_GOMP_COMPAT */
3379 
3380 extern kmp_uint32 __kmp_eq_4(kmp_uint32 value, kmp_uint32 checker);
3381 extern kmp_uint32 __kmp_neq_4(kmp_uint32 value, kmp_uint32 checker);
3382 extern kmp_uint32 __kmp_lt_4(kmp_uint32 value, kmp_uint32 checker);
3383 extern kmp_uint32 __kmp_ge_4(kmp_uint32 value, kmp_uint32 checker);
3384 extern kmp_uint32 __kmp_le_4(kmp_uint32 value, kmp_uint32 checker);
3385 extern kmp_uint32 __kmp_wait_4(kmp_uint32 volatile *spinner, kmp_uint32 checker,
3386  kmp_uint32 (*pred)(kmp_uint32, kmp_uint32),
3387  void *obj);
3388 extern void __kmp_wait_4_ptr(void *spinner, kmp_uint32 checker,
3389  kmp_uint32 (*pred)(void *, kmp_uint32), void *obj);
3390 
3391 class kmp_flag_32;
3392 class kmp_flag_64;
3393 class kmp_flag_oncore;
3394 extern void __kmp_wait_64(kmp_info_t *this_thr, kmp_flag_64 *flag,
3395  int final_spin
3396 #if USE_ITT_BUILD
3397  ,
3398  void *itt_sync_obj
3399 #endif
3400  );
3401 extern void __kmp_release_64(kmp_flag_64 *flag);
3402 
3403 extern void __kmp_infinite_loop(void);
3404 
3405 extern void __kmp_cleanup(void);
3406 
3407 #if KMP_HANDLE_SIGNALS
3408 extern int __kmp_handle_signals;
3409 extern void __kmp_install_signals(int parallel_init);
3410 extern void __kmp_remove_signals(void);
3411 #endif
3412 
3413 extern void __kmp_clear_system_time(void);
3414 extern void __kmp_read_system_time(double *delta);
3415 
3416 extern void __kmp_check_stack_overlap(kmp_info_t *thr);
3417 
3418 extern void __kmp_expand_host_name(char *buffer, size_t size);
3419 extern void __kmp_expand_file_name(char *result, size_t rlen, char *pattern);
3420 
3421 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
3422 extern void
3423 __kmp_initialize_system_tick(void); /* Initialize timer tick value */
3424 #endif
3425 
3426 extern void
3427 __kmp_runtime_initialize(void); /* machine specific initialization */
3428 extern void __kmp_runtime_destroy(void);
3429 
3430 #if KMP_AFFINITY_SUPPORTED
3431 extern char *__kmp_affinity_print_mask(char *buf, int buf_len,
3432  kmp_affin_mask_t *mask);
3433 extern kmp_str_buf_t *__kmp_affinity_str_buf_mask(kmp_str_buf_t *buf,
3434  kmp_affin_mask_t *mask);
3435 extern void __kmp_affinity_initialize(void);
3436 extern void __kmp_affinity_uninitialize(void);
3437 extern void __kmp_affinity_set_init_mask(
3438  int gtid, int isa_root); /* set affinity according to KMP_AFFINITY */
3439 #if OMP_40_ENABLED
3440 extern void __kmp_affinity_set_place(int gtid);
3441 #endif
3442 extern void __kmp_affinity_determine_capable(const char *env_var);
3443 extern int __kmp_aux_set_affinity(void **mask);
3444 extern int __kmp_aux_get_affinity(void **mask);
3445 extern int __kmp_aux_get_affinity_max_proc();
3446 extern int __kmp_aux_set_affinity_mask_proc(int proc, void **mask);
3447 extern int __kmp_aux_unset_affinity_mask_proc(int proc, void **mask);
3448 extern int __kmp_aux_get_affinity_mask_proc(int proc, void **mask);
3449 extern void __kmp_balanced_affinity(kmp_info_t *th, int team_size);
3450 #if KMP_OS_LINUX
3451 extern int kmp_set_thread_affinity_mask_initial(void);
3452 #endif
3453 #endif /* KMP_AFFINITY_SUPPORTED */
3454 #if OMP_50_ENABLED
3455 // No need for KMP_AFFINITY_SUPPORTED guard as only one field in the
3456 // format string is for affinity, so platforms that do not support
3457 // affinity can still use the other fields, e.g., %N for num_threads
3458 extern size_t __kmp_aux_capture_affinity(int gtid, const char *format,
3459  kmp_str_buf_t *buffer);
3460 extern void __kmp_aux_display_affinity(int gtid, const char *format);
3461 #endif
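// Illustrative sketch (not part of the original header): driving the
// affinity-format helpers above. The format string uses the
// OMP_AFFINITY_FORMAT field syntax (e.g. %P process id, %i native thread id,
// %n thread number, %A affinity list); the particular string below is only an
// example and the helper name is hypothetical.
#if OMP_50_ENABLED
static inline void __kmp_example_show_affinity(int gtid) {
  __kmp_aux_display_affinity(gtid, "OMP: pid %P tid %i thread %n bound to %A");
}
#endif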
3462 
3463 extern void __kmp_cleanup_hierarchy();
3464 extern void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar);
3465 
3466 #if KMP_USE_FUTEX
3467 
3468 extern int __kmp_futex_determine_capable(void);
3469 
3470 #endif // KMP_USE_FUTEX
3471 
3472 extern void __kmp_gtid_set_specific(int gtid);
3473 extern int __kmp_gtid_get_specific(void);
3474 
3475 extern double __kmp_read_cpu_time(void);
3476 
3477 extern int __kmp_read_system_info(struct kmp_sys_info *info);
3478 
3479 #if KMP_USE_MONITOR
3480 extern void __kmp_create_monitor(kmp_info_t *th);
3481 #endif
3482 
3483 extern void *__kmp_launch_thread(kmp_info_t *thr);
3484 
3485 extern void __kmp_create_worker(int gtid, kmp_info_t *th, size_t stack_size);
3486 
3487 #if KMP_OS_WINDOWS
3488 extern int __kmp_still_running(kmp_info_t *th);
3489 extern int __kmp_is_thread_alive(kmp_info_t *th, DWORD *exit_val);
3490 extern void __kmp_free_handle(kmp_thread_t tHandle);
3491 #endif
3492 
3493 #if KMP_USE_MONITOR
3494 extern void __kmp_reap_monitor(kmp_info_t *th);
3495 #endif
3496 extern void __kmp_reap_worker(kmp_info_t *th);
3497 extern void __kmp_terminate_thread(int gtid);
3498 
3499 extern int __kmp_try_suspend_mx(kmp_info_t *th);
3500 extern void __kmp_lock_suspend_mx(kmp_info_t *th);
3501 extern void __kmp_unlock_suspend_mx(kmp_info_t *th);
3502 
3503 extern void __kmp_suspend_32(int th_gtid, kmp_flag_32 *flag);
3504 extern void __kmp_suspend_64(int th_gtid, kmp_flag_64 *flag);
3505 extern void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag);
3506 extern void __kmp_resume_32(int target_gtid, kmp_flag_32 *flag);
3507 extern void __kmp_resume_64(int target_gtid, kmp_flag_64 *flag);
3508 extern void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag);
3509 
3510 extern void __kmp_elapsed(double *);
3511 extern void __kmp_elapsed_tick(double *);
3512 
3513 extern void __kmp_enable(int old_state);
3514 extern void __kmp_disable(int *old_state);
3515 
3516 extern void __kmp_thread_sleep(int millis);
3517 
3518 extern void __kmp_common_initialize(void);
3519 extern void __kmp_common_destroy(void);
3520 extern void __kmp_common_destroy_gtid(int gtid);
3521 
3522 #if KMP_OS_UNIX
3523 extern void __kmp_register_atfork(void);
3524 #endif
3525 extern void __kmp_suspend_initialize(void);
3526 extern void __kmp_suspend_initialize_thread(kmp_info_t *th);
3527 extern void __kmp_suspend_uninitialize_thread(kmp_info_t *th);
3528 
3529 extern kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
3530  int tid);
3531 #if OMP_40_ENABLED
3532 extern kmp_team_t *
3533 __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
3534 #if OMPT_SUPPORT
3535  ompt_data_t ompt_parallel_data,
3536 #endif
3537  kmp_proc_bind_t proc_bind, kmp_internal_control_t *new_icvs,
3538  int argc USE_NESTED_HOT_ARG(kmp_info_t *thr));
3539 #else
3540 extern kmp_team_t *
3541 __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
3542 #if OMPT_SUPPORT
3543  ompt_id_t ompt_parallel_id,
3544 #endif
3545  kmp_internal_control_t *new_icvs,
3546  int argc USE_NESTED_HOT_ARG(kmp_info_t *thr));
3547 #endif // OMP_40_ENABLED
3548 extern void __kmp_free_thread(kmp_info_t *);
3549 extern void __kmp_free_team(kmp_root_t *,
3550  kmp_team_t *USE_NESTED_HOT_ARG(kmp_info_t *));
3551 extern kmp_team_t *__kmp_reap_team(kmp_team_t *);
3552 
3553 /* ------------------------------------------------------------------------ */
3554 
3555 extern void __kmp_initialize_bget(kmp_info_t *th);
3556 extern void __kmp_finalize_bget(kmp_info_t *th);
3557 
3558 KMP_EXPORT void *kmpc_malloc(size_t size);
3559 KMP_EXPORT void *kmpc_aligned_malloc(size_t size, size_t alignment);
3560 KMP_EXPORT void *kmpc_calloc(size_t nelem, size_t elsize);
3561 KMP_EXPORT void *kmpc_realloc(void *ptr, size_t size);
3562 KMP_EXPORT void kmpc_free(void *ptr);
3563 
3564 /* declarations for internal use */
3565 
3566 extern int __kmp_barrier(enum barrier_type bt, int gtid, int is_split,
3567  size_t reduce_size, void *reduce_data,
3568  void (*reduce)(void *, void *));
3569 extern void __kmp_end_split_barrier(enum barrier_type bt, int gtid);
3570 extern int __kmp_barrier_gomp_cancel(int gtid);
3571 
3576 enum fork_context_e {
3577  fork_context_gnu, /**< Called from GNU generated code, so must not invoke the
3578  microtask internally. */
3579  fork_context_intel, /**< Called from Intel generated code. */
3580  fork_context_last
3581 };
3582 extern int __kmp_fork_call(ident_t *loc, int gtid,
3583  enum fork_context_e fork_context, kmp_int32 argc,
3584  microtask_t microtask, launch_t invoker,
3585 /* TODO: revert workaround for Intel(R) 64 tracker #96 */
3586 #if (KMP_ARCH_ARM || KMP_ARCH_X86_64 || KMP_ARCH_AARCH64) && KMP_OS_LINUX
3587  va_list *ap
3588 #else
3589  va_list ap
3590 #endif
3591  );
3592 
3593 extern void __kmp_join_call(ident_t *loc, int gtid
3594 #if OMPT_SUPPORT
3595  ,
3596  enum fork_context_e fork_context
3597 #endif
3598 #if OMP_40_ENABLED
3599  ,
3600  int exit_teams = 0
3601 #endif
3602  );
3603 
3604 extern void __kmp_serialized_parallel(ident_t *id, kmp_int32 gtid);
3605 extern void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team);
3606 extern void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team);
3607 extern int __kmp_invoke_task_func(int gtid);
3608 extern void __kmp_run_before_invoked_task(int gtid, int tid,
3609  kmp_info_t *this_thr,
3610  kmp_team_t *team);
3611 extern void __kmp_run_after_invoked_task(int gtid, int tid,
3612  kmp_info_t *this_thr,
3613  kmp_team_t *team);
3614 
3615 // should never have been exported
3616 KMP_EXPORT int __kmpc_invoke_task_func(int gtid);
3617 #if OMP_40_ENABLED
3618 extern int __kmp_invoke_teams_master(int gtid);
3619 extern void __kmp_teams_master(int gtid);
3620 extern int __kmp_aux_get_team_num();
3621 extern int __kmp_aux_get_num_teams();
3622 #endif
3623 extern void __kmp_save_internal_controls(kmp_info_t *thread);
3624 extern void __kmp_user_set_library(enum library_type arg);
3625 extern void __kmp_aux_set_library(enum library_type arg);
3626 extern void __kmp_aux_set_stacksize(size_t arg);
3627 extern void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid);
3628 extern void __kmp_aux_set_defaults(char const *str, int len);
3629 
3630 /* Functions called from __kmp_aux_env_initialize() in kmp_settings.cpp */
3631 void kmpc_set_blocktime(int arg);
3632 void ompc_set_nested(int flag);
3633 void ompc_set_dynamic(int flag);
3634 void ompc_set_num_threads(int arg);
3635 
3636 extern void __kmp_push_current_task_to_thread(kmp_info_t *this_thr,
3637  kmp_team_t *team, int tid);
3638 extern void __kmp_pop_current_task_from_thread(kmp_info_t *this_thr);
3639 extern kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
3640  kmp_tasking_flags_t *flags,
3641  size_t sizeof_kmp_task_t,
3642  size_t sizeof_shareds,
3643  kmp_routine_entry_t task_entry);
3644 extern void __kmp_init_implicit_task(ident_t *loc_ref, kmp_info_t *this_thr,
3645  kmp_team_t *team, int tid,
3646  int set_curr_task);
3647 extern void __kmp_finish_implicit_task(kmp_info_t *this_thr);
3648 extern void __kmp_free_implicit_task(kmp_info_t *this_thr);
3649 int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid,
3650  kmp_flag_32 *flag, int final_spin,
3651  int *thread_finished,
3652 #if USE_ITT_BUILD
3653  void *itt_sync_obj,
3654 #endif /* USE_ITT_BUILD */
3655  kmp_int32 is_constrained);
3656 int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid,
3657  kmp_flag_64 *flag, int final_spin,
3658  int *thread_finished,
3659 #if USE_ITT_BUILD
3660  void *itt_sync_obj,
3661 #endif /* USE_ITT_BUILD */
3662  kmp_int32 is_constrained);
3663 int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid,
3664  kmp_flag_oncore *flag, int final_spin,
3665  int *thread_finished,
3666 #if USE_ITT_BUILD
3667  void *itt_sync_obj,
3668 #endif /* USE_ITT_BUILD */
3669  kmp_int32 is_constrained);
3670 
3671 extern void __kmp_free_task_team(kmp_info_t *thread,
3672  kmp_task_team_t *task_team);
3673 extern void __kmp_reap_task_teams(void);
3674 extern void __kmp_wait_to_unref_task_teams(void);
3675 extern void __kmp_task_team_setup(kmp_info_t *this_thr, kmp_team_t *team,
3676  int always);
3677 extern void __kmp_task_team_sync(kmp_info_t *this_thr, kmp_team_t *team);
3678 extern void __kmp_task_team_wait(kmp_info_t *this_thr, kmp_team_t *team
3679 #if USE_ITT_BUILD
3680  ,
3681  void *itt_sync_obj
3682 #endif /* USE_ITT_BUILD */
3683  ,
3684  int wait = 1);
3685 extern void __kmp_tasking_barrier(kmp_team_t *team, kmp_info_t *thread,
3686  int gtid);
3687 
3688 extern int __kmp_is_address_mapped(void *addr);
3689 extern kmp_uint64 __kmp_hardware_timestamp(void);
3690 
3691 #if KMP_OS_UNIX
3692 extern int __kmp_read_from_file(char const *path, char const *format, ...);
3693 #endif
3694 
3695 /* ------------------------------------------------------------------------ */
3696 //
3697 // Assembly routines that have no compiler intrinsic replacement
3698 //
3699 
3700 extern int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int npr, int argc,
3701  void *argv[]
3702 #if OMPT_SUPPORT
3703  ,
3704  void **exit_frame_ptr
3705 #endif
3706  );
3707 
3708 /* ------------------------------------------------------------------------ */
3709 
3710 KMP_EXPORT void __kmpc_begin(ident_t *, kmp_int32 flags);
3711 KMP_EXPORT void __kmpc_end(ident_t *);
3712 
3713 KMP_EXPORT void __kmpc_threadprivate_register_vec(ident_t *, void *data,
3714  kmpc_ctor_vec ctor,
3715  kmpc_cctor_vec cctor,
3716  kmpc_dtor_vec dtor,
3717  size_t vector_length);
3718 KMP_EXPORT void __kmpc_threadprivate_register(ident_t *, void *data,
3719  kmpc_ctor ctor, kmpc_cctor cctor,
3720  kmpc_dtor dtor);
3721 KMP_EXPORT void *__kmpc_threadprivate(ident_t *, kmp_int32 global_tid,
3722  void *data, size_t size);
3723 
3724 KMP_EXPORT kmp_int32 __kmpc_global_thread_num(ident_t *);
3725 KMP_EXPORT kmp_int32 __kmpc_global_num_threads(ident_t *);
3726 KMP_EXPORT kmp_int32 __kmpc_bound_thread_num(ident_t *);
3727 KMP_EXPORT kmp_int32 __kmpc_bound_num_threads(ident_t *);
3728 
3729 KMP_EXPORT kmp_int32 __kmpc_ok_to_fork(ident_t *);
3730 KMP_EXPORT void __kmpc_fork_call(ident_t *, kmp_int32 nargs,
3731  kmpc_micro microtask, ...);
3732 
3733 KMP_EXPORT void __kmpc_serialized_parallel(ident_t *, kmp_int32 global_tid);
3734 KMP_EXPORT void __kmpc_end_serialized_parallel(ident_t *, kmp_int32 global_tid);
3735 
3736 KMP_EXPORT void __kmpc_flush(ident_t *);
3737 KMP_EXPORT void __kmpc_barrier(ident_t *, kmp_int32 global_tid);
3738 KMP_EXPORT kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
3739 KMP_EXPORT void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
3740 KMP_EXPORT void __kmpc_ordered(ident_t *, kmp_int32 global_tid);
3741 KMP_EXPORT void __kmpc_end_ordered(ident_t *, kmp_int32 global_tid);
3742 KMP_EXPORT void __kmpc_critical(ident_t *, kmp_int32 global_tid,
3743  kmp_critical_name *);
3744 KMP_EXPORT void __kmpc_end_critical(ident_t *, kmp_int32 global_tid,
3745  kmp_critical_name *);
3746 
3747 #if OMP_45_ENABLED
3748 KMP_EXPORT void __kmpc_critical_with_hint(ident_t *, kmp_int32 global_tid,
3749  kmp_critical_name *, uint32_t hint);
3750 #endif
3751 
3752 KMP_EXPORT kmp_int32 __kmpc_barrier_master(ident_t *, kmp_int32 global_tid);
3753 KMP_EXPORT void __kmpc_end_barrier_master(ident_t *, kmp_int32 global_tid);
3754 
3755 KMP_EXPORT kmp_int32 __kmpc_barrier_master_nowait(ident_t *,
3756  kmp_int32 global_tid);
3757 
3758 KMP_EXPORT kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
3759 KMP_EXPORT void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
3760 
3761 KMP_EXPORT void KMPC_FOR_STATIC_INIT(ident_t *loc, kmp_int32 global_tid,
3762  kmp_int32 schedtype, kmp_int32 *plastiter,
3763  kmp_int *plower, kmp_int *pupper,
3764  kmp_int *pstride, kmp_int incr,
3765  kmp_int chunk);
3766 
3767 KMP_EXPORT void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
3768 
3769 KMP_EXPORT void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
3770  size_t cpy_size, void *cpy_data,
3771  void (*cpy_func)(void *, void *),
3772  kmp_int32 didit);
3773 
3774 extern void KMPC_SET_NUM_THREADS(int arg);
3775 extern void KMPC_SET_DYNAMIC(int flag);
3776 extern void KMPC_SET_NESTED(int flag);
3777 
3778 /* OMP 3.0 tasking interface routines */
3779 KMP_EXPORT kmp_int32 __kmpc_omp_task(ident_t *loc_ref, kmp_int32 gtid,
3780  kmp_task_t *new_task);
3781 KMP_EXPORT kmp_task_t *__kmpc_omp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
3782  kmp_int32 flags,
3783  size_t sizeof_kmp_task_t,
3784  size_t sizeof_shareds,
3785  kmp_routine_entry_t task_entry);
3786 KMP_EXPORT void __kmpc_omp_task_begin_if0(ident_t *loc_ref, kmp_int32 gtid,
3787  kmp_task_t *task);
3788 KMP_EXPORT void __kmpc_omp_task_complete_if0(ident_t *loc_ref, kmp_int32 gtid,
3789  kmp_task_t *task);
3790 KMP_EXPORT kmp_int32 __kmpc_omp_task_parts(ident_t *loc_ref, kmp_int32 gtid,
3791  kmp_task_t *new_task);
3792 KMP_EXPORT kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid);
3793 
3794 KMP_EXPORT kmp_int32 __kmpc_omp_taskyield(ident_t *loc_ref, kmp_int32 gtid,
3795  int end_part);
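// Illustrative sketch (not part of the original header): the typical
// compiler-generated sequence for a deferred task built on the tasking entry
// points above. The entry routine and helper names are hypothetical, and the
// flags/sizes are the simplest possible choices.
static kmp_int32 __kmp_example_task_entry(kmp_int32 gtid, void *task) {
  (void)gtid;
  (void)task;
  return 0; // the outlined task body would run here
}
static inline void __kmp_example_spawn_task(ident_t *loc) {
  kmp_int32 gtid = __kmpc_global_thread_num(loc);
  kmp_task_t *t = __kmpc_omp_task_alloc(loc, gtid, /*flags=*/TASK_TIED,
                                        sizeof(kmp_task_t),
                                        /*sizeof_shareds=*/0,
                                        __kmp_example_task_entry);
  // ... copy firstprivate data into the allocated task here ...
  __kmpc_omp_task(loc, gtid, t); // enqueue; may execute immediately
}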
3796 
3797 #if TASK_UNUSED
3798 void __kmpc_omp_task_begin(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task);
3799 void __kmpc_omp_task_complete(ident_t *loc_ref, kmp_int32 gtid,
3800  kmp_task_t *task);
3801 #endif // TASK_UNUSED
3802 
3803 /* ------------------------------------------------------------------------ */
3804 
3805 #if OMP_40_ENABLED
3806 
3807 KMP_EXPORT void __kmpc_taskgroup(ident_t *loc, int gtid);
3808 KMP_EXPORT void __kmpc_end_taskgroup(ident_t *loc, int gtid);
3809 
3810 KMP_EXPORT kmp_int32 __kmpc_omp_task_with_deps(
3811  ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 ndeps,
3812  kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
3813  kmp_depend_info_t *noalias_dep_list);
3814 KMP_EXPORT void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid,
3815  kmp_int32 ndeps,
3816  kmp_depend_info_t *dep_list,
3817  kmp_int32 ndeps_noalias,
3818  kmp_depend_info_t *noalias_dep_list);
3819 
3820 extern kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task,
3821  bool serialize_immediate);
3822 
3823 KMP_EXPORT kmp_int32 __kmpc_cancel(ident_t *loc_ref, kmp_int32 gtid,
3824  kmp_int32 cncl_kind);
3825 KMP_EXPORT kmp_int32 __kmpc_cancellationpoint(ident_t *loc_ref, kmp_int32 gtid,
3826  kmp_int32 cncl_kind);
3827 KMP_EXPORT kmp_int32 __kmpc_cancel_barrier(ident_t *loc_ref, kmp_int32 gtid);
3828 KMP_EXPORT int __kmp_get_cancellation_status(int cancel_kind);
3829 
3830 #if OMP_45_ENABLED
3831 
3832 KMP_EXPORT void __kmpc_proxy_task_completed(kmp_int32 gtid, kmp_task_t *ptask);
3833 KMP_EXPORT void __kmpc_proxy_task_completed_ooo(kmp_task_t *ptask);
3834 KMP_EXPORT void __kmpc_taskloop(ident_t *loc, kmp_int32 gtid, kmp_task_t *task,
3835  kmp_int32 if_val, kmp_uint64 *lb,
3836  kmp_uint64 *ub, kmp_int64 st, kmp_int32 nogroup,
3837  kmp_int32 sched, kmp_uint64 grainsize,
3838  void *task_dup);
3839 #endif
3840 #if OMP_50_ENABLED
3841 KMP_EXPORT void *__kmpc_task_reduction_init(int gtid, int num_data, void *data);
3842 KMP_EXPORT void *__kmpc_taskred_init(int gtid, int num_data, void *data);
3843 KMP_EXPORT void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void *d);
3844 KMP_EXPORT void *__kmpc_task_reduction_modifier_init(ident_t *loc, int gtid,
3845  int is_ws, int num,
3846  void *data);
3847 KMP_EXPORT void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int is_ws,
3848  int num, void *data);
3849 KMP_EXPORT void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid,
3850  int is_ws);
3851 KMP_EXPORT kmp_int32 __kmpc_omp_reg_task_with_affinity(
3852  ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 naffins,
3853  kmp_task_affinity_info_t *affin_list);
3854 #endif
3855 
3856 #endif
3857 
3858 /* Lock interface routines (fast versions with gtid passed in) */
3859 KMP_EXPORT void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid,
3860  void **user_lock);
3861 KMP_EXPORT void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid,
3862  void **user_lock);
3863 KMP_EXPORT void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid,
3864  void **user_lock);
3865 KMP_EXPORT void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid,
3866  void **user_lock);
3867 KMP_EXPORT void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock);
3868 KMP_EXPORT void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid,
3869  void **user_lock);
3870 KMP_EXPORT void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid,
3871  void **user_lock);
3872 KMP_EXPORT void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid,
3873  void **user_lock);
3874 KMP_EXPORT int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock);
3875 KMP_EXPORT int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid,
3876  void **user_lock);
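// Illustrative sketch (not part of the original header): the usual pairing of
// the fast lock entry points above, as emitted for omp_set_lock /
// omp_unset_lock on a lock that was initialized with __kmpc_init_lock. The
// helper name is hypothetical.
static inline void __kmp_example_locked_region(ident_t *loc, kmp_int32 gtid,
                                               void **user_lock) {
  __kmpc_set_lock(loc, gtid, user_lock);   // blocks until the lock is acquired
  // ... critical work ...
  __kmpc_unset_lock(loc, gtid, user_lock); // release the lock
}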
3877 
3878 #if OMP_45_ENABLED
3879 KMP_EXPORT void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid,
3880  void **user_lock, uintptr_t hint);
3881 KMP_EXPORT void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
3882  void **user_lock,
3883  uintptr_t hint);
3884 #endif
3885 
3886 /* Interface to fast scalable reduce methods routines */
3887 
3888 KMP_EXPORT kmp_int32 __kmpc_reduce_nowait(
3889  ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
3890  void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
3891  kmp_critical_name *lck);
3892 KMP_EXPORT void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
3893  kmp_critical_name *lck);
3894 KMP_EXPORT kmp_int32 __kmpc_reduce(
3895  ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
3896  void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
3897  kmp_critical_name *lck);
3898 KMP_EXPORT void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
3899  kmp_critical_name *lck);
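// Illustrative sketch (not part of the original header): the return-code
// protocol of the reduce entry points above as typically used by generated
// code. A return value of 1 means "combine the partial results and call the
// matching end routine"; 2 means "update the shared variables with atomics";
// 0 means this thread has nothing further to do. Names are hypothetical.
static inline void
__kmp_example_reduce_nowait(ident_t *loc, kmp_int32 gtid, size_t reduce_size,
                            void *reduce_data,
                            void (*reduce_func)(void *, void *),
                            kmp_critical_name *lck) {
  kmp_int32 rc = __kmpc_reduce_nowait(loc, gtid, /*num_vars=*/1, reduce_size,
                                      reduce_data, reduce_func, lck);
  if (rc == 1) {
    // tree/critical path: combine partial results here ...
    __kmpc_end_reduce_nowait(loc, gtid, lck);
  } else if (rc == 2) {
    // atomic path: perform the reduction with atomic read-modify-write ops
  }
}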
3900 
3901 /* Internal fast reduction routines */
3902 
3903 extern PACKED_REDUCTION_METHOD_T __kmp_determine_reduction_method(
3904  ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
3905  void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
3906  kmp_critical_name *lck);
3907 
3908 // this function is for testing set/get/determine reduce method
3909 KMP_EXPORT kmp_int32 __kmp_get_reduce_method(void);
3910 
3911 KMP_EXPORT kmp_uint64 __kmpc_get_taskid();
3912 KMP_EXPORT kmp_uint64 __kmpc_get_parent_taskid();
3913 
3914 // C++ port
3915 // missing 'extern "C"' declarations
3916 
3917 KMP_EXPORT kmp_int32 __kmpc_in_parallel(ident_t *loc);
3918 KMP_EXPORT void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid);
3919 KMP_EXPORT void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
3920  kmp_int32 num_threads);
3921 
3922 #if OMP_40_ENABLED
3923 KMP_EXPORT void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
3924  int proc_bind);
3925 KMP_EXPORT void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
3926  kmp_int32 num_teams,
3927  kmp_int32 num_threads);
3928 KMP_EXPORT void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc,
3929  kmpc_micro microtask, ...);
3930 #endif
3931 #if OMP_45_ENABLED
3932 struct kmp_dim { // loop bounds info casted to kmp_int64
3933  kmp_int64 lo; // lower
3934  kmp_int64 up; // upper
3935  kmp_int64 st; // stride
3936 };
3937 KMP_EXPORT void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
3938  kmp_int32 num_dims,
3939  const struct kmp_dim *dims);
3940 KMP_EXPORT void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid,
3941  const kmp_int64 *vec);
3942 KMP_EXPORT void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid,
3943  const kmp_int64 *vec);
3944 KMP_EXPORT void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
3945 #endif
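// Illustrative sketch (not part of the original header): the doacross entry
// points above implement ordered(n) cross-iteration dependences. For a
// one-dimensional loop over 0..N with a sink dependence on i-1, the generated
// call sequence looks roughly like this (names hypothetical; in real code the
// iterations are distributed by the enclosing worksharing loop).
#if OMP_45_ENABLED
static inline void __kmp_example_doacross(ident_t *loc, kmp_int32 gtid,
                                          kmp_int64 N) {
  struct kmp_dim dims = {/*lo=*/0, /*up=*/N, /*st=*/1};
  __kmpc_doacross_init(loc, gtid, /*num_dims=*/1, &dims);
  for (kmp_int64 i = 0; i <= N; ++i) {
    kmp_int64 sink = i - 1;
    if (i > 0)
      __kmpc_doacross_wait(loc, gtid, &sink); // wait for depend(sink: i-1)
    // ... ordered loop body ...
    __kmpc_doacross_post(loc, gtid, &i); // satisfy depend(source) for i
  }
  __kmpc_doacross_fini(loc, gtid);
}
#endif // OMP_45_ENABLED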
3946 
3947 KMP_EXPORT void *__kmpc_threadprivate_cached(ident_t *loc, kmp_int32 global_tid,
3948  void *data, size_t size,
3949  void ***cache);
3950 
3951 // Symbols for MS mutual detection.
3952 extern int _You_must_link_with_exactly_one_OpenMP_library;
3953 extern int _You_must_link_with_Intel_OpenMP_library;
3954 #if KMP_OS_WINDOWS && (KMP_VERSION_MAJOR > 4)
3955 extern int _You_must_link_with_Microsoft_OpenMP_library;
3956 #endif
3957 
3958 // The routines below are not exported.
3959 // Consider making them 'static' in corresponding source files.
3960 void kmp_threadprivate_insert_private_data(int gtid, void *pc_addr,
3961  void *data_addr, size_t pc_size);
3962 struct private_common *kmp_threadprivate_insert(int gtid, void *pc_addr,
3963  void *data_addr,
3964  size_t pc_size);
3965 void __kmp_threadprivate_resize_cache(int newCapacity);
3966 void __kmp_cleanup_threadprivate_caches();
3967 
3968 // ompc_, kmpc_ entries moved from omp.h.
3969 #if KMP_OS_WINDOWS
3970 #define KMPC_CONVENTION __cdecl
3971 #else
3972 #define KMPC_CONVENTION
3973 #endif
3974 
3975 #ifndef __OMP_H
3976 typedef enum omp_sched_t {
3977  omp_sched_static = 1,
3978  omp_sched_dynamic = 2,
3979  omp_sched_guided = 3,
3980  omp_sched_auto = 4
3981 } omp_sched_t;
3982 typedef void *kmp_affinity_mask_t;
3983 #endif
3984 
3985 KMP_EXPORT void KMPC_CONVENTION ompc_set_max_active_levels(int);
3986 KMP_EXPORT void KMPC_CONVENTION ompc_set_schedule(omp_sched_t, int);
3987 KMP_EXPORT int KMPC_CONVENTION ompc_get_ancestor_thread_num(int);
3988 KMP_EXPORT int KMPC_CONVENTION ompc_get_team_size(int);
3989 KMP_EXPORT int KMPC_CONVENTION
3990 kmpc_set_affinity_mask_proc(int, kmp_affinity_mask_t *);
3991 KMP_EXPORT int KMPC_CONVENTION
3992 kmpc_unset_affinity_mask_proc(int, kmp_affinity_mask_t *);
3993 KMP_EXPORT int KMPC_CONVENTION
3994 kmpc_get_affinity_mask_proc(int, kmp_affinity_mask_t *);
3995 
3996 KMP_EXPORT void KMPC_CONVENTION kmpc_set_stacksize(int);
3997 KMP_EXPORT void KMPC_CONVENTION kmpc_set_stacksize_s(size_t);
3998 KMP_EXPORT void KMPC_CONVENTION kmpc_set_library(int);
3999 KMP_EXPORT void KMPC_CONVENTION kmpc_set_defaults(char const *);
4000 KMP_EXPORT void KMPC_CONVENTION kmpc_set_disp_num_buffers(int);
4001 
4002 #if OMP_50_ENABLED
4003 enum kmp_target_offload_kind {
4004  tgt_disabled = 0,
4005  tgt_default = 1,
4006  tgt_mandatory = 2
4007 };
4008 typedef enum kmp_target_offload_kind kmp_target_offload_kind_t;
4009 // Set via OMP_TARGET_OFFLOAD if specified, defaults to tgt_default otherwise
4010 extern kmp_target_offload_kind_t __kmp_target_offload;
4011 extern int __kmpc_get_target_offload();
4012 #endif
4013 
4014 #if OMP_40_ENABLED
4015 // Constants used in libomptarget
4016 #define KMP_DEVICE_DEFAULT -1 // This is libomptarget's default device.
4017 #define KMP_HOST_DEVICE -10 // This is what it is in libomptarget, go figure.
4018 #define KMP_DEVICE_ALL -11 // This is libomptarget's "all devices".
4019 #endif // OMP_40_ENABLED
4020 
4021 #if OMP_50_ENABLED
4022 // OMP Pause Resource
4023 
4024 // The following enum is used both to set the status in __kmp_pause_status, and
4025 // as the internal equivalent of the externally-visible omp_pause_resource_t.
4026 typedef enum kmp_pause_status_t {
4027  kmp_not_paused = 0, // status is not paused, or, requesting resume
4028  kmp_soft_paused = 1, // status is soft-paused, or, requesting soft pause
4029  kmp_hard_paused = 2 // status is hard-paused, or, requesting hard pause
4030 } kmp_pause_status_t;
4031 
4032 // This stores the pause state of the runtime
4033 extern kmp_pause_status_t __kmp_pause_status;
4034 extern int __kmpc_pause_resource(kmp_pause_status_t level);
4035 extern int __kmp_pause_resource(kmp_pause_status_t level);
4036 // Soft resume sets __kmp_pause_status, and wakes up all threads.
4037 extern void __kmp_resume_if_soft_paused();
4038 // Hard resume simply resets the status to not paused. Library will appear to
4039 // be uninitialized after hard pause. Let OMP constructs trigger required
4040 // initializations.
4041 static inline void __kmp_resume_if_hard_paused() {
4042  if (__kmp_pause_status == kmp_hard_paused) {
4043  __kmp_pause_status = kmp_not_paused;
4044  }
4045 }
4046 #endif // OMP_50_ENABLED
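// Illustrative sketch (not part of the original header): how the pause/resume
// protocol above is intended to be driven. A soft pause lets the runtime
// release idle worker resources while staying initialized; after a hard pause
// the library appears uninitialized until the next OpenMP construct
// re-initializes it. A zero return means the request was honored. The helper
// name is hypothetical.
#if OMP_50_ENABLED
static inline int __kmp_example_pause_and_resume(void) {
  if (__kmp_pause_resource(kmp_soft_paused) != 0)
    return -1; // the pause request was rejected
  // ... non-OpenMP work runs here with the runtime quiesced ...
  return __kmp_pause_resource(kmp_not_paused); // request resume
}
#endif // OMP_50_ENABLED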
4047 
4048 #ifdef __cplusplus
4049 }
4050 #endif
4051 
4052 #endif /* KMP_H */