/build/source/openmp/runtime/src/kmp

Bug Summary

File:	build/source/openmp/runtime/src/kmp_affinity.cpp
Warning:	line 3321, column 5 Value stored to 'idx' is never read

Annotated Source Code

Press '?' to see keyboard shortcuts

Show analyzer invocation

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name kmp_affinity.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -resource-dir /usr/lib/llvm-16/lib/clang/16 -I projects/openmp/runtime/src -I /build/source/openmp/runtime/src -I include -I /build/source/llvm/include -I /build/source/openmp/runtime/src/i18n -I /build/source/openmp/runtime/src/include -I /build/source/openmp/runtime/src/thirdparty/ittnotify -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -D omp_EXPORTS -D _FORTIFY_SOURCE=2 -D NDEBUG -D _GNU_SOURCE -D _REENTRANT -D _FORTIFY_SOURCE=2 -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-16/lib/clang/16/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fmacro-prefix-map=/build/source/= -fcoverage-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fcoverage-prefix-map=/build/source/= -source-date-epoch 1670584389 -O2 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -Wno-misleading-indentation -Wno-enum-constexpr-conversion -Wno-extra -Wno-pedantic -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-covered-switch-default -Wno-frame-address -Wno-strict-aliasing -Wno-stringop-truncation -Wno-switch -Wno-uninitialized -Wno-return-type-c-linkage -Wno-cast-qual -Wno-int-to-void-pointer-cast -std=c++17 -fdeprecated-macro -fdebug-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/= -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fno-rtti -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2022-12-09-134624-15957-1 -x c++ /build/source/openmp/runtime/src/kmp_affinity.cpp

1	/*
2	* kmp_affinity.cpp -- affinity management
3	*/
4
5	//===----------------------------------------------------------------------===//
6	//
7	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8	// See https://llvm.org/LICENSE.txt for license information.
9	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10	//
11	//===----------------------------------------------------------------------===//
12
13	#include "kmp.h"
14	#include "kmp_affinity.h"
15	#include "kmp_i18n.h"
16	#include "kmp_io.h"
17	#include "kmp_str.h"
18	#include "kmp_wrapper_getpid.h"
19	#if KMP_USE_HIER_SCHED0
20	#include "kmp_dispatch_hier.h"
21	#endif
22	#if KMP_USE_HWLOC0
23	// Copied from hwloc
24	#define HWLOC_GROUP_KIND_INTEL_MODULE 102
25	#define HWLOC_GROUP_KIND_INTEL_TILE 103
26	#define HWLOC_GROUP_KIND_INTEL_DIE 104
27	#define HWLOC_GROUP_KIND_WINDOWS_PROCESSOR_GROUP 220
28	#endif
29	#include <ctype.h>
30
31	// The machine topology
32	kmp_topology_t *__kmp_topology = nullptr;
33	// KMP_HW_SUBSET environment variable
34	kmp_hw_subset_t *__kmp_hw_subset = nullptr;
35
36	// Store the real or imagined machine hierarchy here
37	static hierarchy_info machine_hierarchy;
38
39	void __kmp_cleanup_hierarchy() { machine_hierarchy.fini(); }
40
41	void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
42	kmp_uint32 depth;
43	// The test below is true if affinity is available, but set to "none". Need to
44	// init on first use of hierarchical barrier.
45	if (TCR_1(machine_hierarchy.uninitialized)(machine_hierarchy.uninitialized))
46	machine_hierarchy.init(nproc);
47
48	// Adjust the hierarchy in case num threads exceeds original
49	if (nproc > machine_hierarchy.base_num_threads)
50	machine_hierarchy.resize(nproc);
51
52	depth = machine_hierarchy.depth;
53	KMP_DEBUG_ASSERT(depth > 0)if (!(depth > 0)) { __kmp_debug_assert("depth > 0", "openmp/runtime/src/kmp_affinity.cpp" , 53); };
54
55	thr_bar->depth = depth;
56	__kmp_type_convert(machine_hierarchy.numPerLevel[0] - 1,
57	&(thr_bar->base_leaf_kids));
58	thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
59	}
60
61	static int nCoresPerPkg, nPackages;
62	static int __kmp_nThreadsPerCore;
63	#ifndef KMP_DFLT_NTH_CORES
64	static int __kmp_ncores;
65	#endif
66
67	const char *__kmp_hw_get_catalog_string(kmp_hw_t type, bool plural) {
68	switch (type) {
69	case KMP_HW_SOCKET:
70	return ((plural) ? KMP_I18N_STR(Sockets)__kmp_i18n_catgets(kmp_i18n_str_Sockets) : KMP_I18N_STR(Socket)__kmp_i18n_catgets(kmp_i18n_str_Socket));
71	case KMP_HW_DIE:
72	return ((plural) ? KMP_I18N_STR(Dice)__kmp_i18n_catgets(kmp_i18n_str_Dice) : KMP_I18N_STR(Die)__kmp_i18n_catgets(kmp_i18n_str_Die));
73	case KMP_HW_MODULE:
74	return ((plural) ? KMP_I18N_STR(Modules)__kmp_i18n_catgets(kmp_i18n_str_Modules) : KMP_I18N_STR(Module)__kmp_i18n_catgets(kmp_i18n_str_Module));
75	case KMP_HW_TILE:
76	return ((plural) ? KMP_I18N_STR(Tiles)__kmp_i18n_catgets(kmp_i18n_str_Tiles) : KMP_I18N_STR(Tile)__kmp_i18n_catgets(kmp_i18n_str_Tile));
77	case KMP_HW_NUMA:
78	return ((plural) ? KMP_I18N_STR(NumaDomains)__kmp_i18n_catgets(kmp_i18n_str_NumaDomains) : KMP_I18N_STR(NumaDomain)__kmp_i18n_catgets(kmp_i18n_str_NumaDomain));
79	case KMP_HW_L3:
80	return ((plural) ? KMP_I18N_STR(L3Caches)__kmp_i18n_catgets(kmp_i18n_str_L3Caches) : KMP_I18N_STR(L3Cache)__kmp_i18n_catgets(kmp_i18n_str_L3Cache));
81	case KMP_HW_L2:
82	return ((plural) ? KMP_I18N_STR(L2Caches)__kmp_i18n_catgets(kmp_i18n_str_L2Caches) : KMP_I18N_STR(L2Cache)__kmp_i18n_catgets(kmp_i18n_str_L2Cache));
83	case KMP_HW_L1:
84	return ((plural) ? KMP_I18N_STR(L1Caches)__kmp_i18n_catgets(kmp_i18n_str_L1Caches) : KMP_I18N_STR(L1Cache)__kmp_i18n_catgets(kmp_i18n_str_L1Cache));
85	case KMP_HW_LLC:
86	return ((plural) ? KMP_I18N_STR(LLCaches)__kmp_i18n_catgets(kmp_i18n_str_LLCaches) : KMP_I18N_STR(LLCache)__kmp_i18n_catgets(kmp_i18n_str_LLCache));
87	case KMP_HW_CORE:
88	return ((plural) ? KMP_I18N_STR(Cores)__kmp_i18n_catgets(kmp_i18n_str_Cores) : KMP_I18N_STR(Core)__kmp_i18n_catgets(kmp_i18n_str_Core));
89	case KMP_HW_THREAD:
90	return ((plural) ? KMP_I18N_STR(Threads)__kmp_i18n_catgets(kmp_i18n_str_Threads) : KMP_I18N_STR(Thread)__kmp_i18n_catgets(kmp_i18n_str_Thread));
91	case KMP_HW_PROC_GROUP:
92	return ((plural) ? KMP_I18N_STR(ProcGroups)__kmp_i18n_catgets(kmp_i18n_str_ProcGroups) : KMP_I18N_STR(ProcGroup)__kmp_i18n_catgets(kmp_i18n_str_ProcGroup));
93	}
94	return KMP_I18N_STR(Unknown)__kmp_i18n_catgets(kmp_i18n_str_Unknown);
95	}
96
97	const char *__kmp_hw_get_keyword(kmp_hw_t type, bool plural) {
98	switch (type) {
99	case KMP_HW_SOCKET:
100	return ((plural) ? "sockets" : "socket");
101	case KMP_HW_DIE:
102	return ((plural) ? "dice" : "die");
103	case KMP_HW_MODULE:
104	return ((plural) ? "modules" : "module");
105	case KMP_HW_TILE:
106	return ((plural) ? "tiles" : "tile");
107	case KMP_HW_NUMA:
108	return ((plural) ? "numa_domains" : "numa_domain");
109	case KMP_HW_L3:
110	return ((plural) ? "l3_caches" : "l3_cache");
111	case KMP_HW_L2:
112	return ((plural) ? "l2_caches" : "l2_cache");
113	case KMP_HW_L1:
114	return ((plural) ? "l1_caches" : "l1_cache");
115	case KMP_HW_LLC:
116	return ((plural) ? "ll_caches" : "ll_cache");
117	case KMP_HW_CORE:
118	return ((plural) ? "cores" : "core");
119	case KMP_HW_THREAD:
120	return ((plural) ? "threads" : "thread");
121	case KMP_HW_PROC_GROUP:
122	return ((plural) ? "proc_groups" : "proc_group");
123	}
124	return ((plural) ? "unknowns" : "unknown");
125	}
126
127	const char *__kmp_hw_get_core_type_string(kmp_hw_core_type_t type) {
128	switch (type) {
129	case KMP_HW_CORE_TYPE_UNKNOWN:
130	return "unknown";
131	#if KMP_ARCH_X860 \|\| KMP_ARCH_X86_641
132	case KMP_HW_CORE_TYPE_ATOM:
133	return "Intel Atom(R) processor";
134	case KMP_HW_CORE_TYPE_CORE:
135	return "Intel(R) Core(TM) processor";
136	#endif
137	}
138	return "unknown";
139	}
140
141	#if KMP_AFFINITY_SUPPORTED1
142	// If affinity is supported, check the affinity
143	// verbose and warning flags before printing warning
144	#define KMP_AFF_WARNING(s, ...)if (s.flags.verbose \|\| (s.flags.warnings && (s.type != affinity_none))) { __kmp_msg(kmp_ms_warning, __kmp_msg_format (kmp_i18n_msg_...), __kmp_msg_null); } \
145	if (s.flags.verbose \|\| (s.flags.warnings && (s.type != affinity_none))) { \
146	KMP_WARNING(__VA_ARGS__)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg___VA_ARGS__ ), __kmp_msg_null); \
147	}
148	#else
149	#define KMP_AFF_WARNING(s, ...)if (s.flags.verbose \|\| (s.flags.warnings && (s.type != affinity_none))) { __kmp_msg(kmp_ms_warning, __kmp_msg_format (kmp_i18n_msg_...), __kmp_msg_null); } KMP_WARNING(__VA_ARGS__)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg___VA_ARGS__ ), __kmp_msg_null)
150	#endif
151
152	////////////////////////////////////////////////////////////////////////////////
153	// kmp_hw_thread_t methods
154	int kmp_hw_thread_t::compare_ids(const void a, const void b) {
155	const kmp_hw_thread_t ahwthread = (const kmp_hw_thread_t )a;
156	const kmp_hw_thread_t bhwthread = (const kmp_hw_thread_t )b;
157	int depth = __kmp_topology->get_depth();
158	for (int level = 0; level < depth; ++level) {
159	if (ahwthread->ids[level] < bhwthread->ids[level])
160	return -1;
161	else if (ahwthread->ids[level] > bhwthread->ids[level])
162	return 1;
163	}
164	if (ahwthread->os_id < bhwthread->os_id)
165	return -1;
166	else if (ahwthread->os_id > bhwthread->os_id)
167	return 1;
168	return 0;
169	}
170
171	#if KMP_AFFINITY_SUPPORTED1
172	int kmp_hw_thread_t::compare_compact(const void a, const void b) {
173	int i;
174	const kmp_hw_thread_t aa = (const kmp_hw_thread_t )a;
175	const kmp_hw_thread_t bb = (const kmp_hw_thread_t )b;
176	int depth = __kmp_topology->get_depth();
177	int compact = __kmp_topology->compact;
178	KMP_DEBUG_ASSERT(compact >= 0)if (!(compact >= 0)) { __kmp_debug_assert("compact >= 0" , "openmp/runtime/src/kmp_affinity.cpp", 178); };
179	KMP_DEBUG_ASSERT(compact <= depth)if (!(compact <= depth)) { __kmp_debug_assert("compact <= depth" , "openmp/runtime/src/kmp_affinity.cpp", 179); };
180	for (i = 0; i < compact; i++) {
181	int j = depth - i - 1;
182	if (aa->sub_ids[j] < bb->sub_ids[j])
183	return -1;
184	if (aa->sub_ids[j] > bb->sub_ids[j])
185	return 1;
186	}
187	for (; i < depth; i++) {
188	int j = i - compact;
189	if (aa->sub_ids[j] < bb->sub_ids[j])
190	return -1;
191	if (aa->sub_ids[j] > bb->sub_ids[j])
192	return 1;
193	}
194	return 0;
195	}
196	#endif
197
198	void kmp_hw_thread_t::print() const {
199	int depth = __kmp_topology->get_depth();
200	printf("%4d ", os_id);
201	for (int i = 0; i < depth; ++i) {
202	printf("%4d ", ids[i]);
203	}
204	if (attrs) {
205	if (attrs.is_core_type_valid())
206	printf(" (%s)", __kmp_hw_get_core_type_string(attrs.get_core_type()));
207	if (attrs.is_core_eff_valid())
208	printf(" (eff=%d)", attrs.get_core_eff());
209	}
210	printf("\n");
211	}
212
213	////////////////////////////////////////////////////////////////////////////////
214	// kmp_topology_t methods
215
216	// Add a layer to the topology based on the ids. Assume the topology
217	// is perfectly nested (i.e., so no object has more than one parent)
218	void kmp_topology_t::_insert_layer(kmp_hw_t type, const int *ids) {
219	// Figure out where the layer should go by comparing the ids of the current
220	// layers with the new ids
221	int target_layer;
222	int previous_id = kmp_hw_thread_t::UNKNOWN_ID;
223	int previous_new_id = kmp_hw_thread_t::UNKNOWN_ID;
224
225	// Start from the highest layer and work down to find target layer
226	// If new layer is equal to another layer then put the new layer above
227	for (target_layer = 0; target_layer < depth; ++target_layer) {
228	bool layers_equal = true;
229	bool strictly_above_target_layer = false;
230	for (int i = 0; i < num_hw_threads; ++i) {
231	int id = hw_threads[i].ids[target_layer];
232	int new_id = ids[i];
233	if (id != previous_id && new_id == previous_new_id) {
234	// Found the layer we are strictly above
235	strictly_above_target_layer = true;
236	layers_equal = false;
237	break;
238	} else if (id == previous_id && new_id != previous_new_id) {
239	// Found a layer we are below. Move to next layer and check.
240	layers_equal = false;
241	break;
242	}
243	previous_id = id;
244	previous_new_id = new_id;
245	}
246	if (strictly_above_target_layer \|\| layers_equal)
247	break;
248	}
249
250	// Found the layer we are above. Now move everything to accommodate the new
251	// layer. And put the new ids and type into the topology.
252	for (int i = depth - 1, j = depth; i >= target_layer; --i, --j)
253	types[j] = types[i];
254	types[target_layer] = type;
255	for (int k = 0; k < num_hw_threads; ++k) {
256	for (int i = depth - 1, j = depth; i >= target_layer; --i, --j)
257	hw_threads[k].ids[j] = hw_threads[k].ids[i];
258	hw_threads[k].ids[target_layer] = ids[k];
259	}
260	equivalent[type] = type;
261	depth++;
262	}
263
264	#if KMP_GROUP_AFFINITY0
265	// Insert the Windows Processor Group structure into the topology
266	void kmp_topology_t::_insert_windows_proc_groups() {
267	// Do not insert the processor group structure for a single group
268	if (__kmp_num_proc_groups == 1)
269	return;
270	kmp_affin_mask_t *mask;
271	int ids = (int )__kmp_allocate(sizeof(int) * num_hw_threads)___kmp_allocate((sizeof(int) * num_hw_threads), "openmp/runtime/src/kmp_affinity.cpp" , 271);
272	KMP_CPU_ALLOC(mask)(mask = __kmp_affinity_dispatch->allocate_mask());
273	for (int i = 0; i < num_hw_threads; ++i) {
274	KMP_CPU_ZERO(mask)(mask)->zero();
275	KMP_CPU_SET(hw_threads[i].os_id, mask)(mask)->set(hw_threads[i].os_id);
276	ids[i] = __kmp_get_proc_group(mask)(mask)->get_proc_group();
277	}
278	KMP_CPU_FREE(mask)__kmp_affinity_dispatch->deallocate_mask(mask);
279	_insert_layer(KMP_HW_PROC_GROUP, ids);
280	__kmp_free(ids)___kmp_free((ids), "openmp/runtime/src/kmp_affinity.cpp", 280 );
281	}
282	#endif
283
284	// Remove layers that don't add information to the topology.
285	// This is done by having the layer take on the id = UNKNOWN_ID (-1)
286	void kmp_topology_t::_remove_radix1_layers() {
287	int preference[KMP_HW_LAST];
288	int top_index1, top_index2;
289	// Set up preference associative array
290	preference[KMP_HW_SOCKET] = 110;
291	preference[KMP_HW_PROC_GROUP] = 100;
292	preference[KMP_HW_CORE] = 95;
293	preference[KMP_HW_THREAD] = 90;
294	preference[KMP_HW_NUMA] = 85;
295	preference[KMP_HW_DIE] = 80;
296	preference[KMP_HW_TILE] = 75;
297	preference[KMP_HW_MODULE] = 73;
298	preference[KMP_HW_L3] = 70;
299	preference[KMP_HW_L2] = 65;
300	preference[KMP_HW_L1] = 60;
301	preference[KMP_HW_LLC] = 5;
302	top_index1 = 0;
303	top_index2 = 1;
304	while (top_index1 < depth - 1 && top_index2 < depth) {
305	kmp_hw_t type1 = types[top_index1];
306	kmp_hw_t type2 = types[top_index2];
307	KMP_ASSERT_VALID_HW_TYPE(type1)if (!(type1 >= (kmp_hw_t)0 && type1 < KMP_HW_LAST )) { __kmp_debug_assert("type1 >= (kmp_hw_t)0 && type1 < KMP_HW_LAST" , "openmp/runtime/src/kmp_affinity.cpp", 307); };
308	KMP_ASSERT_VALID_HW_TYPE(type2)if (!(type2 >= (kmp_hw_t)0 && type2 < KMP_HW_LAST )) { __kmp_debug_assert("type2 >= (kmp_hw_t)0 && type2 < KMP_HW_LAST" , "openmp/runtime/src/kmp_affinity.cpp", 308); };
309	// Do not allow the three main topology levels (sockets, cores, threads) to
310	// be compacted down
311	if ((type1 == KMP_HW_THREAD \|\| type1 == KMP_HW_CORE \|\|
312	type1 == KMP_HW_SOCKET) &&
313	(type2 == KMP_HW_THREAD \|\| type2 == KMP_HW_CORE \|\|
314	type2 == KMP_HW_SOCKET)) {
315	top_index1 = top_index2++;
316	continue;
317	}
318	bool radix1 = true;
319	bool all_same = true;
320	int id1 = hw_threads[0].ids[top_index1];
321	int id2 = hw_threads[0].ids[top_index2];
322	int pref1 = preference[type1];
323	int pref2 = preference[type2];
324	for (int hwidx = 1; hwidx < num_hw_threads; ++hwidx) {
325	if (hw_threads[hwidx].ids[top_index1] == id1 &&
326	hw_threads[hwidx].ids[top_index2] != id2) {
327	radix1 = false;
328	break;
329	}
330	if (hw_threads[hwidx].ids[top_index2] != id2)
331	all_same = false;
332	id1 = hw_threads[hwidx].ids[top_index1];
333	id2 = hw_threads[hwidx].ids[top_index2];
334	}
335	if (radix1) {
336	// Select the layer to remove based on preference
337	kmp_hw_t remove_type, keep_type;
338	int remove_layer, remove_layer_ids;
339	if (pref1 > pref2) {
340	remove_type = type2;
341	remove_layer = remove_layer_ids = top_index2;
342	keep_type = type1;
343	} else {
344	remove_type = type1;
345	remove_layer = remove_layer_ids = top_index1;
346	keep_type = type2;
347	}
348	// If all the indexes for the second (deeper) layer are the same.
349	// e.g., all are zero, then make sure to keep the first layer's ids
350	if (all_same)
351	remove_layer_ids = top_index2;
352	// Remove radix one type by setting the equivalence, removing the id from
353	// the hw threads and removing the layer from types and depth
354	set_equivalent_type(remove_type, keep_type);
355	for (int idx = 0; idx < num_hw_threads; ++idx) {
356	kmp_hw_thread_t &hw_thread = hw_threads[idx];
357	for (int d = remove_layer_ids; d < depth - 1; ++d)
358	hw_thread.ids[d] = hw_thread.ids[d + 1];
359	}
360	for (int idx = remove_layer; idx < depth - 1; ++idx)
361	types[idx] = types[idx + 1];
362	depth--;
363	} else {
364	top_index1 = top_index2++;
365	}
366	}
367	KMP_ASSERT(depth > 0)if (!(depth > 0)) { __kmp_debug_assert("depth > 0", "openmp/runtime/src/kmp_affinity.cpp" , 367); };
368	}
369
370	void kmp_topology_t::_set_last_level_cache() {
371	if (get_equivalent_type(KMP_HW_L3) != KMP_HW_UNKNOWN)
372	set_equivalent_type(KMP_HW_LLC, KMP_HW_L3);
373	else if (get_equivalent_type(KMP_HW_L2) != KMP_HW_UNKNOWN)
374	set_equivalent_type(KMP_HW_LLC, KMP_HW_L2);
375	#if KMP_MIC_SUPPORTED((0 \|\| 1) && (1 \|\| 0))
376	else if (__kmp_mic_type == mic3) {
377	if (get_equivalent_type(KMP_HW_L2) != KMP_HW_UNKNOWN)
378	set_equivalent_type(KMP_HW_LLC, KMP_HW_L2);
379	else if (get_equivalent_type(KMP_HW_TILE) != KMP_HW_UNKNOWN)
380	set_equivalent_type(KMP_HW_LLC, KMP_HW_TILE);
381	// L2/Tile wasn't detected so just say L1
382	else
383	set_equivalent_type(KMP_HW_LLC, KMP_HW_L1);
384	}
385	#endif
386	else if (get_equivalent_type(KMP_HW_L1) != KMP_HW_UNKNOWN)
387	set_equivalent_type(KMP_HW_LLC, KMP_HW_L1);
388	// Fallback is to set last level cache to socket or core
389	if (get_equivalent_type(KMP_HW_LLC) == KMP_HW_UNKNOWN) {
390	if (get_equivalent_type(KMP_HW_SOCKET) != KMP_HW_UNKNOWN)
391	set_equivalent_type(KMP_HW_LLC, KMP_HW_SOCKET);
392	else if (get_equivalent_type(KMP_HW_CORE) != KMP_HW_UNKNOWN)
393	set_equivalent_type(KMP_HW_LLC, KMP_HW_CORE);
394	}
395	KMP_ASSERT(get_equivalent_type(KMP_HW_LLC) != KMP_HW_UNKNOWN)if (!(get_equivalent_type(KMP_HW_LLC) != KMP_HW_UNKNOWN)) { __kmp_debug_assert ("get_equivalent_type(KMP_HW_LLC) != KMP_HW_UNKNOWN", "openmp/runtime/src/kmp_affinity.cpp" , 395); };
396	}
397
398	// Gather the count of each topology layer and the ratio
399	void kmp_topology_t::_gather_enumeration_information() {
400	int previous_id[KMP_HW_LAST];
401	int max[KMP_HW_LAST];
402
403	for (int i = 0; i < depth; ++i) {
404	previous_id[i] = kmp_hw_thread_t::UNKNOWN_ID;
405	max[i] = 0;
406	count[i] = 0;
407	ratio[i] = 0;
408	}
409	int core_level = get_level(KMP_HW_CORE);
410	for (int i = 0; i < num_hw_threads; ++i) {
411	kmp_hw_thread_t &hw_thread = hw_threads[i];
412	for (int layer = 0; layer < depth; ++layer) {
413	int id = hw_thread.ids[layer];
414	if (id != previous_id[layer]) {
415	// Add an additional increment to each count
416	for (int l = layer; l < depth; ++l)
417	count[l]++;
418	// Keep track of topology layer ratio statistics
419	max[layer]++;
420	for (int l = layer + 1; l < depth; ++l) {
421	if (max[l] > ratio[l])
422	ratio[l] = max[l];
423	max[l] = 1;
424	}
425	// Figure out the number of different core types
426	// and efficiencies for hybrid CPUs
427	if (__kmp_is_hybrid_cpu() && core_level >= 0 && layer <= core_level) {
428	if (hw_thread.attrs.is_core_eff_valid() &&
429	hw_thread.attrs.core_eff >= num_core_efficiencies) {
430	// Because efficiencies can range from 0 to max efficiency - 1,
431	// the number of efficiencies is max efficiency + 1
432	num_core_efficiencies = hw_thread.attrs.core_eff + 1;
433	}
434	if (hw_thread.attrs.is_core_type_valid()) {
435	bool found = false;
436	for (int j = 0; j < num_core_types; ++j) {
437	if (hw_thread.attrs.get_core_type() == core_types[j]) {
438	found = true;
439	break;
440	}
441	}
442	if (!found) {
443	KMP_ASSERT(num_core_types < KMP_HW_MAX_NUM_CORE_TYPES)if (!(num_core_types < KMP_HW_MAX_NUM_CORE_TYPES)) { __kmp_debug_assert ("num_core_types < KMP_HW_MAX_NUM_CORE_TYPES", "openmp/runtime/src/kmp_affinity.cpp" , 443); };
444	core_types[num_core_types++] = hw_thread.attrs.get_core_type();
445	}
446	}
447	}
448	break;
449	}
450	}
451	for (int layer = 0; layer < depth; ++layer) {
452	previous_id[layer] = hw_thread.ids[layer];
453	}
454	}
455	for (int layer = 0; layer < depth; ++layer) {
456	if (max[layer] > ratio[layer])
457	ratio[layer] = max[layer];
458	}
459	}
460
461	int kmp_topology_t::_get_ncores_with_attr(const kmp_hw_attr_t &attr,
462	int above_level,
463	bool find_all) const {
464	int current, current_max;
465	int previous_id[KMP_HW_LAST];
466	for (int i = 0; i < depth; ++i)
467	previous_id[i] = kmp_hw_thread_t::UNKNOWN_ID;
468	int core_level = get_level(KMP_HW_CORE);
469	if (find_all)
470	above_level = -1;
471	KMP_ASSERT(above_level < core_level)if (!(above_level < core_level)) { __kmp_debug_assert("above_level < core_level" , "openmp/runtime/src/kmp_affinity.cpp", 471); };
472	current_max = 0;
473	current = 0;
474	for (int i = 0; i < num_hw_threads; ++i) {
475	kmp_hw_thread_t &hw_thread = hw_threads[i];
476	if (!find_all && hw_thread.ids[above_level] != previous_id[above_level]) {
477	if (current > current_max)
478	current_max = current;
479	current = hw_thread.attrs.contains(attr);
480	} else {
481	for (int level = above_level + 1; level <= core_level; ++level) {
482	if (hw_thread.ids[level] != previous_id[level]) {
483	if (hw_thread.attrs.contains(attr))
484	current++;
485	break;
486	}
487	}
488	}
489	for (int level = 0; level < depth; ++level)
490	previous_id[level] = hw_thread.ids[level];
491	}
492	if (current > current_max)
493	current_max = current;
494	return current_max;
495	}
496
497	// Find out if the topology is uniform
498	void kmp_topology_t::_discover_uniformity() {
499	int num = 1;
500	for (int level = 0; level < depth; ++level)
501	num *= ratio[level];
502	flags.uniform = (num == count[depth - 1]);
503	}
504
505	// Set all the sub_ids for each hardware thread
506	void kmp_topology_t::_set_sub_ids() {
507	int previous_id[KMP_HW_LAST];
508	int sub_id[KMP_HW_LAST];
509
510	for (int i = 0; i < depth; ++i) {
511	previous_id[i] = -1;
512	sub_id[i] = -1;
513	}
514	for (int i = 0; i < num_hw_threads; ++i) {
515	kmp_hw_thread_t &hw_thread = hw_threads[i];
516	// Setup the sub_id
517	for (int j = 0; j < depth; ++j) {
518	if (hw_thread.ids[j] != previous_id[j]) {
519	sub_id[j]++;
520	for (int k = j + 1; k < depth; ++k) {
521	sub_id[k] = 0;
522	}
523	break;
524	}
525	}
526	// Set previous_id
527	for (int j = 0; j < depth; ++j) {
528	previous_id[j] = hw_thread.ids[j];
529	}
530	// Set the sub_ids field
531	for (int j = 0; j < depth; ++j) {
532	hw_thread.sub_ids[j] = sub_id[j];
533	}
534	}
535	}
536
537	void kmp_topology_t::_set_globals() {
538	// Set nCoresPerPkg, nPackages, __kmp_nThreadsPerCore, __kmp_ncores
539	int core_level, thread_level, package_level;
540	package_level = get_level(KMP_HW_SOCKET);
541	#if KMP_GROUP_AFFINITY0
542	if (package_level == -1)
543	package_level = get_level(KMP_HW_PROC_GROUP);
544	#endif
545	core_level = get_level(KMP_HW_CORE);
546	thread_level = get_level(KMP_HW_THREAD);
547
548	KMP_ASSERT(core_level != -1)if (!(core_level != -1)) { __kmp_debug_assert("core_level != -1" , "openmp/runtime/src/kmp_affinity.cpp", 548); };
549	KMP_ASSERT(thread_level != -1)if (!(thread_level != -1)) { __kmp_debug_assert("thread_level != -1" , "openmp/runtime/src/kmp_affinity.cpp", 549); };
550
551	__kmp_nThreadsPerCore = calculate_ratio(thread_level, core_level);
552	if (package_level != -1) {
553	nCoresPerPkg = calculate_ratio(core_level, package_level);
554	nPackages = get_count(package_level);
555	} else {
556	// assume one socket
557	nCoresPerPkg = get_count(core_level);
558	nPackages = 1;
559	}
560	#ifndef KMP_DFLT_NTH_CORES
561	__kmp_ncores = get_count(core_level);
562	#endif
563	}
564
565	kmp_topology_t *kmp_topology_t::allocate(int nproc, int ndepth,
566	const kmp_hw_t *types) {
567	kmp_topology_t *retval;
568	// Allocate all data in one large allocation
569	size_t size = sizeof(kmp_topology_t) + sizeof(kmp_hw_thread_t) * nproc +
570	sizeof(int) * (size_t)KMP_HW_LAST * 3;
571	char bytes = (char )__kmp_allocate(size)___kmp_allocate((size), "openmp/runtime/src/kmp_affinity.cpp" , 571);
572	retval = (kmp_topology_t *)bytes;
573	if (nproc > 0) {
574	retval->hw_threads = (kmp_hw_thread_t *)(bytes + sizeof(kmp_topology_t));
575	} else {
576	retval->hw_threads = nullptr;
577	}
578	retval->num_hw_threads = nproc;
579	retval->depth = ndepth;
580	int *arr =
581	(int )(bytes + sizeof(kmp_topology_t) + sizeof(kmp_hw_thread_t) nproc);
582	retval->types = (kmp_hw_t *)arr;
583	retval->ratio = arr + (size_t)KMP_HW_LAST;
584	retval->count = arr + 2 * (size_t)KMP_HW_LAST;
585	retval->num_core_efficiencies = 0;
586	retval->num_core_types = 0;
587	retval->compact = 0;
588	for (int i = 0; i < KMP_HW_MAX_NUM_CORE_TYPES; ++i)
589	retval->core_types[i] = KMP_HW_CORE_TYPE_UNKNOWN;
590	KMP_FOREACH_HW_TYPE(type)for (kmp_hw_t type = (kmp_hw_t)0; type < KMP_HW_LAST; type = (kmp_hw_t)((int)type + 1)) { retval->equivalent[type] = KMP_HW_UNKNOWN; }
591	for (int i = 0; i < ndepth; ++i) {
592	retval->types[i] = types[i];
593	retval->equivalent[types[i]] = types[i];
594	}
595	return retval;
596	}
597
598	void kmp_topology_t::deallocate(kmp_topology_t *topology) {
599	if (topology)
600	__kmp_free(topology)___kmp_free((topology), "openmp/runtime/src/kmp_affinity.cpp" , 600);
601	}
602
603	bool kmp_topology_t::check_ids() const {
604	// Assume ids have been sorted
605	if (num_hw_threads == 0)
606	return true;
607	for (int i = 1; i < num_hw_threads; ++i) {
608	kmp_hw_thread_t &current_thread = hw_threads[i];
609	kmp_hw_thread_t &previous_thread = hw_threads[i - 1];
610	bool unique = false;
611	for (int j = 0; j < depth; ++j) {
612	if (previous_thread.ids[j] != current_thread.ids[j]) {
613	unique = true;
614	break;
615	}
616	}
617	if (unique)
618	continue;
619	return false;
620	}
621	return true;
622	}
623
624	void kmp_topology_t::dump() const {
625	printf("***********************\n");
626	printf("* __kmp_topology: *\n");
627	printf("***********************\n");
628	printf("* depth: %d\n", depth);
629
630	printf("* types: ");
631	for (int i = 0; i < depth; ++i)
632	printf("%15s ", __kmp_hw_get_keyword(types[i]));
633	printf("\n");
634
635	printf("* ratio: ");
636	for (int i = 0; i < depth; ++i) {
637	printf("%15d ", ratio[i]);
638	}
639	printf("\n");
640
641	printf("* count: ");
642	for (int i = 0; i < depth; ++i) {
643	printf("%15d ", count[i]);
644	}
645	printf("\n");
646
647	printf("* num_core_eff: %d\n", num_core_efficiencies);
648	printf("* num_core_types: %d\n", num_core_types);
649	printf("* core_types: ");
650	for (int i = 0; i < num_core_types; ++i)
651	printf("%3d ", core_types[i]);
652	printf("\n");
653
654	printf("* equivalent map:\n");
655	KMP_FOREACH_HW_TYPE(i)for (kmp_hw_t i = (kmp_hw_t)0; i < KMP_HW_LAST; i = (kmp_hw_t )((int)i + 1)) {
656	const char *key = __kmp_hw_get_keyword(i);
657	const char *value = __kmp_hw_get_keyword(equivalent[i]);
658	printf("%-15s -> %-15s\n", key, value);
659	}
660
661	printf("* uniform: %s\n", (is_uniform() ? "Yes" : "No"));
662
663	printf("* num_hw_threads: %d\n", num_hw_threads);
664	printf("* hw_threads:\n");
665	for (int i = 0; i < num_hw_threads; ++i) {
666	hw_threads[i].print();
667	}
668	printf("***********************\n");
669	}
670
671	void kmp_topology_t::print(const char *env_var) const {
672	kmp_str_buf_t buf;
673	int print_types_depth;
674	__kmp_str_buf_init(&buf){ (&buf)->str = (&buf)->bulk; (&buf)->size = sizeof((&buf)->bulk); (&buf)->used = 0; (& buf)->bulk[0] = 0; };
675	kmp_hw_t print_types[KMP_HW_LAST + 2];
676
677	// Num Available Threads
678	if (num_hw_threads) {
679	KMP_INFORM(AvailableOSProc, env_var, num_hw_threads)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AvailableOSProc , env_var, num_hw_threads), __kmp_msg_null);
680	} else {
681	KMP_INFORM(AvailableOSProc, env_var, __kmp_xproc)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AvailableOSProc , env_var, __kmp_xproc), __kmp_msg_null);
682	}
683
684	// Uniform or not
685	if (is_uniform()) {
686	KMP_INFORM(Uniform, env_var)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Uniform , env_var), __kmp_msg_null);
687	} else {
688	KMP_INFORM(NonUniform, env_var)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_NonUniform , env_var), __kmp_msg_null);
689	}
690
691	// Equivalent types
692	KMP_FOREACH_HW_TYPE(type)for (kmp_hw_t type = (kmp_hw_t)0; type < KMP_HW_LAST; type = (kmp_hw_t)((int)type + 1)) {
693	kmp_hw_t eq_type = equivalent[type];
694	if (eq_type != KMP_HW_UNKNOWN && eq_type != type) {
695	KMP_INFORM(AffEqualTopologyTypes, env_var,__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffEqualTopologyTypes , env_var, __kmp_hw_get_catalog_string(type), __kmp_hw_get_catalog_string (eq_type)), __kmp_msg_null)
696	__kmp_hw_get_catalog_string(type),__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffEqualTopologyTypes , env_var, __kmp_hw_get_catalog_string(type), __kmp_hw_get_catalog_string (eq_type)), __kmp_msg_null)
697	__kmp_hw_get_catalog_string(eq_type))__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffEqualTopologyTypes , env_var, __kmp_hw_get_catalog_string(type), __kmp_hw_get_catalog_string (eq_type)), __kmp_msg_null);
698	}
699	}
700
701	// Quick topology
702	KMP_ASSERT(depth > 0 && depth <= (int)KMP_HW_LAST)if (!(depth > 0 && depth <= (int)KMP_HW_LAST)) { __kmp_debug_assert("depth > 0 && depth <= (int)KMP_HW_LAST" , "openmp/runtime/src/kmp_affinity.cpp", 702); };
703	// Create a print types array that always guarantees printing
704	// the core and thread level
705	print_types_depth = 0;
706	for (int level = 0; level < depth; ++level)
707	print_types[print_types_depth++] = types[level];
708	if (equivalent[KMP_HW_CORE] != KMP_HW_CORE) {
709	// Force in the core level for quick topology
710	if (print_types[print_types_depth - 1] == KMP_HW_THREAD) {
711	// Force core before thread e.g., 1 socket X 2 threads/socket
712	// becomes 1 socket X 1 core/socket X 2 threads/socket
713	print_types[print_types_depth - 1] = KMP_HW_CORE;
714	print_types[print_types_depth++] = KMP_HW_THREAD;
715	} else {
716	print_types[print_types_depth++] = KMP_HW_CORE;
717	}
718	}
719	// Always put threads at very end of quick topology
720	if (equivalent[KMP_HW_THREAD] != KMP_HW_THREAD)
721	print_types[print_types_depth++] = KMP_HW_THREAD;
722
723	__kmp_str_buf_clear(&buf);
724	kmp_hw_t numerator_type;
725	kmp_hw_t denominator_type = KMP_HW_UNKNOWN;
726	int core_level = get_level(KMP_HW_CORE);
727	int ncores = get_count(core_level);
728
729	for (int plevel = 0, level = 0; plevel < print_types_depth; ++plevel) {
730	int c;
731	bool plural;
732	numerator_type = print_types[plevel];
733	KMP_ASSERT_VALID_HW_TYPE(numerator_type)if (!(numerator_type >= (kmp_hw_t)0 && numerator_type < KMP_HW_LAST)) { __kmp_debug_assert("numerator_type >= (kmp_hw_t)0 && numerator_type < KMP_HW_LAST" , "openmp/runtime/src/kmp_affinity.cpp", 733); };
734	if (equivalent[numerator_type] != numerator_type)
735	c = 1;
736	else
737	c = get_ratio(level++);
738	plural = (c > 1);
739	if (plevel == 0) {
740	__kmp_str_buf_print(&buf, "%d %s", c,
741	__kmp_hw_get_catalog_string(numerator_type, plural));
742	} else {
743	__kmp_str_buf_print(&buf, " x %d %s/%s", c,
744	__kmp_hw_get_catalog_string(numerator_type, plural),
745	__kmp_hw_get_catalog_string(denominator_type));
746	}
747	denominator_type = numerator_type;
748	}
749	KMP_INFORM(TopologyGeneric, env_var, buf.str, ncores)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_TopologyGeneric , env_var, buf.str, ncores), __kmp_msg_null);
750
751	// Hybrid topology information
752	if (__kmp_is_hybrid_cpu()) {
753	for (int i = 0; i < num_core_types; ++i) {
754	kmp_hw_core_type_t core_type = core_types[i];
755	kmp_hw_attr_t attr;
756	attr.clear();
757	attr.set_core_type(core_type);
758	int ncores = get_ncores_with_attr(attr);
759	if (ncores > 0) {
760	KMP_INFORM(TopologyHybrid, env_var, ncores,__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_TopologyHybrid , env_var, ncores, __kmp_hw_get_core_type_string(core_type)), __kmp_msg_null)
761	__kmp_hw_get_core_type_string(core_type))__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_TopologyHybrid , env_var, ncores, __kmp_hw_get_core_type_string(core_type)), __kmp_msg_null);
762	KMP_ASSERT(num_core_efficiencies <= KMP_HW_MAX_NUM_CORE_EFFS)if (!(num_core_efficiencies <= 8)) { __kmp_debug_assert("num_core_efficiencies <= KMP_HW_MAX_NUM_CORE_EFFS" , "openmp/runtime/src/kmp_affinity.cpp", 762); }
763	for (int eff = 0; eff < num_core_efficiencies; ++eff) {
764	attr.set_core_eff(eff);
765	int ncores_with_eff = get_ncores_with_attr(attr);
766	if (ncores_with_eff > 0) {
767	KMP_INFORM(TopologyHybridCoreEff, env_var, ncores_with_eff, eff)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_TopologyHybridCoreEff , env_var, ncores_with_eff, eff), __kmp_msg_null);
768	}
769	}
770	}
771	}
772	}
773
774	if (num_hw_threads <= 0) {
775	__kmp_str_buf_free(&buf);
776	return;
777	}
778
779	// Full OS proc to hardware thread map
780	KMP_INFORM(OSProcToPhysicalThreadMap, env_var)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_OSProcToPhysicalThreadMap , env_var), __kmp_msg_null);
781	for (int i = 0; i < num_hw_threads; i++) {
782	__kmp_str_buf_clear(&buf);
783	for (int level = 0; level < depth; ++level) {
784	kmp_hw_t type = types[level];
785	__kmp_str_buf_print(&buf, "%s ", __kmp_hw_get_catalog_string(type));
786	__kmp_str_buf_print(&buf, "%d ", hw_threads[i].ids[level]);
787	}
788	if (__kmp_is_hybrid_cpu())
789	__kmp_str_buf_print(
790	&buf, "(%s)",
791	__kmp_hw_get_core_type_string(hw_threads[i].attrs.get_core_type()));
792	KMP_INFORM(OSProcMapToPack, env_var, hw_threads[i].os_id, buf.str)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_OSProcMapToPack , env_var, hw_threads[i].os_id, buf.str), __kmp_msg_null);
793	}
794
795	__kmp_str_buf_free(&buf);
796	}
797
798	#if KMP_AFFINITY_SUPPORTED1
799	void kmp_topology_t::set_granularity(kmp_affinity_t &affinity) const {
800	const char *env_var = affinity.env_var;
801	// Set the number of affinity granularity levels
802	if (affinity.gran_levels < 0) {
803	kmp_hw_t gran_type = get_equivalent_type(affinity.gran);
804	// Check if user's granularity request is valid
805	if (gran_type == KMP_HW_UNKNOWN) {
806	// First try core, then thread, then package
807	kmp_hw_t gran_types[3] = {KMP_HW_CORE, KMP_HW_THREAD, KMP_HW_SOCKET};
808	for (auto g : gran_types) {
809	if (get_equivalent_type(g) != KMP_HW_UNKNOWN) {
810	gran_type = g;
811	break;
812	}
813	}
814	KMP_ASSERT(gran_type != KMP_HW_UNKNOWN)if (!(gran_type != KMP_HW_UNKNOWN)) { __kmp_debug_assert("gran_type != KMP_HW_UNKNOWN" , "openmp/runtime/src/kmp_affinity.cpp", 814); };
815	// Warn user what granularity setting will be used instead
816	KMP_AFF_WARNING(affinity, AffGranularityBad, env_var,if (affinity.flags.verbose \|\| (affinity.flags.warnings && (affinity.type != affinity_none))) { __kmp_msg(kmp_ms_warning , __kmp_msg_format(kmp_i18n_msg_AffGranularityBad, env_var, __kmp_hw_get_catalog_string (affinity.gran), __kmp_hw_get_catalog_string(gran_type)), __kmp_msg_null ); }
817	__kmp_hw_get_catalog_string(affinity.gran),if (affinity.flags.verbose \|\| (affinity.flags.warnings && (affinity.type != affinity_none))) { __kmp_msg(kmp_ms_warning , __kmp_msg_format(kmp_i18n_msg_AffGranularityBad, env_var, __kmp_hw_get_catalog_string (affinity.gran), __kmp_hw_get_catalog_string(gran_type)), __kmp_msg_null ); }
818	__kmp_hw_get_catalog_string(gran_type))if (affinity.flags.verbose \|\| (affinity.flags.warnings && (affinity.type != affinity_none))) { __kmp_msg(kmp_ms_warning , __kmp_msg_format(kmp_i18n_msg_AffGranularityBad, env_var, __kmp_hw_get_catalog_string (affinity.gran), __kmp_hw_get_catalog_string(gran_type)), __kmp_msg_null ); };
819	affinity.gran = gran_type;
820	}
821	#if KMP_GROUP_AFFINITY0
822	// If more than one processor group exists, and the level of
823	// granularity specified by the user is too coarse, then the
824	// granularity must be adjusted "down" to processor group affinity
825	// because threads can only exist within one processor group.
826	// For example, if a user sets granularity=socket and there are two
827	// processor groups that cover a socket, then the runtime must
828	// restrict the granularity down to the processor group level.
829	if (__kmp_num_proc_groups > 1) {
830	int gran_depth = get_level(gran_type);
831	int proc_group_depth = get_level(KMP_HW_PROC_GROUP);
832	if (gran_depth >= 0 && proc_group_depth >= 0 &&
833	gran_depth < proc_group_depth) {
834	KMP_AFF_WARNING(affinity, AffGranTooCoarseProcGroup, env_var,if (affinity.flags.verbose \|\| (affinity.flags.warnings && (affinity.type != affinity_none))) { __kmp_msg(kmp_ms_warning , __kmp_msg_format(kmp_i18n_msg_AffGranTooCoarseProcGroup, env_var , __kmp_hw_get_catalog_string(affinity.gran)), __kmp_msg_null ); }
835	__kmp_hw_get_catalog_string(affinity.gran))if (affinity.flags.verbose \|\| (affinity.flags.warnings && (affinity.type != affinity_none))) { __kmp_msg(kmp_ms_warning , __kmp_msg_format(kmp_i18n_msg_AffGranTooCoarseProcGroup, env_var , __kmp_hw_get_catalog_string(affinity.gran)), __kmp_msg_null ); };
836	affinity.gran = gran_type = KMP_HW_PROC_GROUP;
837	}
838	}
839	#endif
840	affinity.gran_levels = 0;
841	for (int i = depth - 1; i >= 0 && get_type(i) != gran_type; --i)
842	affinity.gran_levels++;
843	}
844	}
845	#endif
846
847	void kmp_topology_t::canonicalize() {
848	#if KMP_GROUP_AFFINITY0
849	_insert_windows_proc_groups();
850	#endif
851	_remove_radix1_layers();
852	_gather_enumeration_information();
853	_discover_uniformity();
854	_set_sub_ids();
855	_set_globals();
856	_set_last_level_cache();
857
858	#if KMP_MIC_SUPPORTED((0 \|\| 1) && (1 \|\| 0))
859	// Manually Add L2 = Tile equivalence
860	if (__kmp_mic_type == mic3) {
861	if (get_level(KMP_HW_L2) != -1)
862	set_equivalent_type(KMP_HW_TILE, KMP_HW_L2);
863	else if (get_level(KMP_HW_TILE) != -1)
864	set_equivalent_type(KMP_HW_L2, KMP_HW_TILE);
865	}
866	#endif
867
868	// Perform post canonicalization checking
869	KMP_ASSERT(depth > 0)if (!(depth > 0)) { __kmp_debug_assert("depth > 0", "openmp/runtime/src/kmp_affinity.cpp" , 869); };
870	for (int level = 0; level < depth; ++level) {
871	// All counts, ratios, and types must be valid
872	KMP_ASSERT(count[level] > 0 && ratio[level] > 0)if (!(count[level] > 0 && ratio[level] > 0)) { __kmp_debug_assert ("count[level] > 0 && ratio[level] > 0", "openmp/runtime/src/kmp_affinity.cpp" , 872); };
873	KMP_ASSERT_VALID_HW_TYPE(types[level])if (!(types[level] >= (kmp_hw_t)0 && types[level] < KMP_HW_LAST)) { __kmp_debug_assert("types[level] >= (kmp_hw_t)0 && types[level] < KMP_HW_LAST" , "openmp/runtime/src/kmp_affinity.cpp", 873); };
874	// Detected types must point to themselves
875	KMP_ASSERT(equivalent[types[level]] == types[level])if (!(equivalent[types[level]] == types[level])) { __kmp_debug_assert ("equivalent[types[level]] == types[level]", "openmp/runtime/src/kmp_affinity.cpp" , 875); };
876	}
877	}
878
879	// Canonicalize an explicit packages X cores/pkg X threads/core topology
880	void kmp_topology_t::canonicalize(int npackages, int ncores_per_pkg,
881	int nthreads_per_core, int ncores) {
882	int ndepth = 3;
883	depth = ndepth;
884	KMP_FOREACH_HW_TYPE(i)for (kmp_hw_t i = (kmp_hw_t)0; i < KMP_HW_LAST; i = (kmp_hw_t )((int)i + 1)) { equivalent[i] = KMP_HW_UNKNOWN; }
885	for (int level = 0; level < depth; ++level) {
886	count[level] = 0;
887	ratio[level] = 0;
888	}
889	count[0] = npackages;
890	count[1] = ncores;
891	count[2] = __kmp_xproc;
892	ratio[0] = npackages;
893	ratio[1] = ncores_per_pkg;
894	ratio[2] = nthreads_per_core;
895	equivalent[KMP_HW_SOCKET] = KMP_HW_SOCKET;
896	equivalent[KMP_HW_CORE] = KMP_HW_CORE;
897	equivalent[KMP_HW_THREAD] = KMP_HW_THREAD;
898	types[0] = KMP_HW_SOCKET;
899	types[1] = KMP_HW_CORE;
900	types[2] = KMP_HW_THREAD;
901	//__kmp_avail_proc = __kmp_xproc;
902	_discover_uniformity();
903	}
904
905	// Represents running sub IDs for a single core attribute where
906	// attribute values have SIZE possibilities.
907	template <size_t SIZE, typename IndexFunc> struct kmp_sub_ids_t {
908	int last_level; // last level in topology to consider for sub_ids
909	int sub_id[SIZE]; // The sub ID for a given attribute value
910	int prev_sub_id[KMP_HW_LAST];
911	IndexFunc indexer;
912
913	public:
914	kmp_sub_ids_t(int last_level) : last_level(last_level) {
915	KMP_ASSERT(last_level < KMP_HW_LAST)if (!(last_level < KMP_HW_LAST)) { __kmp_debug_assert("last_level < KMP_HW_LAST" , "openmp/runtime/src/kmp_affinity.cpp", 915); };
916	for (size_t i = 0; i < SIZE; ++i)
917	sub_id[i] = -1;
918	for (size_t i = 0; i < KMP_HW_LAST; ++i)
919	prev_sub_id[i] = -1;
920	}
921	void update(const kmp_hw_thread_t &hw_thread) {
922	int idx = indexer(hw_thread);
923	KMP_ASSERT(idx < (int)SIZE)if (!(idx < (int)SIZE)) { __kmp_debug_assert("idx < (int)SIZE" , "openmp/runtime/src/kmp_affinity.cpp", 923); };
924	for (int level = 0; level <= last_level; ++level) {
925	if (hw_thread.sub_ids[level] != prev_sub_id[level]) {
926	if (level < last_level)
927	sub_id[idx] = -1;
928	sub_id[idx]++;
929	break;
930	}
931	}
932	for (int level = 0; level <= last_level; ++level)
933	prev_sub_id[level] = hw_thread.sub_ids[level];
934	}
935	int get_sub_id(const kmp_hw_thread_t &hw_thread) const {
936	return sub_id[indexer(hw_thread)];
937	}
938	};
939
940	static kmp_str_buf_t *
941	__kmp_hw_get_catalog_core_string(const kmp_hw_attr_t &attr, kmp_str_buf_t *buf,
942	bool plural) {
943	__kmp_str_buf_init(buf){ (buf)->str = (buf)->bulk; (buf)->size = sizeof((buf )->bulk); (buf)->used = 0; (buf)->bulk[0] = 0; };
944	if (attr.is_core_type_valid())
945	__kmp_str_buf_print(buf, "%s %s",
946	__kmp_hw_get_core_type_string(attr.get_core_type()),
947	__kmp_hw_get_catalog_string(KMP_HW_CORE, plural));
948	else
949	__kmp_str_buf_print(buf, "%s eff=%d",
950	__kmp_hw_get_catalog_string(KMP_HW_CORE, plural),
951	attr.get_core_eff());
952	return buf;
953	}
954
955	// Apply the KMP_HW_SUBSET envirable to the topology
956	// Returns true if KMP_HW_SUBSET filtered any processors
957	// otherwise, returns false
958	bool kmp_topology_t::filter_hw_subset() {
959	// If KMP_HW_SUBSET wasn't requested, then do nothing.
960	if (!__kmp_hw_subset)
961	return false;
962
963	// First, sort the KMP_HW_SUBSET items by the machine topology
964	__kmp_hw_subset->sort();
965
966	// Check to see if KMP_HW_SUBSET is a valid subset of the detected topology
967	bool using_core_types = false;
968	bool using_core_effs = false;
969	int hw_subset_depth = __kmp_hw_subset->get_depth();
970	kmp_hw_t specified[KMP_HW_LAST];
971	int topology_levels = (int )KMP_ALLOCA(sizeof(int) * hw_subset_depth)__builtin_alloca (sizeof(int) * hw_subset_depth);
972	KMP_ASSERT(hw_subset_depth > 0)if (!(hw_subset_depth > 0)) { __kmp_debug_assert("hw_subset_depth > 0" , "openmp/runtime/src/kmp_affinity.cpp", 972); };
973	KMP_FOREACH_HW_TYPE(i)for (kmp_hw_t i = (kmp_hw_t)0; i < KMP_HW_LAST; i = (kmp_hw_t )((int)i + 1)) { specified[i] = KMP_HW_UNKNOWN; }
974	int core_level = get_level(KMP_HW_CORE);
975	for (int i = 0; i < hw_subset_depth; ++i) {
976	int max_count;
977	const kmp_hw_subset_t::item_t &item = __kmp_hw_subset->at(i);
978	int num = item.num[0];
979	int offset = item.offset[0];
980	kmp_hw_t type = item.type;
981	kmp_hw_t equivalent_type = equivalent[type];
982	int level = get_level(type);
983	topology_levels[i] = level;
984
985	// Check to see if current layer is in detected machine topology
986	if (equivalent_type != KMP_HW_UNKNOWN) {
987	__kmp_hw_subset->at(i).type = equivalent_type;
988	} else {
989	KMP_AFF_WARNING(__kmp_affinity, AffHWSubsetNotExistGeneric,if (__kmp_affinity.flags.verbose \|\| (__kmp_affinity.flags.warnings && (__kmp_affinity.type != affinity_none))) { __kmp_msg (kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffHWSubsetNotExistGeneric , __kmp_hw_get_catalog_string(type)), __kmp_msg_null); }
990	__kmp_hw_get_catalog_string(type))if (__kmp_affinity.flags.verbose \|\| (__kmp_affinity.flags.warnings && (__kmp_affinity.type != affinity_none))) { __kmp_msg (kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffHWSubsetNotExistGeneric , __kmp_hw_get_catalog_string(type)), __kmp_msg_null); };
991	return false;
992	}
993
994	// Check to see if current layer has already been
995	// specified either directly or through an equivalent type
996	if (specified[equivalent_type] != KMP_HW_UNKNOWN) {
997	KMP_AFF_WARNING(__kmp_affinity, AffHWSubsetEqvLayers,if (__kmp_affinity.flags.verbose \|\| (__kmp_affinity.flags.warnings && (__kmp_affinity.type != affinity_none))) { __kmp_msg (kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffHWSubsetEqvLayers , __kmp_hw_get_catalog_string(type), __kmp_hw_get_catalog_string (specified[equivalent_type])), __kmp_msg_null); }
998	__kmp_hw_get_catalog_string(type),if (__kmp_affinity.flags.verbose \|\| (__kmp_affinity.flags.warnings && (__kmp_affinity.type != affinity_none))) { __kmp_msg (kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffHWSubsetEqvLayers , __kmp_hw_get_catalog_string(type), __kmp_hw_get_catalog_string (specified[equivalent_type])), __kmp_msg_null); }
999	__kmp_hw_get_catalog_string(specified[equivalent_type]))if (__kmp_affinity.flags.verbose \|\| (__kmp_affinity.flags.warnings && (__kmp_affinity.type != affinity_none))) { __kmp_msg (kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffHWSubsetEqvLayers , __kmp_hw_get_catalog_string(type), __kmp_hw_get_catalog_string (specified[equivalent_type])), __kmp_msg_null); };
1000	return false;
1001	}
1002	specified[equivalent_type] = type;
1003
1004	// Check to see if each layer's num & offset parameters are valid
1005	max_count = get_ratio(level);
1006	if (max_count < 0 \|\|
1007	(num != kmp_hw_subset_t::USE_ALL && num + offset > max_count)) {
1008	bool plural = (num > 1);
1009	KMP_AFF_WARNING(__kmp_affinity, AffHWSubsetManyGeneric,if (__kmp_affinity.flags.verbose \|\| (__kmp_affinity.flags.warnings && (__kmp_affinity.type != affinity_none))) { __kmp_msg (kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffHWSubsetManyGeneric , __kmp_hw_get_catalog_string(type, plural)), __kmp_msg_null) ; }
1010	__kmp_hw_get_catalog_string(type, plural))if (__kmp_affinity.flags.verbose \|\| (__kmp_affinity.flags.warnings && (__kmp_affinity.type != affinity_none))) { __kmp_msg (kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffHWSubsetManyGeneric , __kmp_hw_get_catalog_string(type, plural)), __kmp_msg_null) ; };
1011	return false;
1012	}
1013
1014	// Check to see if core attributes are consistent
1015	if (core_level == level) {
1016	// Determine which core attributes are specified
1017	for (int j = 0; j < item.num_attrs; ++j) {
1018	if (item.attr[j].is_core_type_valid())
1019	using_core_types = true;
1020	if (item.attr[j].is_core_eff_valid())
1021	using_core_effs = true;
1022	}
1023
1024	// Check if using a single core attribute on non-hybrid arch.
1025	// Do not ignore all of KMP_HW_SUBSET, just ignore the attribute.
1026	//
1027	// Check if using multiple core attributes on non-hyrbid arch.
1028	// Ignore all of KMP_HW_SUBSET if this is the case.
1029	if ((using_core_effs \|\| using_core_types) && !__kmp_is_hybrid_cpu()) {
1030	if (item.num_attrs == 1) {
1031	if (using_core_effs) {
1032	KMP_AFF_WARNING(__kmp_affinity, AffHWSubsetIgnoringAttr,if (__kmp_affinity.flags.verbose \|\| (__kmp_affinity.flags.warnings && (__kmp_affinity.type != affinity_none))) { __kmp_msg (kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffHWSubsetIgnoringAttr , "efficiency"), __kmp_msg_null); }
1033	"efficiency")if (__kmp_affinity.flags.verbose \|\| (__kmp_affinity.flags.warnings && (__kmp_affinity.type != affinity_none))) { __kmp_msg (kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffHWSubsetIgnoringAttr , "efficiency"), __kmp_msg_null); };
1034	} else {
1035	KMP_AFF_WARNING(__kmp_affinity, AffHWSubsetIgnoringAttr,if (__kmp_affinity.flags.verbose \|\| (__kmp_affinity.flags.warnings && (__kmp_affinity.type != affinity_none))) { __kmp_msg (kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffHWSubsetIgnoringAttr , "core_type"), __kmp_msg_null); }
1036	"core_type")if (__kmp_affinity.flags.verbose \|\| (__kmp_affinity.flags.warnings && (__kmp_affinity.type != affinity_none))) { __kmp_msg (kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffHWSubsetIgnoringAttr , "core_type"), __kmp_msg_null); };
1037	}
1038	using_core_effs = false;
1039	using_core_types = false;
1040	} else {
1041	KMP_AFF_WARNING(__kmp_affinity, AffHWSubsetAttrsNonHybrid)if (__kmp_affinity.flags.verbose \|\| (__kmp_affinity.flags.warnings && (__kmp_affinity.type != affinity_none))) { __kmp_msg (kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffHWSubsetAttrsNonHybrid ), __kmp_msg_null); };
1042	return false;
1043	}
1044	}
1045
1046	// Check if using both core types and core efficiencies together
1047	if (using_core_types && using_core_effs) {
1048	KMP_AFF_WARNING(__kmp_affinity, AffHWSubsetIncompat, "core_type",if (__kmp_affinity.flags.verbose \|\| (__kmp_affinity.flags.warnings && (__kmp_affinity.type != affinity_none))) { __kmp_msg (kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffHWSubsetIncompat , "core_type", "efficiency"), __kmp_msg_null); }
1049	"efficiency")if (__kmp_affinity.flags.verbose \|\| (__kmp_affinity.flags.warnings && (__kmp_affinity.type != affinity_none))) { __kmp_msg (kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffHWSubsetIncompat , "core_type", "efficiency"), __kmp_msg_null); };
1050	return false;
1051	}
1052
1053	// Check that core efficiency values are valid
1054	if (using_core_effs) {
1055	for (int j = 0; j < item.num_attrs; ++j) {
1056	if (item.attr[j].is_core_eff_valid()) {
1057	int core_eff = item.attr[j].get_core_eff();
1058	if (core_eff < 0 \|\| core_eff >= num_core_efficiencies) {
1059	kmp_str_buf_t buf;
1060	__kmp_str_buf_init(&buf){ (&buf)->str = (&buf)->bulk; (&buf)->size = sizeof((&buf)->bulk); (&buf)->used = 0; (& buf)->bulk[0] = 0; };
1061	__kmp_str_buf_print(&buf, "%d", item.attr[j].get_core_eff());
1062	__kmp_msg(kmp_ms_warning,
1063	KMP_MSG(AffHWSubsetAttrInvalid, "efficiency", buf.str)__kmp_msg_format(kmp_i18n_msg_AffHWSubsetAttrInvalid, "efficiency" , buf.str),
1064	KMP_HNT(ValidValuesRange, 0, num_core_efficiencies - 1)__kmp_msg_format(kmp_i18n_hnt_ValidValuesRange, 0, num_core_efficiencies - 1),
1065	__kmp_msg_null);
1066	__kmp_str_buf_free(&buf);
1067	return false;
1068	}
1069	}
1070	}
1071	}
1072
1073	// Check that the number of requested cores with attributes is valid
1074	if (using_core_types \|\| using_core_effs) {
1075	for (int j = 0; j < item.num_attrs; ++j) {
1076	int num = item.num[j];
1077	int offset = item.offset[j];
1078	int level_above = core_level - 1;
1079	if (level_above >= 0) {
1080	max_count = get_ncores_with_attr_per(item.attr[j], level_above);
1081	if (max_count <= 0 \|\|
1082	(num != kmp_hw_subset_t::USE_ALL && num + offset > max_count)) {
1083	kmp_str_buf_t buf;
1084	__kmp_hw_get_catalog_core_string(item.attr[j], &buf, num > 0);
1085	KMP_AFF_WARNING(__kmp_affinity, AffHWSubsetManyGeneric, buf.str)if (__kmp_affinity.flags.verbose \|\| (__kmp_affinity.flags.warnings && (__kmp_affinity.type != affinity_none))) { __kmp_msg (kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffHWSubsetManyGeneric , buf.str), __kmp_msg_null); };
1086	__kmp_str_buf_free(&buf);
1087	return false;
1088	}
1089	}
1090	}
1091	}
1092
1093	if ((using_core_types \|\| using_core_effs) && item.num_attrs > 1) {
1094	for (int j = 0; j < item.num_attrs; ++j) {
1095	// Ambiguous use of specific core attribute + generic core
1096	// e.g., 4c & 3c:intel_core or 4c & 3c:eff1
1097	if (!item.attr[j]) {
1098	kmp_hw_attr_t other_attr;
1099	for (int k = 0; k < item.num_attrs; ++k) {
1100	if (item.attr[k] != item.attr[j]) {
1101	other_attr = item.attr[k];
1102	break;
1103	}
1104	}
1105	kmp_str_buf_t buf;
1106	__kmp_hw_get_catalog_core_string(other_attr, &buf, item.num[j] > 0);
1107	KMP_AFF_WARNING(__kmp_affinity, AffHWSubsetIncompat,if (__kmp_affinity.flags.verbose \|\| (__kmp_affinity.flags.warnings && (__kmp_affinity.type != affinity_none))) { __kmp_msg (kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffHWSubsetIncompat , __kmp_hw_get_catalog_string(KMP_HW_CORE), buf.str), __kmp_msg_null ); }
1108	__kmp_hw_get_catalog_string(KMP_HW_CORE), buf.str)if (__kmp_affinity.flags.verbose \|\| (__kmp_affinity.flags.warnings && (__kmp_affinity.type != affinity_none))) { __kmp_msg (kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffHWSubsetIncompat , __kmp_hw_get_catalog_string(KMP_HW_CORE), buf.str), __kmp_msg_null ); };
1109	__kmp_str_buf_free(&buf);
1110	return false;
1111	}
1112	// Allow specifying a specific core type or core eff exactly once
1113	for (int k = 0; k < j; ++k) {
1114	if (!item.attr[j] \|\| !item.attr[k])
1115	continue;
1116	if (item.attr[k] == item.attr[j]) {
1117	kmp_str_buf_t buf;
1118	__kmp_hw_get_catalog_core_string(item.attr[j], &buf,
1119	item.num[j] > 0);
1120	KMP_AFF_WARNING(__kmp_affinity, AffHWSubsetAttrRepeat, buf.str)if (__kmp_affinity.flags.verbose \|\| (__kmp_affinity.flags.warnings && (__kmp_affinity.type != affinity_none))) { __kmp_msg (kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffHWSubsetAttrRepeat , buf.str), __kmp_msg_null); };
1121	__kmp_str_buf_free(&buf);
1122	return false;
1123	}
1124	}
1125	}
1126	}
1127	}
1128	}
1129
1130	struct core_type_indexer {
1131	int operator()(const kmp_hw_thread_t &t) const {
1132	switch (t.attrs.get_core_type()) {
1133	#if KMP_ARCH_X860 \|\| KMP_ARCH_X86_641
1134	case KMP_HW_CORE_TYPE_ATOM:
1135	return 1;
1136	case KMP_HW_CORE_TYPE_CORE:
1137	return 2;
1138	#endif
1139	case KMP_HW_CORE_TYPE_UNKNOWN:
1140	return 0;
1141	}
1142	KMP_ASSERT(0)if (!(0)) { __kmp_debug_assert("0", "openmp/runtime/src/kmp_affinity.cpp" , 1142); };
1143	return 0;
1144	}
1145	};
1146	struct core_eff_indexer {
1147	int operator()(const kmp_hw_thread_t &t) const {
1148	return t.attrs.get_core_eff();
1149	}
1150	};
1151
1152	kmp_sub_ids_t<KMP_HW_MAX_NUM_CORE_TYPES, core_type_indexer> core_type_sub_ids(
1153	core_level);
1154	kmp_sub_ids_t<KMP_HW_MAX_NUM_CORE_EFFS8, core_eff_indexer> core_eff_sub_ids(
1155	core_level);
1156
1157	// Determine which hardware threads should be filtered.
1158	int num_filtered = 0;
1159	bool filtered = (bool )__kmp_allocate(sizeof(bool) * num_hw_threads)___kmp_allocate((sizeof(bool) * num_hw_threads), "openmp/runtime/src/kmp_affinity.cpp" , 1159);
1160	for (int i = 0; i < num_hw_threads; ++i) {
1161	kmp_hw_thread_t &hw_thread = hw_threads[i];
1162	// Update type_sub_id
1163	if (using_core_types)
1164	core_type_sub_ids.update(hw_thread);
1165	if (using_core_effs)
1166	core_eff_sub_ids.update(hw_thread);
1167
1168	// Check to see if this hardware thread should be filtered
1169	bool should_be_filtered = false;
1170	for (int hw_subset_index = 0; hw_subset_index < hw_subset_depth;
1171	++hw_subset_index) {
1172	const auto &hw_subset_item = __kmp_hw_subset->at(hw_subset_index);
1173	int level = topology_levels[hw_subset_index];
1174	if (level == -1)
1175	continue;
1176	if ((using_core_effs \|\| using_core_types) && level == core_level) {
1177	// Look for the core attribute in KMP_HW_SUBSET which corresponds
1178	// to this hardware thread's core attribute. Use this num,offset plus
1179	// the running sub_id for the particular core attribute of this hardware
1180	// thread to determine if the hardware thread should be filtered or not.
1181	int attr_idx;
1182	kmp_hw_core_type_t core_type = hw_thread.attrs.get_core_type();
1183	int core_eff = hw_thread.attrs.get_core_eff();
1184	for (attr_idx = 0; attr_idx < hw_subset_item.num_attrs; ++attr_idx) {
1185	if (using_core_types &&
1186	hw_subset_item.attr[attr_idx].get_core_type() == core_type)
1187	break;
1188	if (using_core_effs &&
1189	hw_subset_item.attr[attr_idx].get_core_eff() == core_eff)
1190	break;
1191	}
1192	// This core attribute isn't in the KMP_HW_SUBSET so always filter it.
1193	if (attr_idx == hw_subset_item.num_attrs) {
1194	should_be_filtered = true;
1195	break;
1196	}
1197	int sub_id;
1198	int num = hw_subset_item.num[attr_idx];
1199	int offset = hw_subset_item.offset[attr_idx];
1200	if (using_core_types)
1201	sub_id = core_type_sub_ids.get_sub_id(hw_thread);
1202	else
1203	sub_id = core_eff_sub_ids.get_sub_id(hw_thread);
1204	if (sub_id < offset \|\|
1205	(num != kmp_hw_subset_t::USE_ALL && sub_id >= offset + num)) {
1206	should_be_filtered = true;
1207	break;
1208	}
1209	} else {
1210	int num = hw_subset_item.num[0];
1211	int offset = hw_subset_item.offset[0];
1212	if (hw_thread.sub_ids[level] < offset \|\|
1213	(num != kmp_hw_subset_t::USE_ALL &&
1214	hw_thread.sub_ids[level] >= offset + num)) {
1215	should_be_filtered = true;
1216	break;
1217	}
1218	}
1219	}
1220	// Collect filtering information
1221	filtered[i] = should_be_filtered;
1222	if (should_be_filtered)
1223	num_filtered++;
1224	}
1225
1226	// One last check that we shouldn't allow filtering entire machine
1227	if (num_filtered == num_hw_threads) {
1228	KMP_AFF_WARNING(__kmp_affinity, AffHWSubsetAllFiltered)if (__kmp_affinity.flags.verbose \|\| (__kmp_affinity.flags.warnings && (__kmp_affinity.type != affinity_none))) { __kmp_msg (kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffHWSubsetAllFiltered ), __kmp_msg_null); };
1229	__kmp_free(filtered)___kmp_free((filtered), "openmp/runtime/src/kmp_affinity.cpp" , 1229);
1230	return false;
1231	}
1232
1233	// Apply the filter
1234	int new_index = 0;
1235	for (int i = 0; i < num_hw_threads; ++i) {
1236	if (!filtered[i]) {
1237	if (i != new_index)
1238	hw_threads[new_index] = hw_threads[i];
1239	new_index++;
1240	} else {
1241	#if KMP_AFFINITY_SUPPORTED1
1242	KMP_CPU_CLR(hw_threads[i].os_id, __kmp_affin_fullMask)(__kmp_affin_fullMask)->clear(hw_threads[i].os_id);
1243	#endif
1244	__kmp_avail_proc--;
1245	}
1246	}
1247
1248	KMP_DEBUG_ASSERT(new_index <= num_hw_threads)if (!(new_index <= num_hw_threads)) { __kmp_debug_assert("new_index <= num_hw_threads" , "openmp/runtime/src/kmp_affinity.cpp", 1248); };
1249	num_hw_threads = new_index;
1250
1251	// Post hardware subset canonicalization
1252	_gather_enumeration_information();
1253	_discover_uniformity();
1254	_set_globals();
1255	_set_last_level_cache();
1256	__kmp_free(filtered)___kmp_free((filtered), "openmp/runtime/src/kmp_affinity.cpp" , 1256);
1257	return true;
1258	}
1259
1260	bool kmp_topology_t::is_close(int hwt1, int hwt2, int hw_level) const {
1261	if (hw_level >= depth)
1262	return true;
1263	bool retval = true;
1264	const kmp_hw_thread_t &t1 = hw_threads[hwt1];
1265	const kmp_hw_thread_t &t2 = hw_threads[hwt2];
1266	for (int i = 0; i < (depth - hw_level); ++i) {
1267	if (t1.ids[i] != t2.ids[i])
1268	return false;
1269	}
1270	return retval;
1271	}
1272
1273	////////////////////////////////////////////////////////////////////////////////
1274
1275	#if KMP_AFFINITY_SUPPORTED1
1276	class kmp_affinity_raii_t {
1277	kmp_affin_mask_t *mask;
1278	bool restored;
1279
1280	public:
1281	kmp_affinity_raii_t() : restored(false) {
1282	KMP_CPU_ALLOC(mask)(mask = __kmp_affinity_dispatch->allocate_mask());
1283	KMP_ASSERT(mask != NULL)if (!(mask != __null)) { __kmp_debug_assert("mask != NULL", "openmp/runtime/src/kmp_affinity.cpp" , 1283); };
1284	__kmp_get_system_affinity(mask, TRUE)(mask)->get_system_affinity((!0));
1285	}
1286	void restore() {
1287	__kmp_set_system_affinity(mask, TRUE)(mask)->set_system_affinity((!0));
1288	KMP_CPU_FREE(mask)__kmp_affinity_dispatch->deallocate_mask(mask);
1289	restored = true;
1290	}
1291	~kmp_affinity_raii_t() {
1292	if (!restored) {
1293	__kmp_set_system_affinity(mask, TRUE)(mask)->set_system_affinity((!0));
1294	KMP_CPU_FREE(mask)__kmp_affinity_dispatch->deallocate_mask(mask);
1295	}
1296	}
1297	};
1298
1299	bool KMPAffinity::picked_api = false;
1300
1301	void *KMPAffinity::Mask::operator new(size_t n) { return __kmp_allocate(n)___kmp_allocate((n), "openmp/runtime/src/kmp_affinity.cpp", 1301 ); }
1302	void *KMPAffinity::Mask::operator new[](size_t n) { return __kmp_allocate(n)___kmp_allocate((n), "openmp/runtime/src/kmp_affinity.cpp", 1302 ); }
1303	void KMPAffinity::Mask::operator delete(void *p) { __kmp_free(p)___kmp_free((p), "openmp/runtime/src/kmp_affinity.cpp", 1303); }
1304	void KMPAffinity::Mask::operator delete[](void *p) { __kmp_free(p)___kmp_free((p), "openmp/runtime/src/kmp_affinity.cpp", 1304); }
1305	void *KMPAffinity::operator new(size_t n) { return __kmp_allocate(n)___kmp_allocate((n), "openmp/runtime/src/kmp_affinity.cpp", 1305 ); }
1306	void KMPAffinity::operator delete(void *p) { __kmp_free(p)___kmp_free((p), "openmp/runtime/src/kmp_affinity.cpp", 1306); }
1307
1308	void KMPAffinity::pick_api() {
1309	KMPAffinity *affinity_dispatch;
1310	if (picked_api)
1311	return;
1312	#if KMP_USE_HWLOC0
1313	// Only use Hwloc if affinity isn't explicitly disabled and
1314	// user requests Hwloc topology method
1315	if (__kmp_affinity_top_method == affinity_top_method_hwloc &&
1316	__kmp_affinity.type != affinity_disabled) {
1317	affinity_dispatch = new KMPHwlocAffinity();
1318	} else
1319	#endif
1320	{
1321	affinity_dispatch = new KMPNativeAffinity();
1322	}
1323	__kmp_affinity_dispatch = affinity_dispatch;
1324	picked_api = true;
1325	}
1326
1327	void KMPAffinity::destroy_api() {
1328	if (__kmp_affinity_dispatch != NULL__null) {
1329	delete __kmp_affinity_dispatch;
1330	__kmp_affinity_dispatch = NULL__null;
1331	picked_api = false;
1332	}
1333	}
1334
1335	#define KMP_ADVANCE_SCAN(scan) \
1336	while (*scan != '\0') { \
1337	scan++; \
1338	}
1339
1340	// Print the affinity mask to the character array in a pretty format.
1341	// The format is a comma separated list of non-negative integers or integer
1342	// ranges: e.g., 1,2,3-5,7,9-15
1343	// The format can also be the string "{<empty>}" if no bits are set in mask
1344	char __kmp_affinity_print_mask(char buf, int buf_len,
1345	kmp_affin_mask_t *mask) {
1346	int start = 0, finish = 0, previous = 0;
1347	bool first_range;
1348	KMP_ASSERT(buf)if (!(buf)) { __kmp_debug_assert("buf", "openmp/runtime/src/kmp_affinity.cpp" , 1348); };
1349	KMP_ASSERT(buf_len >= 40)if (!(buf_len >= 40)) { __kmp_debug_assert("buf_len >= 40" , "openmp/runtime/src/kmp_affinity.cpp", 1349); };
1350	KMP_ASSERT(mask)if (!(mask)) { __kmp_debug_assert("mask", "openmp/runtime/src/kmp_affinity.cpp" , 1350); };
1351	char *scan = buf;
1352	char *end = buf + buf_len - 1;
1353
1354	// Check for empty set.
1355	if (mask->begin() == mask->end()) {
1356	KMP_SNPRINTFsnprintf(scan, end - scan + 1, "{<empty>}");
1357	KMP_ADVANCE_SCAN(scan);
1358	KMP_ASSERT(scan <= end)if (!(scan <= end)) { __kmp_debug_assert("scan <= end", "openmp/runtime/src/kmp_affinity.cpp", 1358); };
1359	return buf;
1360	}
1361
1362	first_range = true;
1363	start = mask->begin();
1364	while (1) {
1365	// Find next range
1366	// [start, previous] is inclusive range of contiguous bits in mask
1367	for (finish = mask->next(start), previous = start;
1368	finish == previous + 1 && finish != mask->end();
1369	finish = mask->next(finish)) {
1370	previous = finish;
1371	}
1372
1373	// The first range does not need a comma printed before it, but the rest
1374	// of the ranges do need a comma beforehand
1375	if (!first_range) {
1376	KMP_SNPRINTFsnprintf(scan, end - scan + 1, "%s", ",");
1377	KMP_ADVANCE_SCAN(scan);
1378	} else {
1379	first_range = false;
1380	}
1381	// Range with three or more contiguous bits in the affinity mask
1382	if (previous - start > 1) {
1383	KMP_SNPRINTFsnprintf(scan, end - scan + 1, "%u-%u", start, previous);
1384	} else {
1385	// Range with one or two contiguous bits in the affinity mask
1386	KMP_SNPRINTFsnprintf(scan, end - scan + 1, "%u", start);
1387	KMP_ADVANCE_SCAN(scan);
1388	if (previous - start > 0) {
1389	KMP_SNPRINTFsnprintf(scan, end - scan + 1, ",%u", previous);
1390	}
1391	}
1392	KMP_ADVANCE_SCAN(scan);
1393	// Start over with new start point
1394	start = finish;
1395	if (start == mask->end())
1396	break;
1397	// Check for overflow
1398	if (end - scan < 2)
1399	break;
1400	}
1401
1402	// Check for overflow
1403	KMP_ASSERT(scan <= end)if (!(scan <= end)) { __kmp_debug_assert("scan <= end", "openmp/runtime/src/kmp_affinity.cpp", 1403); };
1404	return buf;
1405	}
1406	#undef KMP_ADVANCE_SCAN
1407
1408	// Print the affinity mask to the string buffer object in a pretty format
1409	// The format is a comma separated list of non-negative integers or integer
1410	// ranges: e.g., 1,2,3-5,7,9-15
1411	// The format can also be the string "{<empty>}" if no bits are set in mask
1412	kmp_str_buf_t __kmp_affinity_str_buf_mask(kmp_str_buf_t buf,
1413	kmp_affin_mask_t *mask) {
1414	int start = 0, finish = 0, previous = 0;
1415	bool first_range;
1416	KMP_ASSERT(buf)if (!(buf)) { __kmp_debug_assert("buf", "openmp/runtime/src/kmp_affinity.cpp" , 1416); };
1417	KMP_ASSERT(mask)if (!(mask)) { __kmp_debug_assert("mask", "openmp/runtime/src/kmp_affinity.cpp" , 1417); };
1418
1419	__kmp_str_buf_clear(buf);
1420
1421	// Check for empty set.
1422	if (mask->begin() == mask->end()) {
1423	__kmp_str_buf_print(buf, "%s", "{<empty>}");
1424	return buf;
1425	}
1426
1427	first_range = true;
1428	start = mask->begin();
1429	while (1) {
1430	// Find next range
1431	// [start, previous] is inclusive range of contiguous bits in mask
1432	for (finish = mask->next(start), previous = start;
1433	finish == previous + 1 && finish != mask->end();
1434	finish = mask->next(finish)) {
1435	previous = finish;
1436	}
1437
1438	// The first range does not need a comma printed before it, but the rest
1439	// of the ranges do need a comma beforehand
1440	if (!first_range) {
1441	__kmp_str_buf_print(buf, "%s", ",");
1442	} else {
1443	first_range = false;
1444	}
1445	// Range with three or more contiguous bits in the affinity mask
1446	if (previous - start > 1) {
1447	__kmp_str_buf_print(buf, "%u-%u", start, previous);
1448	} else {
1449	// Range with one or two contiguous bits in the affinity mask
1450	__kmp_str_buf_print(buf, "%u", start);
1451	if (previous - start > 0) {
1452	__kmp_str_buf_print(buf, ",%u", previous);
1453	}
1454	}
1455	// Start over with new start point
1456	start = finish;
1457	if (start == mask->end())
1458	break;
1459	}
1460	return buf;
1461	}
1462
1463	// Return (possibly empty) affinity mask representing the offline CPUs
1464	// Caller must free the mask
1465	kmp_affin_mask_t *__kmp_affinity_get_offline_cpus() {
1466	kmp_affin_mask_t *offline;
1467	KMP_CPU_ALLOC(offline)(offline = __kmp_affinity_dispatch->allocate_mask());
1468	KMP_CPU_ZERO(offline)(offline)->zero();
1469	#if KMP_OS_LINUX1
1470	int n, begin_cpu, end_cpu;
1471	kmp_safe_raii_file_t offline_file;
1472	auto skip_ws = [](FILE *f) {
1473	int c;
1474	do {
1475	c = fgetc(f);
1476	} while (isspace(c));
1477	if (c != EOF(-1))
1478	ungetc(c, f);
1479	};
1480	// File contains CSV of integer ranges representing the offline CPUs
1481	// e.g., 1,2,4-7,9,11-15
1482	int status = offline_file.try_open("/sys/devices/system/cpu/offline", "r");
1483	if (status != 0)
1484	return offline;
1485	while (!feof(offline_file)) {
1486	skip_ws(offline_file);
1487	n = fscanf(offline_file, "%d", &begin_cpu);
1488	if (n != 1)
1489	break;
1490	skip_ws(offline_file);
1491	int c = fgetc(offline_file);
1492	if (c == EOF(-1) \|\| c == ',') {
1493	// Just single CPU
1494	end_cpu = begin_cpu;
1495	} else if (c == '-') {
1496	// Range of CPUs
1497	skip_ws(offline_file);
1498	n = fscanf(offline_file, "%d", &end_cpu);
1499	if (n != 1)
1500	break;
1501	skip_ws(offline_file);
1502	c = fgetc(offline_file); // skip ','
1503	} else {
1504	// Syntax problem
1505	break;
1506	}
1507	// Ensure a valid range of CPUs
1508	if (begin_cpu < 0 \|\| begin_cpu >= __kmp_xproc \|\| end_cpu < 0 \|\|
1509	end_cpu >= __kmp_xproc \|\| begin_cpu > end_cpu) {
1510	continue;
1511	}
1512	// Insert [begin_cpu, end_cpu] into offline mask
1513	for (int cpu = begin_cpu; cpu <= end_cpu; ++cpu) {
1514	KMP_CPU_SET(cpu, offline)(offline)->set(cpu);
1515	}
1516	}
1517	#endif
1518	return offline;
1519	}
1520
1521	// Return the number of available procs
1522	int __kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask) {
1523	int avail_proc = 0;
1524	KMP_CPU_ZERO(mask)(mask)->zero();
1525
1526	#if KMP_GROUP_AFFINITY0
1527
1528	if (__kmp_num_proc_groups > 1) {
1529	int group;
1530	KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL)if (!(__kmp_GetActiveProcessorCount != __null)) { __kmp_debug_assert ("__kmp_GetActiveProcessorCount != __null", "openmp/runtime/src/kmp_affinity.cpp" , 1530); };
1531	for (group = 0; group < __kmp_num_proc_groups; group++) {
1532	int i;
1533	int num = __kmp_GetActiveProcessorCount(group);
1534	for (i = 0; i < num; i++) {
1535	KMP_CPU_SET(i + group * (CHAR_BIT * sizeof(DWORD_PTR)), mask)(mask)->set(i + group * (8 * sizeof(DWORD_PTR)));
1536	avail_proc++;
1537	}
1538	}
1539	} else
1540
1541	#endif /* KMP_GROUP_AFFINITY */
1542
1543	{
1544	int proc;
1545	kmp_affin_mask_t *offline_cpus = __kmp_affinity_get_offline_cpus();
1546	for (proc = 0; proc < __kmp_xproc; proc++) {
1547	// Skip offline CPUs
1548	if (KMP_CPU_ISSET(proc, offline_cpus)(offline_cpus)->is_set(proc))
1549	continue;
1550	KMP_CPU_SET(proc, mask)(mask)->set(proc);
1551	avail_proc++;
1552	}
1553	KMP_CPU_FREE(offline_cpus)__kmp_affinity_dispatch->deallocate_mask(offline_cpus);
1554	}
1555
1556	return avail_proc;
1557	}
1558
1559	// All of the __kmp_affinity_create_*_map() routines should allocate the
1560	// internal topology object and set the layer ids for it. Each routine
1561	// returns a boolean on whether it was successful at doing so.
1562	kmp_affin_mask_t *__kmp_affin_fullMask = NULL__null;
1563	// Original mask is a subset of full mask in multiple processor groups topology
1564	kmp_affin_mask_t *__kmp_affin_origMask = NULL__null;
1565
1566	#if KMP_USE_HWLOC0
1567	static inline bool __kmp_hwloc_is_cache_type(hwloc_obj_t obj) {
1568	#if HWLOC_API_VERSION >= 0x00020000
1569	return hwloc_obj_type_is_cache(obj->type);
1570	#else
1571	return obj->type == HWLOC_OBJ_CACHE;
1572	#endif
1573	}
1574
1575	// Returns KMP_HW_* type derived from HWLOC_* type
1576	static inline kmp_hw_t __kmp_hwloc_type_2_topology_type(hwloc_obj_t obj) {
1577
1578	if (__kmp_hwloc_is_cache_type(obj)) {
1579	if (obj->attr->cache.type == HWLOC_OBJ_CACHE_INSTRUCTION)
1580	return KMP_HW_UNKNOWN;
1581	switch (obj->attr->cache.depth) {
1582	case 1:
1583	return KMP_HW_L1;
1584	case 2:
1585	#if KMP_MIC_SUPPORTED((0 \|\| 1) && (1 \|\| 0))
1586	if (__kmp_mic_type == mic3) {
1587	return KMP_HW_TILE;
1588	}
1589	#endif
1590	return KMP_HW_L2;
1591	case 3:
1592	return KMP_HW_L3;
1593	}
1594	return KMP_HW_UNKNOWN;
1595	}
1596
1597	switch (obj->type) {
1598	case HWLOC_OBJ_PACKAGE:
1599	return KMP_HW_SOCKET;
1600	case HWLOC_OBJ_NUMANODE:
1601	return KMP_HW_NUMA;
1602	case HWLOC_OBJ_CORE:
1603	return KMP_HW_CORE;
1604	case HWLOC_OBJ_PU:
1605	return KMP_HW_THREAD;
1606	case HWLOC_OBJ_GROUP:
1607	if (obj->attr->group.kind == HWLOC_GROUP_KIND_INTEL_DIE)
1608	return KMP_HW_DIE;
1609	else if (obj->attr->group.kind == HWLOC_GROUP_KIND_INTEL_TILE)
1610	return KMP_HW_TILE;
1611	else if (obj->attr->group.kind == HWLOC_GROUP_KIND_INTEL_MODULE)
1612	return KMP_HW_MODULE;
1613	else if (obj->attr->group.kind == HWLOC_GROUP_KIND_WINDOWS_PROCESSOR_GROUP)
1614	return KMP_HW_PROC_GROUP;
1615	return KMP_HW_UNKNOWN;
1616	#if HWLOC_API_VERSION >= 0x00020100
1617	case HWLOC_OBJ_DIE:
1618	return KMP_HW_DIE;
1619	#endif
1620	}
1621	return KMP_HW_UNKNOWN;
1622	}
1623
1624	// Returns the number of objects of type 'type' below 'obj' within the topology
1625	// tree structure. e.g., if obj is a HWLOC_OBJ_PACKAGE object, and type is
1626	// HWLOC_OBJ_PU, then this will return the number of PU's under the SOCKET
1627	// object.
1628	static int __kmp_hwloc_get_nobjs_under_obj(hwloc_obj_t obj,
1629	hwloc_obj_type_t type) {
1630	int retval = 0;
1631	hwloc_obj_t first;
1632	for (first = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, obj->type,
1633	obj->logical_index, type, 0);
1634	first != NULL__null && hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology,
1635	obj->type, first) == obj;
1636	first = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, first->type,
1637	first)) {
1638	++retval;
1639	}
1640	return retval;
1641	}
1642
1643	// This gets the sub_id for a lower object under a higher object in the
1644	// topology tree
1645	static int __kmp_hwloc_get_sub_id(hwloc_topology_t t, hwloc_obj_t higher,
1646	hwloc_obj_t lower) {
1647	hwloc_obj_t obj;
1648	hwloc_obj_type_t ltype = lower->type;
1649	int lindex = lower->logical_index - 1;
1650	int sub_id = 0;
1651	// Get the previous lower object
1652	obj = hwloc_get_obj_by_type(t, ltype, lindex);
1653	while (obj && lindex >= 0 &&
1654	hwloc_bitmap_isincluded(obj->cpuset, higher->cpuset)) {
1655	if (obj->userdata) {
1656	sub_id = (int)(RCAST(kmp_intptr_t, obj->userdata)reinterpret_cast<kmp_intptr_t>(obj->userdata));
1657	break;
1658	}
1659	sub_id++;
1660	lindex--;
1661	obj = hwloc_get_obj_by_type(t, ltype, lindex);
1662	}
1663	// store sub_id + 1 so that 0 is differed from NULL
1664	lower->userdata = RCAST(void , sub_id + 1)reinterpret_cast<void >(sub_id + 1);
1665	return sub_id;
1666	}
1667
1668	static bool __kmp_affinity_create_hwloc_map(kmp_i18n_id_t *const msg_id) {
1669	kmp_hw_t type;
1670	int hw_thread_index, sub_id;
1671	int depth;
1672	hwloc_obj_t pu, obj, root, prev;
1673	kmp_hw_t types[KMP_HW_LAST];
1674	hwloc_obj_type_t hwloc_types[KMP_HW_LAST];
1675
1676	hwloc_topology_t tp = __kmp_hwloc_topology;
1677	*msg_id = kmp_i18n_null;
1678	if (__kmp_affinity.flags.verbose) {
1679	KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffUsingHwloc , "KMP_AFFINITY"), __kmp_msg_null);
1680	}
1681
1682	if (!KMP_AFFINITY_CAPABLE()(__kmp_affin_mask_size > 0)) {
1683	// Hack to try and infer the machine topology using only the data
1684	// available from hwloc on the current thread, and __kmp_xproc.
1685	KMP_ASSERT(__kmp_affinity.type == affinity_none)if (!(__kmp_affinity.type == affinity_none)) { __kmp_debug_assert ("__kmp_affinity.type == affinity_none", "openmp/runtime/src/kmp_affinity.cpp" , 1685); };
1686	// hwloc only guarantees existance of PU object, so check PACKAGE and CORE
1687	hwloc_obj_t o = hwloc_get_obj_by_type(tp, HWLOC_OBJ_PACKAGE, 0);
1688	if (o != NULL__null)
1689	nCoresPerPkg = __kmp_hwloc_get_nobjs_under_obj(o, HWLOC_OBJ_CORE);
1690	else
1691	nCoresPerPkg = 1; // no PACKAGE found
1692	o = hwloc_get_obj_by_type(tp, HWLOC_OBJ_CORE, 0);
1693	if (o != NULL__null)
1694	__kmp_nThreadsPerCore = __kmp_hwloc_get_nobjs_under_obj(o, HWLOC_OBJ_PU);
1695	else
1696	__kmp_nThreadsPerCore = 1; // no CORE found
1697	__kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
1698	if (nCoresPerPkg == 0)
1699	nCoresPerPkg = 1; // to prevent possible division by 0
1700	nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
1701	return true;
1702	}
1703
1704	// Handle multiple types of cores if they exist on the system
1705	int nr_cpu_kinds = hwloc_cpukinds_get_nr(tp, 0);
1706
1707	typedef struct kmp_hwloc_cpukinds_info_t {
1708	int efficiency;
1709	kmp_hw_core_type_t core_type;
1710	hwloc_bitmap_t mask;
1711	} kmp_hwloc_cpukinds_info_t;
1712	kmp_hwloc_cpukinds_info_t *cpukinds = nullptr;
1713
1714	if (nr_cpu_kinds > 0) {
1715	unsigned nr_infos;
1716	struct hwloc_info_s *infos;
1717	cpukinds = (kmp_hwloc_cpukinds_info_t )__kmp_allocate(___kmp_allocate((sizeof(kmp_hwloc_cpukinds_info_t) nr_cpu_kinds ), "openmp/runtime/src/kmp_affinity.cpp", 1718)
1718	sizeof(kmp_hwloc_cpukinds_info_t) * nr_cpu_kinds)___kmp_allocate((sizeof(kmp_hwloc_cpukinds_info_t) * nr_cpu_kinds ), "openmp/runtime/src/kmp_affinity.cpp", 1718);
1719	for (unsigned idx = 0; idx < (unsigned)nr_cpu_kinds; ++idx) {
1720	cpukinds[idx].efficiency = -1;
1721	cpukinds[idx].core_type = KMP_HW_CORE_TYPE_UNKNOWN;
1722	cpukinds[idx].mask = hwloc_bitmap_alloc();
1723	if (hwloc_cpukinds_get_info(tp, idx, cpukinds[idx].mask,
1724	&cpukinds[idx].efficiency, &nr_infos, &infos,
1725	0) == 0) {
1726	for (unsigned i = 0; i < nr_infos; ++i) {
1727	if (__kmp_str_match("CoreType", 8, infos[i].name)) {
1728	#if KMP_ARCH_X860 \|\| KMP_ARCH_X86_641
1729	if (__kmp_str_match("IntelAtom", 9, infos[i].value)) {
1730	cpukinds[idx].core_type = KMP_HW_CORE_TYPE_ATOM;
1731	break;
1732	} else if (__kmp_str_match("IntelCore", 9, infos[i].value)) {
1733	cpukinds[idx].core_type = KMP_HW_CORE_TYPE_CORE;
1734	break;
1735	}
1736	#endif
1737	}
1738	}
1739	}
1740	}
1741	}
1742
1743	root = hwloc_get_root_obj(tp);
1744
1745	// Figure out the depth and types in the topology
1746	depth = 0;
1747	pu = hwloc_get_pu_obj_by_os_index(tp, __kmp_affin_fullMask->begin());
1748	KMP_ASSERT(pu)if (!(pu)) { __kmp_debug_assert("pu", "openmp/runtime/src/kmp_affinity.cpp" , 1748); };
1749	obj = pu;
1750	types[depth] = KMP_HW_THREAD;
1751	hwloc_types[depth] = obj->type;
1752	depth++;
1753	while (obj != root && obj != NULL__null) {
1754	obj = obj->parent;
1755	#if HWLOC_API_VERSION >= 0x00020000
1756	if (obj->memory_arity) {
1757	hwloc_obj_t memory;
1758	for (memory = obj->memory_first_child; memory;
1759	memory = hwloc_get_next_child(tp, obj, memory)) {
1760	if (memory->type == HWLOC_OBJ_NUMANODE)
1761	break;
1762	}
1763	if (memory && memory->type == HWLOC_OBJ_NUMANODE) {
1764	types[depth] = KMP_HW_NUMA;
1765	hwloc_types[depth] = memory->type;
1766	depth++;
1767	}
1768	}
1769	#endif
1770	type = __kmp_hwloc_type_2_topology_type(obj);
1771	if (type != KMP_HW_UNKNOWN) {
1772	types[depth] = type;
1773	hwloc_types[depth] = obj->type;
1774	depth++;
1775	}
1776	}
1777	KMP_ASSERT(depth > 0)if (!(depth > 0)) { __kmp_debug_assert("depth > 0", "openmp/runtime/src/kmp_affinity.cpp" , 1777); };
1778
1779	// Get the order for the types correct
1780	for (int i = 0, j = depth - 1; i < j; ++i, --j) {
1781	hwloc_obj_type_t hwloc_temp = hwloc_types[i];
1782	kmp_hw_t temp = types[i];
1783	types[i] = types[j];
1784	types[j] = temp;
1785	hwloc_types[i] = hwloc_types[j];
1786	hwloc_types[j] = hwloc_temp;
1787	}
1788
1789	// Allocate the data structure to be returned.
1790	__kmp_topology = kmp_topology_t::allocate(__kmp_avail_proc, depth, types);
1791
1792	hw_thread_index = 0;
1793	pu = NULL__null;
1794	while ((pu = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, pu))) {
1795	int index = depth - 1;
1796	bool included = KMP_CPU_ISSET(pu->os_index, __kmp_affin_fullMask)(__kmp_affin_fullMask)->is_set(pu->os_index);
1797	kmp_hw_thread_t &hw_thread = __kmp_topology->at(hw_thread_index);
1798	if (included) {
1799	hw_thread.clear();
1800	hw_thread.ids[index] = pu->logical_index;
1801	hw_thread.os_id = pu->os_index;
1802	// If multiple core types, then set that attribute for the hardware thread
1803	if (cpukinds) {
1804	int cpukind_index = -1;
1805	for (int i = 0; i < nr_cpu_kinds; ++i) {
1806	if (hwloc_bitmap_isset(cpukinds[i].mask, hw_thread.os_id)) {
1807	cpukind_index = i;
1808	break;
1809	}
1810	}
1811	if (cpukind_index >= 0) {
1812	hw_thread.attrs.set_core_type(cpukinds[cpukind_index].core_type);
1813	hw_thread.attrs.set_core_eff(cpukinds[cpukind_index].efficiency);
1814	}
1815	}
1816	index--;
1817	}
1818	obj = pu;
1819	prev = obj;
1820	while (obj != root && obj != NULL__null) {
1821	obj = obj->parent;
1822	#if HWLOC_API_VERSION >= 0x00020000
1823	// NUMA Nodes are handled differently since they are not within the
1824	// parent/child structure anymore. They are separate children
1825	// of obj (memory_first_child points to first memory child)
1826	if (obj->memory_arity) {
1827	hwloc_obj_t memory;
1828	for (memory = obj->memory_first_child; memory;
1829	memory = hwloc_get_next_child(tp, obj, memory)) {
1830	if (memory->type == HWLOC_OBJ_NUMANODE)
1831	break;
1832	}
1833	if (memory && memory->type == HWLOC_OBJ_NUMANODE) {
1834	sub_id = __kmp_hwloc_get_sub_id(tp, memory, prev);
1835	if (included) {
1836	hw_thread.ids[index] = memory->logical_index;
1837	hw_thread.ids[index + 1] = sub_id;
1838	index--;
1839	}
1840	prev = memory;
1841	}
1842	prev = obj;
1843	}
1844	#endif
1845	type = __kmp_hwloc_type_2_topology_type(obj);
1846	if (type != KMP_HW_UNKNOWN) {
1847	sub_id = __kmp_hwloc_get_sub_id(tp, obj, prev);
1848	if (included) {
1849	hw_thread.ids[index] = obj->logical_index;
1850	hw_thread.ids[index + 1] = sub_id;
1851	index--;
1852	}
1853	prev = obj;
1854	}
1855	}
1856	if (included)
1857	hw_thread_index++;
1858	}
1859
1860	// Free the core types information
1861	if (cpukinds) {
1862	for (int idx = 0; idx < nr_cpu_kinds; ++idx)
1863	hwloc_bitmap_free(cpukinds[idx].mask);
1864	__kmp_free(cpukinds)___kmp_free((cpukinds), "openmp/runtime/src/kmp_affinity.cpp" , 1864);
1865	}
1866	__kmp_topology->sort_ids();
1867	return true;
1868	}
1869	#endif // KMP_USE_HWLOC
1870
1871	// If we don't know how to retrieve the machine's processor topology, or
1872	// encounter an error in doing so, this routine is called to form a "flat"
1873	// mapping of os thread id's <-> processor id's.
1874	static bool __kmp_affinity_create_flat_map(kmp_i18n_id_t *const msg_id) {
1875	*msg_id = kmp_i18n_null;
1876	int depth = 3;
1877	kmp_hw_t types[] = {KMP_HW_SOCKET, KMP_HW_CORE, KMP_HW_THREAD};
1878
1879	if (__kmp_affinity.flags.verbose) {
1880	KMP_INFORM(UsingFlatOS, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_UsingFlatOS , "KMP_AFFINITY"), __kmp_msg_null);
1881	}
1882
1883	// Even if __kmp_affinity.type == affinity_none, this routine might still
1884	// be called to set __kmp_ncores, as well as
1885	// __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
1886	if (!KMP_AFFINITY_CAPABLE()(__kmp_affin_mask_size > 0)) {
1887	KMP_ASSERT(__kmp_affinity.type == affinity_none)if (!(__kmp_affinity.type == affinity_none)) { __kmp_debug_assert ("__kmp_affinity.type == affinity_none", "openmp/runtime/src/kmp_affinity.cpp" , 1887); };
1888	__kmp_ncores = nPackages = __kmp_xproc;
1889	__kmp_nThreadsPerCore = nCoresPerPkg = 1;
1890	return true;
1891	}
1892
1893	// When affinity is off, this routine will still be called to set
1894	// __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
1895	// Make sure all these vars are set correctly, and return now if affinity is
1896	// not enabled.
1897	__kmp_ncores = nPackages = __kmp_avail_proc;
1898	__kmp_nThreadsPerCore = nCoresPerPkg = 1;
1899
1900	// Construct the data structure to be returned.
1901	__kmp_topology = kmp_topology_t::allocate(__kmp_avail_proc, depth, types);
1902	int avail_ct = 0;
1903	int i;
1904	KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask)for (i = (__kmp_affin_fullMask)->begin(); (int)i != (__kmp_affin_fullMask )->end(); i = (__kmp_affin_fullMask)->next(i)) {
1905	// Skip this proc if it is not included in the machine model.
1906	if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)(__kmp_affin_fullMask)->is_set(i)) {
1907	continue;
1908	}
1909	kmp_hw_thread_t &hw_thread = __kmp_topology->at(avail_ct);
1910	hw_thread.clear();
1911	hw_thread.os_id = i;
1912	hw_thread.ids[0] = i;
1913	hw_thread.ids[1] = 0;
1914	hw_thread.ids[2] = 0;
1915	avail_ct++;
1916	}
1917	if (__kmp_affinity.flags.verbose) {
1918	KMP_INFORM(OSProcToPackage, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_OSProcToPackage , "KMP_AFFINITY"), __kmp_msg_null);
1919	}
1920	return true;
1921	}
1922
1923	#if KMP_GROUP_AFFINITY0
1924	// If multiple Windows* OS processor groups exist, we can create a 2-level
1925	// topology map with the groups at level 0 and the individual procs at level 1.
1926	// This facilitates letting the threads float among all procs in a group,
1927	// if granularity=group (the default when there are multiple groups).
1928	static bool __kmp_affinity_create_proc_group_map(kmp_i18n_id_t *const msg_id) {
1929	*msg_id = kmp_i18n_null;
1930	int depth = 3;
1931	kmp_hw_t types[] = {KMP_HW_PROC_GROUP, KMP_HW_CORE, KMP_HW_THREAD};
1932	const static size_t BITS_PER_GROUP = CHAR_BIT8 * sizeof(DWORD_PTR);
1933
1934	if (__kmp_affinity.flags.verbose) {
1935	KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffWindowsProcGroupMap , "KMP_AFFINITY"), __kmp_msg_null);
1936	}
1937
1938	// If we aren't affinity capable, then use flat topology
1939	if (!KMP_AFFINITY_CAPABLE()(__kmp_affin_mask_size > 0)) {
1940	KMP_ASSERT(__kmp_affinity.type == affinity_none)if (!(__kmp_affinity.type == affinity_none)) { __kmp_debug_assert ("__kmp_affinity.type == affinity_none", "openmp/runtime/src/kmp_affinity.cpp" , 1940); };
1941	nPackages = __kmp_num_proc_groups;
1942	__kmp_nThreadsPerCore = 1;
1943	__kmp_ncores = __kmp_xproc;
1944	nCoresPerPkg = nPackages / __kmp_ncores;
1945	return true;
1946	}
1947
1948	// Construct the data structure to be returned.
1949	__kmp_topology = kmp_topology_t::allocate(__kmp_avail_proc, depth, types);
1950	int avail_ct = 0;
1951	int i;
1952	KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask)for (i = (__kmp_affin_fullMask)->begin(); (int)i != (__kmp_affin_fullMask )->end(); i = (__kmp_affin_fullMask)->next(i)) {
1953	// Skip this proc if it is not included in the machine model.
1954	if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)(__kmp_affin_fullMask)->is_set(i)) {
1955	continue;
1956	}
1957	kmp_hw_thread_t &hw_thread = __kmp_topology->at(avail_ct++);
1958	hw_thread.clear();
1959	hw_thread.os_id = i;
1960	hw_thread.ids[0] = i / BITS_PER_GROUP;
1961	hw_thread.ids[1] = hw_thread.ids[2] = i % BITS_PER_GROUP;
1962	}
1963	return true;
1964	}
1965	#endif /* KMP_GROUP_AFFINITY */
1966
1967	#if KMP_ARCH_X860 \|\| KMP_ARCH_X86_641
1968
1969	template <kmp_uint32 LSB, kmp_uint32 MSB>
1970	static inline unsigned __kmp_extract_bits(kmp_uint32 v) {
1971	const kmp_uint32 SHIFT_LEFT = sizeof(kmp_uint32) * 8 - 1 - MSB;
1972	const kmp_uint32 SHIFT_RIGHT = LSB;
1973	kmp_uint32 retval = v;
1974	retval <<= SHIFT_LEFT;
1975	retval >>= (SHIFT_LEFT + SHIFT_RIGHT);
1976	return retval;
1977	}
1978
1979	static int __kmp_cpuid_mask_width(int count) {
1980	int r = 0;
1981
1982	while ((1 << r) < count)
1983	++r;
1984	return r;
1985	}
1986
1987	class apicThreadInfo {
1988	public:
1989	unsigned osId; // param to __kmp_affinity_bind_thread
1990	unsigned apicId; // from cpuid after binding
1991	unsigned maxCoresPerPkg; // ""
1992	unsigned maxThreadsPerPkg; // ""
1993	unsigned pkgId; // inferred from above values
1994	unsigned coreId; // ""
1995	unsigned threadId; // ""
1996	};
1997
1998	static int __kmp_affinity_cmp_apicThreadInfo_phys_id(const void *a,
1999	const void *b) {
2000	const apicThreadInfo aa = (const apicThreadInfo )a;
2001	const apicThreadInfo bb = (const apicThreadInfo )b;
2002	if (aa->pkgId < bb->pkgId)
2003	return -1;
2004	if (aa->pkgId > bb->pkgId)
2005	return 1;
2006	if (aa->coreId < bb->coreId)
2007	return -1;
2008	if (aa->coreId > bb->coreId)
2009	return 1;
2010	if (aa->threadId < bb->threadId)
2011	return -1;
2012	if (aa->threadId > bb->threadId)
2013	return 1;
2014	return 0;
2015	}
2016
2017	class kmp_cache_info_t {
2018	public:
2019	struct info_t {
2020	unsigned level, mask;
2021	};
2022	kmp_cache_info_t() : depth(0) { get_leaf4_levels(); }
2023	size_t get_depth() const { return depth; }
2024	info_t &operator[](size_t index) { return table[index]; }
2025	const info_t &operator[](size_t index) const { return table[index]; }
2026
2027	static kmp_hw_t get_topology_type(unsigned level) {
2028	KMP_DEBUG_ASSERT(level >= 1 && level <= MAX_CACHE_LEVEL)if (!(level >= 1 && level <= MAX_CACHE_LEVEL)) { __kmp_debug_assert("level >= 1 && level <= MAX_CACHE_LEVEL" , "openmp/runtime/src/kmp_affinity.cpp", 2028); };
2029	switch (level) {
2030	case 1:
2031	return KMP_HW_L1;
2032	case 2:
2033	return KMP_HW_L2;
2034	case 3:
2035	return KMP_HW_L3;
2036	}
2037	return KMP_HW_UNKNOWN;
2038	}
2039
2040	private:
2041	static const int MAX_CACHE_LEVEL = 3;
2042
2043	size_t depth;
2044	info_t table[MAX_CACHE_LEVEL];
2045
2046	void get_leaf4_levels() {
2047	unsigned level = 0;
2048	while (depth < MAX_CACHE_LEVEL) {
2049	unsigned cache_type, max_threads_sharing;
2050	unsigned cache_level, cache_mask_width;
2051	kmp_cpuid buf2;
2052	__kmp_x86_cpuid(4, level, &buf2);
2053	cache_type = __kmp_extract_bits<0, 4>(buf2.eax);
2054	if (!cache_type)
2055	break;
2056	// Skip instruction caches
2057	if (cache_type == 2) {
2058	level++;
2059	continue;
2060	}
2061	max_threads_sharing = __kmp_extract_bits<14, 25>(buf2.eax) + 1;
2062	cache_mask_width = __kmp_cpuid_mask_width(max_threads_sharing);
2063	cache_level = __kmp_extract_bits<5, 7>(buf2.eax);
2064	table[depth].level = cache_level;
2065	table[depth].mask = ((-1) << cache_mask_width);
2066	depth++;
2067	level++;
2068	}
2069	}
2070	};
2071
2072	// On IA-32 architecture and Intel(R) 64 architecture, we attempt to use
2073	// an algorithm which cycles through the available os threads, setting
2074	// the current thread's affinity mask to that thread, and then retrieves
2075	// the Apic Id for each thread context using the cpuid instruction.
2076	static bool __kmp_affinity_create_apicid_map(kmp_i18n_id_t *const msg_id) {
2077	kmp_cpuid buf;
2078	*msg_id = kmp_i18n_null;
2079
2080	if (__kmp_affinity.flags.verbose) {
2081	KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC))__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffInfoStr , "KMP_AFFINITY", __kmp_i18n_catgets(kmp_i18n_str_DecodingLegacyAPIC )), __kmp_msg_null);
2082	}
2083
2084	// Check if cpuid leaf 4 is supported.
2085	__kmp_x86_cpuid(0, 0, &buf);
2086	if (buf.eax < 4) {
2087	*msg_id = kmp_i18n_str_NoLeaf4Support;
2088	return false;
2089	}
2090
2091	// The algorithm used starts by setting the affinity to each available thread
2092	// and retrieving info from the cpuid instruction, so if we are not capable of
2093	// calling __kmp_get_system_affinity() and _kmp_get_system_affinity(), then we
2094	// need to do something else - use the defaults that we calculated from
2095	// issuing cpuid without binding to each proc.
2096	if (!KMP_AFFINITY_CAPABLE()(__kmp_affin_mask_size > 0)) {
2097	// Hack to try and infer the machine topology using only the data
2098	// available from cpuid on the current thread, and __kmp_xproc.
2099	KMP_ASSERT(__kmp_affinity.type == affinity_none)if (!(__kmp_affinity.type == affinity_none)) { __kmp_debug_assert ("__kmp_affinity.type == affinity_none", "openmp/runtime/src/kmp_affinity.cpp" , 2099); };
2100
2101	// Get an upper bound on the number of threads per package using cpuid(1).
2102	// On some OS/chps combinations where HT is supported by the chip but is
2103	// disabled, this value will be 2 on a single core chip. Usually, it will be
2104	// 2 if HT is enabled and 1 if HT is disabled.
2105	__kmp_x86_cpuid(1, 0, &buf);
2106	int maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
2107	if (maxThreadsPerPkg == 0) {
2108	maxThreadsPerPkg = 1;
2109	}
2110
2111	// The num cores per pkg comes from cpuid(4). 1 must be added to the encoded
2112	// value.
2113	//
2114	// The author of cpu_count.cpp treated this only an upper bound on the
2115	// number of cores, but I haven't seen any cases where it was greater than
2116	// the actual number of cores, so we will treat it as exact in this block of
2117	// code.
2118	//
2119	// First, we need to check if cpuid(4) is supported on this chip. To see if
2120	// cpuid(n) is supported, issue cpuid(0) and check if eax has the value n or
2121	// greater.
2122	__kmp_x86_cpuid(0, 0, &buf);
2123	if (buf.eax >= 4) {
2124	__kmp_x86_cpuid(4, 0, &buf);
2125	nCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
2126	} else {
2127	nCoresPerPkg = 1;
2128	}
2129
2130	// There is no way to reliably tell if HT is enabled without issuing the
2131	// cpuid instruction from every thread, can correlating the cpuid info, so
2132	// if the machine is not affinity capable, we assume that HT is off. We have
2133	// seen quite a few machines where maxThreadsPerPkg is 2, yet the machine
2134	// does not support HT.
2135	//
2136	// - Older OSes are usually found on machines with older chips, which do not
2137	// support HT.
2138	// - The performance penalty for mistakenly identifying a machine as HT when
2139	// it isn't (which results in blocktime being incorrectly set to 0) is
2140	// greater than the penalty when for mistakenly identifying a machine as
2141	// being 1 thread/core when it is really HT enabled (which results in
2142	// blocktime being incorrectly set to a positive value).
2143	__kmp_ncores = __kmp_xproc;
2144	nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
2145	__kmp_nThreadsPerCore = 1;
2146	return true;
2147	}
2148
2149	// From here on, we can assume that it is safe to call
2150	// __kmp_get_system_affinity() and __kmp_set_system_affinity(), even if
2151	// __kmp_affinity.type = affinity_none.
2152
2153	// Save the affinity mask for the current thread.
2154	kmp_affinity_raii_t previous_affinity;
2155
2156	// Run through each of the available contexts, binding the current thread
2157	// to it, and obtaining the pertinent information using the cpuid instr.
2158	//
2159	// The relevant information is:
2160	// - Apic Id: Bits 24:31 of ebx after issuing cpuid(1) - each thread context
2161	// has a uniqie Apic Id, which is of the form pkg# : core# : thread#.
2162	// - Max Threads Per Pkg: Bits 16:23 of ebx after issuing cpuid(1). The value
2163	// of this field determines the width of the core# + thread# fields in the
2164	// Apic Id. It is also an upper bound on the number of threads per
2165	// package, but it has been verified that situations happen were it is not
2166	// exact. In particular, on certain OS/chip combinations where Intel(R)
2167	// Hyper-Threading Technology is supported by the chip but has been
2168	// disabled, the value of this field will be 2 (for a single core chip).
2169	// On other OS/chip combinations supporting Intel(R) Hyper-Threading
2170	// Technology, the value of this field will be 1 when Intel(R)
2171	// Hyper-Threading Technology is disabled and 2 when it is enabled.
2172	// - Max Cores Per Pkg: Bits 26:31 of eax after issuing cpuid(4). The value
2173	// of this field (+1) determines the width of the core# field in the Apic
2174	// Id. The comments in "cpucount.cpp" say that this value is an upper
2175	// bound, but the IA-32 architecture manual says that it is exactly the
2176	// number of cores per package, and I haven't seen any case where it
2177	// wasn't.
2178	//
2179	// From this information, deduce the package Id, core Id, and thread Id,
2180	// and set the corresponding fields in the apicThreadInfo struct.
2181	unsigned i;
2182	apicThreadInfo threadInfo = (apicThreadInfo )__kmp_allocate(___kmp_allocate((__kmp_avail_proc * sizeof(apicThreadInfo)), "openmp/runtime/src/kmp_affinity.cpp" , 2183)
2183	__kmp_avail_proc * sizeof(apicThreadInfo))___kmp_allocate((__kmp_avail_proc * sizeof(apicThreadInfo)), "openmp/runtime/src/kmp_affinity.cpp" , 2183);
2184	unsigned nApics = 0;
2185	KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask)for (i = (__kmp_affin_fullMask)->begin(); (int)i != (__kmp_affin_fullMask )->end(); i = (__kmp_affin_fullMask)->next(i)) {
2186	// Skip this proc if it is not included in the machine model.
2187	if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)(__kmp_affin_fullMask)->is_set(i)) {
2188	continue;
2189	}
2190	KMP_DEBUG_ASSERT((int)nApics < __kmp_avail_proc)if (!((int)nApics < __kmp_avail_proc)) { __kmp_debug_assert ("(int)nApics < __kmp_avail_proc", "openmp/runtime/src/kmp_affinity.cpp" , 2190); };
2191
2192	__kmp_affinity_dispatch->bind_thread(i);
2193	threadInfo[nApics].osId = i;
2194
2195	// The apic id and max threads per pkg come from cpuid(1).
2196	__kmp_x86_cpuid(1, 0, &buf);
2197	if (((buf.edx >> 9) & 1) == 0) {
2198	__kmp_free(threadInfo)___kmp_free((threadInfo), "openmp/runtime/src/kmp_affinity.cpp" , 2198);
2199	*msg_id = kmp_i18n_str_ApicNotPresent;
2200	return false;
2201	}
2202	threadInfo[nApics].apicId = (buf.ebx >> 24) & 0xff;
2203	threadInfo[nApics].maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
2204	if (threadInfo[nApics].maxThreadsPerPkg == 0) {
2205	threadInfo[nApics].maxThreadsPerPkg = 1;
2206	}
2207
2208	// Max cores per pkg comes from cpuid(4). 1 must be added to the encoded
2209	// value.
2210	//
2211	// First, we need to check if cpuid(4) is supported on this chip. To see if
2212	// cpuid(n) is supported, issue cpuid(0) and check if eax has the value n
2213	// or greater.
2214	__kmp_x86_cpuid(0, 0, &buf);
2215	if (buf.eax >= 4) {
2216	__kmp_x86_cpuid(4, 0, &buf);
2217	threadInfo[nApics].maxCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
2218	} else {
2219	threadInfo[nApics].maxCoresPerPkg = 1;
2220	}
2221
2222	// Infer the pkgId / coreId / threadId using only the info obtained locally.
2223	int widthCT = __kmp_cpuid_mask_width(threadInfo[nApics].maxThreadsPerPkg);
2224	threadInfo[nApics].pkgId = threadInfo[nApics].apicId >> widthCT;
2225
2226	int widthC = __kmp_cpuid_mask_width(threadInfo[nApics].maxCoresPerPkg);
2227	int widthT = widthCT - widthC;
2228	if (widthT < 0) {
2229	// I've never seen this one happen, but I suppose it could, if the cpuid
2230	// instruction on a chip was really screwed up. Make sure to restore the
2231	// affinity mask before the tail call.
2232	__kmp_free(threadInfo)___kmp_free((threadInfo), "openmp/runtime/src/kmp_affinity.cpp" , 2232);
2233	*msg_id = kmp_i18n_str_InvalidCpuidInfo;
2234	return false;
2235	}
2236
2237	int maskC = (1 << widthC) - 1;
2238	threadInfo[nApics].coreId = (threadInfo[nApics].apicId >> widthT) & maskC;
2239
2240	int maskT = (1 << widthT) - 1;
2241	threadInfo[nApics].threadId = threadInfo[nApics].apicId & maskT;
2242
2243	nApics++;
2244	}
2245
2246	// We've collected all the info we need.
2247	// Restore the old affinity mask for this thread.
2248	previous_affinity.restore();
2249
2250	// Sort the threadInfo table by physical Id.
2251	qsort(threadInfo, nApics, sizeof(*threadInfo),
2252	__kmp_affinity_cmp_apicThreadInfo_phys_id);
2253
2254	// The table is now sorted by pkgId / coreId / threadId, but we really don't
2255	// know the radix of any of the fields. pkgId's may be sparsely assigned among
2256	// the chips on a system. Although coreId's are usually assigned
2257	// [0 .. coresPerPkg-1] and threadId's are usually assigned
2258	// [0..threadsPerCore-1], we don't want to make any such assumptions.
2259	//
2260	// For that matter, we don't know what coresPerPkg and threadsPerCore (or the
2261	// total # packages) are at this point - we want to determine that now. We
2262	// only have an upper bound on the first two figures.
2263	//
2264	// We also perform a consistency check at this point: the values returned by
2265	// the cpuid instruction for any thread bound to a given package had better
2266	// return the same info for maxThreadsPerPkg and maxCoresPerPkg.
2267	nPackages = 1;
2268	nCoresPerPkg = 1;
2269	__kmp_nThreadsPerCore = 1;
2270	unsigned nCores = 1;
2271
2272	unsigned pkgCt = 1; // to determine radii
2273	unsigned lastPkgId = threadInfo[0].pkgId;
2274	unsigned coreCt = 1;
2275	unsigned lastCoreId = threadInfo[0].coreId;
2276	unsigned threadCt = 1;
2277	unsigned lastThreadId = threadInfo[0].threadId;
2278
2279	// intra-pkg consist checks
2280	unsigned prevMaxCoresPerPkg = threadInfo[0].maxCoresPerPkg;
2281	unsigned prevMaxThreadsPerPkg = threadInfo[0].maxThreadsPerPkg;
2282
2283	for (i = 1; i < nApics; i++) {
2284	if (threadInfo[i].pkgId != lastPkgId) {
2285	nCores++;
2286	pkgCt++;
2287	lastPkgId = threadInfo[i].pkgId;
2288	if ((int)coreCt > nCoresPerPkg)
2289	nCoresPerPkg = coreCt;
2290	coreCt = 1;
2291	lastCoreId = threadInfo[i].coreId;
2292	if ((int)threadCt > __kmp_nThreadsPerCore)
2293	__kmp_nThreadsPerCore = threadCt;
2294	threadCt = 1;
2295	lastThreadId = threadInfo[i].threadId;
2296
2297	// This is a different package, so go on to the next iteration without
2298	// doing any consistency checks. Reset the consistency check vars, though.
2299	prevMaxCoresPerPkg = threadInfo[i].maxCoresPerPkg;
2300	prevMaxThreadsPerPkg = threadInfo[i].maxThreadsPerPkg;
2301	continue;
2302	}
2303
2304	if (threadInfo[i].coreId != lastCoreId) {
2305	nCores++;
2306	coreCt++;
2307	lastCoreId = threadInfo[i].coreId;
2308	if ((int)threadCt > __kmp_nThreadsPerCore)
2309	__kmp_nThreadsPerCore = threadCt;
2310	threadCt = 1;
2311	lastThreadId = threadInfo[i].threadId;
2312	} else if (threadInfo[i].threadId != lastThreadId) {
2313	threadCt++;
2314	lastThreadId = threadInfo[i].threadId;
2315	} else {
2316	__kmp_free(threadInfo)___kmp_free((threadInfo), "openmp/runtime/src/kmp_affinity.cpp" , 2316);
2317	*msg_id = kmp_i18n_str_LegacyApicIDsNotUnique;
2318	return false;
2319	}
2320
2321	// Check to make certain that the maxCoresPerPkg and maxThreadsPerPkg
2322	// fields agree between all the threads bounds to a given package.
2323	if ((prevMaxCoresPerPkg != threadInfo[i].maxCoresPerPkg) \|\|
2324	(prevMaxThreadsPerPkg != threadInfo[i].maxThreadsPerPkg)) {
2325	__kmp_free(threadInfo)___kmp_free((threadInfo), "openmp/runtime/src/kmp_affinity.cpp" , 2325);
2326	*msg_id = kmp_i18n_str_InconsistentCpuidInfo;
2327	return false;
2328	}
2329	}
2330	// When affinity is off, this routine will still be called to set
2331	// __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
2332	// Make sure all these vars are set correctly
2333	nPackages = pkgCt;
2334	if ((int)coreCt > nCoresPerPkg)
2335	nCoresPerPkg = coreCt;
2336	if ((int)threadCt > __kmp_nThreadsPerCore)
2337	__kmp_nThreadsPerCore = threadCt;
2338	__kmp_ncores = nCores;
2339	KMP_DEBUG_ASSERT(nApics == (unsigned)__kmp_avail_proc)if (!(nApics == (unsigned)__kmp_avail_proc)) { __kmp_debug_assert ("nApics == (unsigned)__kmp_avail_proc", "openmp/runtime/src/kmp_affinity.cpp" , 2339); };
2340
2341	// Now that we've determined the number of packages, the number of cores per
2342	// package, and the number of threads per core, we can construct the data
2343	// structure that is to be returned.
2344	int idx = 0;
2345	int pkgLevel = 0;
2346	int coreLevel = 1;
2347	int threadLevel = 2;
2348	//(__kmp_nThreadsPerCore <= 1) ? -1 : ((coreLevel >= 0) ? 2 : 1);
2349	int depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0);
2350	kmp_hw_t types[3];
2351	if (pkgLevel >= 0)
2352	types[idx++] = KMP_HW_SOCKET;
2353	if (coreLevel >= 0)
2354	types[idx++] = KMP_HW_CORE;
2355	if (threadLevel >= 0)
2356	types[idx++] = KMP_HW_THREAD;
2357
2358	KMP_ASSERT(depth > 0)if (!(depth > 0)) { __kmp_debug_assert("depth > 0", "openmp/runtime/src/kmp_affinity.cpp" , 2358); };
2359	__kmp_topology = kmp_topology_t::allocate(nApics, depth, types);
2360
2361	for (i = 0; i < nApics; ++i) {
2362	idx = 0;
2363	unsigned os = threadInfo[i].osId;
2364	kmp_hw_thread_t &hw_thread = __kmp_topology->at(i);
2365	hw_thread.clear();
2366
2367	if (pkgLevel >= 0) {
2368	hw_thread.ids[idx++] = threadInfo[i].pkgId;
2369	}
2370	if (coreLevel >= 0) {
2371	hw_thread.ids[idx++] = threadInfo[i].coreId;
2372	}
2373	if (threadLevel >= 0) {
2374	hw_thread.ids[idx++] = threadInfo[i].threadId;
2375	}
2376	hw_thread.os_id = os;
2377	}
2378
2379	__kmp_free(threadInfo)___kmp_free((threadInfo), "openmp/runtime/src/kmp_affinity.cpp" , 2379);
2380	__kmp_topology->sort_ids();
2381	if (!__kmp_topology->check_ids()) {
2382	kmp_topology_t::deallocate(__kmp_topology);
2383	__kmp_topology = nullptr;
2384	*msg_id = kmp_i18n_str_LegacyApicIDsNotUnique;
2385	return false;
2386	}
2387	return true;
2388	}
2389
2390	// Hybrid cpu detection using CPUID.1A
2391	// Thread should be pinned to processor already
2392	static void __kmp_get_hybrid_info(kmp_hw_core_type_t type, int efficiency,
2393	unsigned *native_model_id) {
2394	kmp_cpuid buf;
2395	__kmp_x86_cpuid(0x1a, 0, &buf);
2396	*type = (kmp_hw_core_type_t)__kmp_extract_bits<24, 31>(buf.eax);
2397	switch (*type) {
2398	case KMP_HW_CORE_TYPE_ATOM:
2399	*efficiency = 0;
2400	break;
2401	case KMP_HW_CORE_TYPE_CORE:
2402	*efficiency = 1;
2403	break;
2404	default:
2405	*efficiency = 0;
2406	}
2407	*native_model_id = __kmp_extract_bits<0, 23>(buf.eax);
2408	}
2409
2410	// Intel(R) microarchitecture code name Nehalem, Dunnington and later
2411	// architectures support a newer interface for specifying the x2APIC Ids,
2412	// based on CPUID.B or CPUID.1F
2413	/*
2414	* CPUID.B or 1F, Input ECX (sub leaf # aka level number)
2415	Bits Bits Bits Bits
2416	31-16 15-8 7-4 4-0
2417	---+-----------+--------------+-------------+-----------------+
2418	EAX\| reserved \| reserved \| reserved \| Bits to Shift \|
2419	---+-----------\|--------------+-------------+-----------------\|
2420	EBX\| reserved \| Num logical processors at level (16 bits) \|
2421	---+-----------\|--------------+-------------------------------\|
2422	ECX\| reserved \| Level Type \| Level Number (8 bits) \|
2423	---+-----------+--------------+-------------------------------\|
2424	EDX\| X2APIC ID (32 bits) \|
2425	---+----------------------------------------------------------+
2426	*/
2427
2428	enum {
2429	INTEL_LEVEL_TYPE_INVALID = 0, // Package level
2430	INTEL_LEVEL_TYPE_SMT = 1,
2431	INTEL_LEVEL_TYPE_CORE = 2,
2432	INTEL_LEVEL_TYPE_TILE = 3,
2433	INTEL_LEVEL_TYPE_MODULE = 4,
2434	INTEL_LEVEL_TYPE_DIE = 5,
2435	INTEL_LEVEL_TYPE_LAST = 6,
2436	};
2437
2438	struct cpuid_level_info_t {
2439	unsigned level_type, mask, mask_width, nitems, cache_mask;
2440	};
2441
2442	static kmp_hw_t __kmp_intel_type_2_topology_type(int intel_type) {
2443	switch (intel_type) {
2444	case INTEL_LEVEL_TYPE_INVALID:
2445	return KMP_HW_SOCKET;
2446	case INTEL_LEVEL_TYPE_SMT:
2447	return KMP_HW_THREAD;
2448	case INTEL_LEVEL_TYPE_CORE:
2449	return KMP_HW_CORE;
2450	case INTEL_LEVEL_TYPE_TILE:
2451	return KMP_HW_TILE;
2452	case INTEL_LEVEL_TYPE_MODULE:
2453	return KMP_HW_MODULE;
2454	case INTEL_LEVEL_TYPE_DIE:
2455	return KMP_HW_DIE;
2456	}
2457	return KMP_HW_UNKNOWN;
2458	}
2459
2460	// This function takes the topology leaf, a levels array to store the levels
2461	// detected and a bitmap of the known levels.
2462	// Returns the number of levels in the topology
2463	static unsigned
2464	__kmp_x2apicid_get_levels(int leaf,
2465	cpuid_level_info_t levels[INTEL_LEVEL_TYPE_LAST],
2466	kmp_uint64 known_levels) {
2467	unsigned level, levels_index;
2468	unsigned level_type, mask_width, nitems;
2469	kmp_cpuid buf;
2470
2471	// New algorithm has known topology layers act as highest unknown topology
2472	// layers when unknown topology layers exist.
2473	// e.g., Suppose layers were SMT <X> CORE <Y> <Z> PACKAGE, where <X> <Y> <Z>
2474	// are unknown topology layers, Then SMT will take the characteristics of
2475	// (SMT x <X>) and CORE will take the characteristics of (CORE x <Y> x <Z>).
2476	// This eliminates unknown portions of the topology while still keeping the
2477	// correct structure.
2478	level = levels_index = 0;
2479	do {
2480	__kmp_x86_cpuid(leaf, level, &buf);
2481	level_type = __kmp_extract_bits<8, 15>(buf.ecx);
2482	mask_width = __kmp_extract_bits<0, 4>(buf.eax);
2483	nitems = __kmp_extract_bits<0, 15>(buf.ebx);
2484	if (level_type != INTEL_LEVEL_TYPE_INVALID && nitems == 0)
2485	return 0;
2486
2487	if (known_levels & (1ull << level_type)) {
2488	// Add a new level to the topology
2489	KMP_ASSERT(levels_index < INTEL_LEVEL_TYPE_LAST)if (!(levels_index < INTEL_LEVEL_TYPE_LAST)) { __kmp_debug_assert ("levels_index < INTEL_LEVEL_TYPE_LAST", "openmp/runtime/src/kmp_affinity.cpp" , 2489); };
2490	levels[levels_index].level_type = level_type;
2491	levels[levels_index].mask_width = mask_width;
2492	levels[levels_index].nitems = nitems;
2493	levels_index++;
2494	} else {
2495	// If it is an unknown level, then logically move the previous layer up
2496	if (levels_index > 0) {
2497	levels[levels_index - 1].mask_width = mask_width;
2498	levels[levels_index - 1].nitems = nitems;
2499	}
2500	}
2501	level++;
2502	} while (level_type != INTEL_LEVEL_TYPE_INVALID);
2503
2504	// Set the masks to & with apicid
2505	for (unsigned i = 0; i < levels_index; ++i) {
2506	if (levels[i].level_type != INTEL_LEVEL_TYPE_INVALID) {
2507	levels[i].mask = ~((-1) << levels[i].mask_width);
2508	levels[i].cache_mask = (-1) << levels[i].mask_width;
2509	for (unsigned j = 0; j < i; ++j)
2510	levels[i].mask ^= levels[j].mask;
2511	} else {
2512	KMP_DEBUG_ASSERT(levels_index > 0)if (!(levels_index > 0)) { __kmp_debug_assert("levels_index > 0" , "openmp/runtime/src/kmp_affinity.cpp", 2512); };
2513	levels[i].mask = (-1) << levels[i - 1].mask_width;
2514	levels[i].cache_mask = 0;
2515	}
2516	}
2517	return levels_index;
2518	}
2519
2520	static bool __kmp_affinity_create_x2apicid_map(kmp_i18n_id_t *const msg_id) {
2521
2522	cpuid_level_info_t levels[INTEL_LEVEL_TYPE_LAST];
2523	kmp_hw_t types[INTEL_LEVEL_TYPE_LAST];
2524	unsigned levels_index;
2525	kmp_cpuid buf;
2526	kmp_uint64 known_levels;
2527	int topology_leaf, highest_leaf, apic_id;
2528	int num_leaves;
2529	static int leaves[] = {0, 0};
2530
2531	kmp_i18n_id_t leaf_message_id;
2532
2533	KMP_BUILD_ASSERT(sizeof(known_levels) * CHAR_BIT > KMP_HW_LAST)static_assert(sizeof(known_levels) * 8 > KMP_HW_LAST, "Build condition error" );
2534
2535	*msg_id = kmp_i18n_null;
2536	if (__kmp_affinity.flags.verbose) {
2537	KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC))__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffInfoStr , "KMP_AFFINITY", __kmp_i18n_catgets(kmp_i18n_str_Decodingx2APIC )), __kmp_msg_null);
2538	}
2539
2540	// Figure out the known topology levels
2541	known_levels = 0ull;
2542	for (int i = 0; i < INTEL_LEVEL_TYPE_LAST; ++i) {
2543	if (__kmp_intel_type_2_topology_type(i) != KMP_HW_UNKNOWN) {
2544	known_levels \|= (1ull << i);
2545	}
2546	}
2547
2548	// Get the highest cpuid leaf supported
2549	__kmp_x86_cpuid(0, 0, &buf);
2550	highest_leaf = buf.eax;
2551
2552	// If a specific topology method was requested, only allow that specific leaf
2553	// otherwise, try both leaves 31 and 11 in that order
2554	num_leaves = 0;
2555	if (__kmp_affinity_top_method == affinity_top_method_x2apicid) {
2556	num_leaves = 1;
2557	leaves[0] = 11;
2558	leaf_message_id = kmp_i18n_str_NoLeaf11Support;
2559	} else if (__kmp_affinity_top_method == affinity_top_method_x2apicid_1f) {
2560	num_leaves = 1;
2561	leaves[0] = 31;
2562	leaf_message_id = kmp_i18n_str_NoLeaf31Support;
2563	} else {
2564	num_leaves = 2;
2565	leaves[0] = 31;
2566	leaves[1] = 11;
2567	leaf_message_id = kmp_i18n_str_NoLeaf11Support;
2568	}
2569
2570	// Check to see if cpuid leaf 31 or 11 is supported.
2571	__kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
2572	topology_leaf = -1;
2573	for (int i = 0; i < num_leaves; ++i) {
2574	int leaf = leaves[i];
2575	if (highest_leaf < leaf)
2576	continue;
2577	__kmp_x86_cpuid(leaf, 0, &buf);
2578	if (buf.ebx == 0)
2579	continue;
2580	topology_leaf = leaf;
2581	levels_index = __kmp_x2apicid_get_levels(leaf, levels, known_levels);
2582	if (levels_index == 0)
2583	continue;
2584	break;
2585	}
2586	if (topology_leaf == -1 \|\| levels_index == 0) {
2587	*msg_id = leaf_message_id;
2588	return false;
2589	}
2590	KMP_ASSERT(levels_index <= INTEL_LEVEL_TYPE_LAST)if (!(levels_index <= INTEL_LEVEL_TYPE_LAST)) { __kmp_debug_assert ("levels_index <= INTEL_LEVEL_TYPE_LAST", "openmp/runtime/src/kmp_affinity.cpp" , 2590); };
2591
2592	// The algorithm used starts by setting the affinity to each available thread
2593	// and retrieving info from the cpuid instruction, so if we are not capable of
2594	// calling __kmp_get_system_affinity() and __kmp_get_system_affinity(), then
2595	// we need to do something else - use the defaults that we calculated from
2596	// issuing cpuid without binding to each proc.
2597	if (!KMP_AFFINITY_CAPABLE()(__kmp_affin_mask_size > 0)) {
2598	// Hack to try and infer the machine topology using only the data
2599	// available from cpuid on the current thread, and __kmp_xproc.
2600	KMP_ASSERT(__kmp_affinity.type == affinity_none)if (!(__kmp_affinity.type == affinity_none)) { __kmp_debug_assert ("__kmp_affinity.type == affinity_none", "openmp/runtime/src/kmp_affinity.cpp" , 2600); };
2601	for (unsigned i = 0; i < levels_index; ++i) {
2602	if (levels[i].level_type == INTEL_LEVEL_TYPE_SMT) {
2603	__kmp_nThreadsPerCore = levels[i].nitems;
2604	} else if (levels[i].level_type == INTEL_LEVEL_TYPE_CORE) {
2605	nCoresPerPkg = levels[i].nitems;
2606	}
2607	}
2608	__kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
2609	nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
2610	return true;
2611	}
2612
2613	// Allocate the data structure to be returned.
2614	int depth = levels_index;
2615	for (int i = depth - 1, j = 0; i >= 0; --i, ++j)
2616	types[j] = __kmp_intel_type_2_topology_type(levels[i].level_type);
2617	__kmp_topology =
2618	kmp_topology_t::allocate(__kmp_avail_proc, levels_index, types);
2619
2620	// Insert equivalent cache types if they exist
2621	kmp_cache_info_t cache_info;
2622	for (size_t i = 0; i < cache_info.get_depth(); ++i) {
2623	const kmp_cache_info_t::info_t &info = cache_info[i];
2624	unsigned cache_mask = info.mask;
2625	unsigned cache_level = info.level;
2626	for (unsigned j = 0; j < levels_index; ++j) {
2627	unsigned hw_cache_mask = levels[j].cache_mask;
2628	kmp_hw_t cache_type = kmp_cache_info_t::get_topology_type(cache_level);
2629	if (hw_cache_mask == cache_mask && j < levels_index - 1) {
2630	kmp_hw_t type =
2631	__kmp_intel_type_2_topology_type(levels[j + 1].level_type);
2632	__kmp_topology->set_equivalent_type(cache_type, type);
2633	}
2634	}
2635	}
2636
2637	// From here on, we can assume that it is safe to call
2638	// __kmp_get_system_affinity() and __kmp_set_system_affinity(), even if
2639	// __kmp_affinity.type = affinity_none.
2640
2641	// Save the affinity mask for the current thread.
2642	kmp_affinity_raii_t previous_affinity;
2643
2644	// Run through each of the available contexts, binding the current thread
2645	// to it, and obtaining the pertinent information using the cpuid instr.
2646	unsigned int proc;
2647	int hw_thread_index = 0;
2648	KMP_CPU_SET_ITERATE(proc, __kmp_affin_fullMask)for (proc = (__kmp_affin_fullMask)->begin(); (int)proc != ( __kmp_affin_fullMask)->end(); proc = (__kmp_affin_fullMask )->next(proc)) {
2649	cpuid_level_info_t my_levels[INTEL_LEVEL_TYPE_LAST];
2650	unsigned my_levels_index;
2651
2652	// Skip this proc if it is not included in the machine model.
2653	if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)(__kmp_affin_fullMask)->is_set(proc)) {
2654	continue;
2655	}
2656	KMP_DEBUG_ASSERT(hw_thread_index < __kmp_avail_proc)if (!(hw_thread_index < __kmp_avail_proc)) { __kmp_debug_assert ("hw_thread_index < __kmp_avail_proc", "openmp/runtime/src/kmp_affinity.cpp" , 2656); };
2657
2658	__kmp_affinity_dispatch->bind_thread(proc);
2659
2660	// New algorithm
2661	__kmp_x86_cpuid(topology_leaf, 0, &buf);
2662	apic_id = buf.edx;
2663	kmp_hw_thread_t &hw_thread = __kmp_topology->at(hw_thread_index);
2664	my_levels_index =
2665	__kmp_x2apicid_get_levels(topology_leaf, my_levels, known_levels);
2666	if (my_levels_index == 0 \|\| my_levels_index != levels_index) {
2667	*msg_id = kmp_i18n_str_InvalidCpuidInfo;
2668	return false;
2669	}
2670	hw_thread.clear();
2671	hw_thread.os_id = proc;
2672	// Put in topology information
2673	for (unsigned j = 0, idx = depth - 1; j < my_levels_index; ++j, --idx) {
2674	hw_thread.ids[idx] = apic_id & my_levels[j].mask;
2675	if (j > 0) {
2676	hw_thread.ids[idx] >>= my_levels[j - 1].mask_width;
2677	}
2678	}
2679	// Hybrid information
2680	if (__kmp_is_hybrid_cpu() && highest_leaf >= 0x1a) {
2681	kmp_hw_core_type_t type;
2682	unsigned native_model_id;
2683	int efficiency;
2684	__kmp_get_hybrid_info(&type, &efficiency, &native_model_id);
2685	hw_thread.attrs.set_core_type(type);
2686	hw_thread.attrs.set_core_eff(efficiency);
2687	}
2688	hw_thread_index++;
2689	}
2690	KMP_ASSERT(hw_thread_index > 0)if (!(hw_thread_index > 0)) { __kmp_debug_assert("hw_thread_index > 0" , "openmp/runtime/src/kmp_affinity.cpp", 2690); };
2691	__kmp_topology->sort_ids();
2692	if (!__kmp_topology->check_ids()) {
2693	kmp_topology_t::deallocate(__kmp_topology);
2694	__kmp_topology = nullptr;
2695	*msg_id = kmp_i18n_str_x2ApicIDsNotUnique;
2696	return false;
2697	}
2698	return true;
2699	}
2700	#endif /* KMP_ARCH_X86 \|\| KMP_ARCH_X86_64 */
2701
2702	#define osIdIndex0 0
2703	#define threadIdIndex1 1
2704	#define coreIdIndex2 2
2705	#define pkgIdIndex3 3
2706	#define nodeIdIndex4 4
2707
2708	typedef unsigned *ProcCpuInfo;
2709	static unsigned maxIndex = pkgIdIndex3;
2710
2711	static int __kmp_affinity_cmp_ProcCpuInfo_phys_id(const void *a,
2712	const void *b) {
2713	unsigned i;
2714	const unsigned aa = (unsigned const )a;
2715	const unsigned bb = (unsigned const )b;
2716	for (i = maxIndex;; i--) {
2717	if (aa[i] < bb[i])
2718	return -1;
2719	if (aa[i] > bb[i])
2720	return 1;
2721	if (i == osIdIndex0)
2722	break;
2723	}
2724	return 0;
2725	}
2726
2727	#if KMP_USE_HIER_SCHED0
2728	// Set the array sizes for the hierarchy layers
2729	static void __kmp_dispatch_set_hierarchy_values() {
2730	// Set the maximum number of L1's to number of cores
2731	// Set the maximum number of L2's to to either number of cores / 2 for
2732	// Intel(R) Xeon Phi(TM) coprocessor formally codenamed Knights Landing
2733	// Or the number of cores for Intel(R) Xeon(R) processors
2734	// Set the maximum number of NUMA nodes and L3's to number of packages
2735	__kmp_hier_max_units[kmp_hier_layer_e::LAYER_THREAD + 1] =
2736	nPackages * nCoresPerPkg * __kmp_nThreadsPerCore;
2737	__kmp_hier_max_units[kmp_hier_layer_e::LAYER_L1 + 1] = __kmp_ncores;
2738	#if KMP_ARCH_X86_641 && (KMP_OS_LINUX1 \|\| KMP_OS_FREEBSD0 \|\| KMP_OS_WINDOWS0) && \
2739	KMP_MIC_SUPPORTED((0 \|\| 1) && (1 \|\| 0))
2740	if (__kmp_mic_type >= mic3)
2741	__kmp_hier_max_units[kmp_hier_layer_e::LAYER_L2 + 1] = __kmp_ncores / 2;
2742	else
2743	#endif // KMP_ARCH_X86_64 && (KMP_OS_LINUX \|\| KMP_OS_WINDOWS)
2744	__kmp_hier_max_units[kmp_hier_layer_e::LAYER_L2 + 1] = __kmp_ncores;
2745	__kmp_hier_max_units[kmp_hier_layer_e::LAYER_L3 + 1] = nPackages;
2746	__kmp_hier_max_units[kmp_hier_layer_e::LAYER_NUMA + 1] = nPackages;
2747	__kmp_hier_max_units[kmp_hier_layer_e::LAYER_LOOP + 1] = 1;
2748	// Set the number of threads per unit
2749	// Number of hardware threads per L1/L2/L3/NUMA/LOOP
2750	__kmp_hier_threads_per[kmp_hier_layer_e::LAYER_THREAD + 1] = 1;
2751	__kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L1 + 1] =
2752	__kmp_nThreadsPerCore;
2753	#if KMP_ARCH_X86_641 && (KMP_OS_LINUX1 \|\| KMP_OS_FREEBSD0 \|\| KMP_OS_WINDOWS0) && \
2754	KMP_MIC_SUPPORTED((0 \|\| 1) && (1 \|\| 0))
2755	if (__kmp_mic_type >= mic3)
2756	__kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L2 + 1] =
2757	2 * __kmp_nThreadsPerCore;
2758	else
2759	#endif // KMP_ARCH_X86_64 && (KMP_OS_LINUX \|\| KMP_OS_WINDOWS)
2760	__kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L2 + 1] =
2761	__kmp_nThreadsPerCore;
2762	__kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L3 + 1] =
2763	nCoresPerPkg * __kmp_nThreadsPerCore;
2764	__kmp_hier_threads_per[kmp_hier_layer_e::LAYER_NUMA + 1] =
2765	nCoresPerPkg * __kmp_nThreadsPerCore;
2766	__kmp_hier_threads_per[kmp_hier_layer_e::LAYER_LOOP + 1] =
2767	nPackages * nCoresPerPkg * __kmp_nThreadsPerCore;
2768	}
2769
2770	// Return the index into the hierarchy for this tid and layer type (L1, L2, etc)
2771	// i.e., this thread's L1 or this thread's L2, etc.
2772	int __kmp_dispatch_get_index(int tid, kmp_hier_layer_e type) {
2773	int index = type + 1;
2774	int num_hw_threads = __kmp_hier_max_units[kmp_hier_layer_e::LAYER_THREAD + 1];
2775	KMP_DEBUG_ASSERT(type != kmp_hier_layer_e::LAYER_LAST)if (!(type != kmp_hier_layer_e::LAYER_LAST)) { __kmp_debug_assert ("type != kmp_hier_layer_e::LAYER_LAST", "openmp/runtime/src/kmp_affinity.cpp" , 2775); };
2776	if (type == kmp_hier_layer_e::LAYER_THREAD)
2777	return tid;
2778	else if (type == kmp_hier_layer_e::LAYER_LOOP)
2779	return 0;
2780	KMP_DEBUG_ASSERT(__kmp_hier_max_units[index] != 0)if (!(__kmp_hier_max_units[index] != 0)) { __kmp_debug_assert ("__kmp_hier_max_units[index] != 0", "openmp/runtime/src/kmp_affinity.cpp" , 2780); };
2781	if (tid >= num_hw_threads)
2782	tid = tid % num_hw_threads;
2783	return (tid / __kmp_hier_threads_per[index]) % __kmp_hier_max_units[index];
2784	}
2785
2786	// Return the number of t1's per t2
2787	int __kmp_dispatch_get_t1_per_t2(kmp_hier_layer_e t1, kmp_hier_layer_e t2) {
2788	int i1 = t1 + 1;
2789	int i2 = t2 + 1;
2790	KMP_DEBUG_ASSERT(i1 <= i2)if (!(i1 <= i2)) { __kmp_debug_assert("i1 <= i2", "openmp/runtime/src/kmp_affinity.cpp" , 2790); };
2791	KMP_DEBUG_ASSERT(t1 != kmp_hier_layer_e::LAYER_LAST)if (!(t1 != kmp_hier_layer_e::LAYER_LAST)) { __kmp_debug_assert ("t1 != kmp_hier_layer_e::LAYER_LAST", "openmp/runtime/src/kmp_affinity.cpp" , 2791); };
2792	KMP_DEBUG_ASSERT(t2 != kmp_hier_layer_e::LAYER_LAST)if (!(t2 != kmp_hier_layer_e::LAYER_LAST)) { __kmp_debug_assert ("t2 != kmp_hier_layer_e::LAYER_LAST", "openmp/runtime/src/kmp_affinity.cpp" , 2792); };
2793	KMP_DEBUG_ASSERT(__kmp_hier_threads_per[i1] != 0)if (!(__kmp_hier_threads_per[i1] != 0)) { __kmp_debug_assert( "__kmp_hier_threads_per[i1] != 0", "openmp/runtime/src/kmp_affinity.cpp" , 2793); };
2794	// (nthreads/t2) / (nthreads/t1) = t1 / t2
2795	return __kmp_hier_threads_per[i2] / __kmp_hier_threads_per[i1];
2796	}
2797	#endif // KMP_USE_HIER_SCHED
2798
2799	static inline const char *__kmp_cpuinfo_get_filename() {
2800	const char *filename;
2801	if (__kmp_cpuinfo_file != nullptr)
2802	filename = __kmp_cpuinfo_file;
2803	else
2804	filename = "/proc/cpuinfo";
2805	return filename;
2806	}
2807
2808	static inline const char *__kmp_cpuinfo_get_envvar() {
2809	const char *envvar = nullptr;
2810	if (__kmp_cpuinfo_file != nullptr)
2811	envvar = "KMP_CPUINFO_FILE";
2812	return envvar;
2813	}
2814
2815	// Parse /proc/cpuinfo (or an alternate file in the same format) to obtain the
2816	// affinity map.
2817	static bool __kmp_affinity_create_cpuinfo_map(int *line,
2818	kmp_i18n_id_t *const msg_id) {
2819	const char *filename = __kmp_cpuinfo_get_filename();
2820	const char *envvar = __kmp_cpuinfo_get_envvar();
2821	*msg_id = kmp_i18n_null;
2822
2823	if (__kmp_affinity.flags.verbose) {
2824	KMP_INFORM(AffParseFilename, "KMP_AFFINITY", filename)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffParseFilename , "KMP_AFFINITY", filename), __kmp_msg_null);
2825	}
2826
2827	kmp_safe_raii_file_t f(filename, "r", envvar);
2828
2829	// Scan of the file, and count the number of "processor" (osId) fields,
2830	// and find the highest value of <n> for a node_<n> field.
2831	char buf[256];
2832	unsigned num_records = 0;
2833	while (!feof(f)) {
2834	buf[sizeof(buf) - 1] = 1;
2835	if (!fgets(buf, sizeof(buf), f)) {
2836	// Read errors presumably because of EOF
2837	break;
2838	}
2839
2840	char s1[] = "processor";
2841	if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
2842	num_records++;
2843	continue;
2844	}
2845
2846	// FIXME - this will match "node_<n> <garbage>"
2847	unsigned level;
2848	if (KMP_SSCANFsscanf(buf, "node_%u id", &level) == 1) {
2849	// validate the input fisrt:
2850	if (level > (unsigned)__kmp_xproc) { // level is too big
2851	level = __kmp_xproc;
2852	}
2853	if (nodeIdIndex4 + level >= maxIndex) {
2854	maxIndex = nodeIdIndex4 + level;
2855	}
2856	continue;
2857	}
2858	}
2859
2860	// Check for empty file / no valid processor records, or too many. The number
2861	// of records can't exceed the number of valid bits in the affinity mask.
2862	if (num_records == 0) {
2863	*msg_id = kmp_i18n_str_NoProcRecords;
2864	return false;
2865	}
2866	if (num_records > (unsigned)__kmp_xproc) {
2867	*msg_id = kmp_i18n_str_TooManyProcRecords;
2868	return false;
2869	}
2870
2871	// Set the file pointer back to the beginning, so that we can scan the file
2872	// again, this time performing a full parse of the data. Allocate a vector of
2873	// ProcCpuInfo object, where we will place the data. Adding an extra element
2874	// at the end allows us to remove a lot of extra checks for termination
2875	// conditions.
2876	if (fseek(f, 0, SEEK_SET0) != 0) {
2877	*msg_id = kmp_i18n_str_CantRewindCpuinfo;
2878	return false;
2879	}
2880
2881	// Allocate the array of records to store the proc info in. The dummy
2882	// element at the end makes the logic in filling them out easier to code.
2883	unsigned **threadInfo =
2884	(unsigned *)__kmp_allocate((num_records + 1) sizeof(unsigned ))___kmp_allocate(((num_records + 1) sizeof(unsigned *)), "openmp/runtime/src/kmp_affinity.cpp" , 2884);
2885	unsigned i;
2886	for (i = 0; i <= num_records; i++) {
2887	threadInfo[i] =
2888	(unsigned )__kmp_allocate((maxIndex + 1) sizeof(unsigned))___kmp_allocate(((maxIndex + 1) * sizeof(unsigned)), "openmp/runtime/src/kmp_affinity.cpp" , 2888);
2889	}
2890
2891	#define CLEANUP_THREAD_INFOfor (i = 0; i <= num_records; i++) { ___kmp_free((threadInfo [i]), "openmp/runtime/src/kmp_affinity.cpp", 2891); } ___kmp_free ((threadInfo), "openmp/runtime/src/kmp_affinity.cpp", 2891); \
2892	for (i = 0; i <= num_records; i++) { \
2893	__kmp_free(threadInfo[i])___kmp_free((threadInfo[i]), "openmp/runtime/src/kmp_affinity.cpp" , 2893); \
2894	} \
2895	__kmp_free(threadInfo)___kmp_free((threadInfo), "openmp/runtime/src/kmp_affinity.cpp" , 2895);
2896
2897	// A value of UINT_MAX means that we didn't find the field
2898	unsigned __index;
2899
2900	#define INIT_PROC_INFO(p)for (__index = 0; __index <= maxIndex; __index++) { (p)[__index ] = (2147483647 *2U +1U); } \
2901	for (__index = 0; __index <= maxIndex; __index++) { \
2902	(p)[__index] = UINT_MAX(2147483647 *2U +1U); \
2903	}
2904
2905	for (i = 0; i <= num_records; i++) {
2906	INIT_PROC_INFO(threadInfo[i])for (__index = 0; __index <= maxIndex; __index++) { (threadInfo [i])[__index] = (2147483647 *2U +1U); };
2907	}
2908
2909	unsigned num_avail = 0;
2910	*line = 0;
2911	while (!feof(f)) {
2912	// Create an inner scoping level, so that all the goto targets at the end of
2913	// the loop appear in an outer scoping level. This avoids warnings about
2914	// jumping past an initialization to a target in the same block.
2915	{
2916	buf[sizeof(buf) - 1] = 1;
2917	bool long_line = false;
2918	if (!fgets(buf, sizeof(buf), f)) {
2919	// Read errors presumably because of EOF
2920	// If there is valid data in threadInfo[num_avail], then fake
2921	// a blank line in ensure that the last address gets parsed.
2922	bool valid = false;
2923	for (i = 0; i <= maxIndex; i++) {
2924	if (threadInfo[num_avail][i] != UINT_MAX(2147483647 *2U +1U)) {
2925	valid = true;
2926	}
2927	}
2928	if (!valid) {
2929	break;
2930	}
2931	buf[0] = 0;
2932	} else if (!buf[sizeof(buf) - 1]) {
2933	// The line is longer than the buffer. Set a flag and don't
2934	// emit an error if we were going to ignore the line, anyway.
2935	long_line = true;
2936
2937	#define CHECK_LINEif (long_line) { for (i = 0; i <= num_records; i++) { ___kmp_free ((threadInfo[i]), "openmp/runtime/src/kmp_affinity.cpp", 2937 ); } ___kmp_free((threadInfo), "openmp/runtime/src/kmp_affinity.cpp" , 2937);; *msg_id = kmp_i18n_str_LongLineCpuinfo; return false ; } \
2938	if (long_line) { \
2939	CLEANUP_THREAD_INFOfor (i = 0; i <= num_records; i++) { ___kmp_free((threadInfo [i]), "openmp/runtime/src/kmp_affinity.cpp", 2939); } ___kmp_free ((threadInfo), "openmp/runtime/src/kmp_affinity.cpp", 2939);; \
2940	*msg_id = kmp_i18n_str_LongLineCpuinfo; \
2941	return false; \
2942	}
2943	}
2944	(*line)++;
2945
2946	char s1[] = "processor";
2947	if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
2948	CHECK_LINEif (long_line) { for (i = 0; i <= num_records; i++) { ___kmp_free ((threadInfo[i]), "openmp/runtime/src/kmp_affinity.cpp", 2948 ); } ___kmp_free((threadInfo), "openmp/runtime/src/kmp_affinity.cpp" , 2948);; *msg_id = kmp_i18n_str_LongLineCpuinfo; return false ; };
2949	char *p = strchr(buf + sizeof(s1) - 1, ':');
2950	unsigned val;
2951	if ((p == NULL__null) \|\| (KMP_SSCANFsscanf(p + 1, "%u\n", &val) != 1))
2952	goto no_val;
2953	if (threadInfo[num_avail][osIdIndex0] != UINT_MAX(2147483647 *2U +1U))
2954	#if KMP_ARCH_AARCH640
2955	// Handle the old AArch64 /proc/cpuinfo layout differently,
2956	// it contains all of the 'processor' entries listed in a
2957	// single 'Processor' section, therefore the normal looking
2958	// for duplicates in that section will always fail.
2959	num_avail++;
2960	#else
2961	goto dup_field;
2962	#endif
2963	threadInfo[num_avail][osIdIndex0] = val;
2964	#if KMP_OS_LINUX1 && !(KMP_ARCH_X860 \|\| KMP_ARCH_X86_641)
2965	char path[256];
2966	KMP_SNPRINTFsnprintf(
2967	path, sizeof(path),
2968	"/sys/devices/system/cpu/cpu%u/topology/physical_package_id",
2969	threadInfo[num_avail][osIdIndex0]);
2970	__kmp_read_from_file(path, "%u", &threadInfo[num_avail][pkgIdIndex3]);
2971
2972	KMP_SNPRINTFsnprintf(path, sizeof(path),
2973	"/sys/devices/system/cpu/cpu%u/topology/core_id",
2974	threadInfo[num_avail][osIdIndex0]);
2975	__kmp_read_from_file(path, "%u", &threadInfo[num_avail][coreIdIndex2]);
2976	continue;
2977	#else
2978	}
2979	char s2[] = "physical id";
2980	if (strncmp(buf, s2, sizeof(s2) - 1) == 0) {
2981	CHECK_LINEif (long_line) { for (i = 0; i <= num_records; i++) { ___kmp_free ((threadInfo[i]), "openmp/runtime/src/kmp_affinity.cpp", 2981 ); } ___kmp_free((threadInfo), "openmp/runtime/src/kmp_affinity.cpp" , 2981);; *msg_id = kmp_i18n_str_LongLineCpuinfo; return false ; };
2982	char *p = strchr(buf + sizeof(s2) - 1, ':');
2983	unsigned val;
2984	if ((p == NULL__null) \|\| (KMP_SSCANFsscanf(p + 1, "%u\n", &val) != 1))
2985	goto no_val;
2986	if (threadInfo[num_avail][pkgIdIndex3] != UINT_MAX(2147483647 *2U +1U))
2987	goto dup_field;
2988	threadInfo[num_avail][pkgIdIndex3] = val;
2989	continue;
2990	}
2991	char s3[] = "core id";
2992	if (strncmp(buf, s3, sizeof(s3) - 1) == 0) {
2993	CHECK_LINEif (long_line) { for (i = 0; i <= num_records; i++) { ___kmp_free ((threadInfo[i]), "openmp/runtime/src/kmp_affinity.cpp", 2993 ); } ___kmp_free((threadInfo), "openmp/runtime/src/kmp_affinity.cpp" , 2993);; *msg_id = kmp_i18n_str_LongLineCpuinfo; return false ; };
2994	char *p = strchr(buf + sizeof(s3) - 1, ':');
2995	unsigned val;
2996	if ((p == NULL__null) \|\| (KMP_SSCANFsscanf(p + 1, "%u\n", &val) != 1))
2997	goto no_val;
2998	if (threadInfo[num_avail][coreIdIndex2] != UINT_MAX(2147483647 *2U +1U))
2999	goto dup_field;
3000	threadInfo[num_avail][coreIdIndex2] = val;
3001	continue;
3002	#endif // KMP_OS_LINUX && USE_SYSFS_INFO
3003	}
3004	char s4[] = "thread id";
3005	if (strncmp(buf, s4, sizeof(s4) - 1) == 0) {
3006	CHECK_LINEif (long_line) { for (i = 0; i <= num_records; i++) { ___kmp_free ((threadInfo[i]), "openmp/runtime/src/kmp_affinity.cpp", 3006 ); } ___kmp_free((threadInfo), "openmp/runtime/src/kmp_affinity.cpp" , 3006);; *msg_id = kmp_i18n_str_LongLineCpuinfo; return false ; };
3007	char *p = strchr(buf + sizeof(s4) - 1, ':');
3008	unsigned val;
3009	if ((p == NULL__null) \|\| (KMP_SSCANFsscanf(p + 1, "%u\n", &val) != 1))
3010	goto no_val;
3011	if (threadInfo[num_avail][threadIdIndex1] != UINT_MAX(2147483647 *2U +1U))
3012	goto dup_field;
3013	threadInfo[num_avail][threadIdIndex1] = val;
3014	continue;
3015	}
3016	unsigned level;
3017	if (KMP_SSCANFsscanf(buf, "node_%u id", &level) == 1) {
3018	CHECK_LINEif (long_line) { for (i = 0; i <= num_records; i++) { ___kmp_free ((threadInfo[i]), "openmp/runtime/src/kmp_affinity.cpp", 3018 ); } ___kmp_free((threadInfo), "openmp/runtime/src/kmp_affinity.cpp" , 3018);; *msg_id = kmp_i18n_str_LongLineCpuinfo; return false ; };
3019	char *p = strchr(buf + sizeof(s4) - 1, ':');
3020	unsigned val;
3021	if ((p == NULL__null) \|\| (KMP_SSCANFsscanf(p + 1, "%u\n", &val) != 1))
3022	goto no_val;
3023	// validate the input before using level:
3024	if (level > (unsigned)__kmp_xproc) { // level is too big
3025	level = __kmp_xproc;
3026	}
3027	if (threadInfo[num_avail][nodeIdIndex4 + level] != UINT_MAX(2147483647 *2U +1U))
3028	goto dup_field;
3029	threadInfo[num_avail][nodeIdIndex4 + level] = val;
3030	continue;
3031	}
3032
3033	// We didn't recognize the leading token on the line. There are lots of
3034	// leading tokens that we don't recognize - if the line isn't empty, go on
3035	// to the next line.
3036	if ((buf != 0) && (buf != '\n')) {
3037	// If the line is longer than the buffer, read characters
3038	// until we find a newline.
3039	if (long_line) {
3040	int ch;
3041	while (((ch = fgetc(f)) != EOF(-1)) && (ch != '\n'))
3042	;
3043	}
3044	continue;
3045	}
3046
3047	// A newline has signalled the end of the processor record.
3048	// Check that there aren't too many procs specified.
3049	if ((int)num_avail == __kmp_xproc) {
3050	CLEANUP_THREAD_INFOfor (i = 0; i <= num_records; i++) { ___kmp_free((threadInfo [i]), "openmp/runtime/src/kmp_affinity.cpp", 3050); } ___kmp_free ((threadInfo), "openmp/runtime/src/kmp_affinity.cpp", 3050);;
3051	*msg_id = kmp_i18n_str_TooManyEntries;
3052	return false;
3053	}
3054
3055	// Check for missing fields. The osId field must be there, and we
3056	// currently require that the physical id field is specified, also.
3057	if (threadInfo[num_avail][osIdIndex0] == UINT_MAX(2147483647 *2U +1U)) {
3058	CLEANUP_THREAD_INFOfor (i = 0; i <= num_records; i++) { ___kmp_free((threadInfo [i]), "openmp/runtime/src/kmp_affinity.cpp", 3058); } ___kmp_free ((threadInfo), "openmp/runtime/src/kmp_affinity.cpp", 3058);;
3059	*msg_id = kmp_i18n_str_MissingProcField;
3060	return false;
3061	}
3062	if (threadInfo[0][pkgIdIndex3] == UINT_MAX(2147483647 *2U +1U)) {
3063	CLEANUP_THREAD_INFOfor (i = 0; i <= num_records; i++) { ___kmp_free((threadInfo [i]), "openmp/runtime/src/kmp_affinity.cpp", 3063); } ___kmp_free ((threadInfo), "openmp/runtime/src/kmp_affinity.cpp", 3063);;
3064	*msg_id = kmp_i18n_str_MissingPhysicalIDField;
3065	return false;
3066	}
3067
3068	// Skip this proc if it is not included in the machine model.
3069	if (KMP_AFFINITY_CAPABLE()(__kmp_affin_mask_size > 0) &&
3070	!KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex],(__kmp_affin_fullMask)->is_set(threadInfo[num_avail][0])
3071	__kmp_affin_fullMask)(__kmp_affin_fullMask)->is_set(threadInfo[num_avail][0])) {
3072	INIT_PROC_INFO(threadInfo[num_avail])for (__index = 0; __index <= maxIndex; __index++) { (threadInfo [num_avail])[__index] = (2147483647 *2U +1U); };
3073	continue;
3074	}
3075
3076	// We have a successful parse of this proc's info.
3077	// Increment the counter, and prepare for the next proc.
3078	num_avail++;
3079	KMP_ASSERT(num_avail <= num_records)if (!(num_avail <= num_records)) { __kmp_debug_assert("num_avail <= num_records" , "openmp/runtime/src/kmp_affinity.cpp", 3079); };
3080	INIT_PROC_INFO(threadInfo[num_avail])for (__index = 0; __index <= maxIndex; __index++) { (threadInfo [num_avail])[__index] = (2147483647 *2U +1U); };
3081	}
3082	continue;
3083
3084	no_val:
3085	CLEANUP_THREAD_INFOfor (i = 0; i <= num_records; i++) { ___kmp_free((threadInfo [i]), "openmp/runtime/src/kmp_affinity.cpp", 3085); } ___kmp_free ((threadInfo), "openmp/runtime/src/kmp_affinity.cpp", 3085);;
3086	*msg_id = kmp_i18n_str_MissingValCpuinfo;
3087	return false;
3088
3089	dup_field:
3090	CLEANUP_THREAD_INFOfor (i = 0; i <= num_records; i++) { ___kmp_free((threadInfo [i]), "openmp/runtime/src/kmp_affinity.cpp", 3090); } ___kmp_free ((threadInfo), "openmp/runtime/src/kmp_affinity.cpp", 3090);;
3091	*msg_id = kmp_i18n_str_DuplicateFieldCpuinfo;
3092	return false;
3093	}
3094	*line = 0;
3095
3096	#if KMP_MIC0 && REDUCE_TEAM_SIZE
3097	unsigned teamSize = 0;
3098	#endif // KMP_MIC && REDUCE_TEAM_SIZE
3099
3100	// check for num_records == __kmp_xproc ???
3101
3102	// If it is configured to omit the package level when there is only a single
3103	// package, the logic at the end of this routine won't work if there is only a
3104	// single thread
3105	KMP_ASSERT(num_avail > 0)if (!(num_avail > 0)) { __kmp_debug_assert("num_avail > 0" , "openmp/runtime/src/kmp_affinity.cpp", 3105); };
3106	KMP_ASSERT(num_avail <= num_records)if (!(num_avail <= num_records)) { __kmp_debug_assert("num_avail <= num_records" , "openmp/runtime/src/kmp_affinity.cpp", 3106); };
3107
3108	// Sort the threadInfo table by physical Id.
3109	qsort(threadInfo, num_avail, sizeof(*threadInfo),
3110	__kmp_affinity_cmp_ProcCpuInfo_phys_id);
3111
3112	// The table is now sorted by pkgId / coreId / threadId, but we really don't
3113	// know the radix of any of the fields. pkgId's may be sparsely assigned among
3114	// the chips on a system. Although coreId's are usually assigned
3115	// [0 .. coresPerPkg-1] and threadId's are usually assigned
3116	// [0..threadsPerCore-1], we don't want to make any such assumptions.
3117	//
3118	// For that matter, we don't know what coresPerPkg and threadsPerCore (or the
3119	// total # packages) are at this point - we want to determine that now. We
3120	// only have an upper bound on the first two figures.
3121	unsigned *counts =
3122	(unsigned )__kmp_allocate((maxIndex + 1) sizeof(unsigned))___kmp_allocate(((maxIndex + 1) * sizeof(unsigned)), "openmp/runtime/src/kmp_affinity.cpp" , 3122);
3123	unsigned *maxCt =
3124	(unsigned )__kmp_allocate((maxIndex + 1) sizeof(unsigned))___kmp_allocate(((maxIndex + 1) * sizeof(unsigned)), "openmp/runtime/src/kmp_affinity.cpp" , 3124);
3125	unsigned *totals =
3126	(unsigned )__kmp_allocate((maxIndex + 1) sizeof(unsigned))___kmp_allocate(((maxIndex + 1) * sizeof(unsigned)), "openmp/runtime/src/kmp_affinity.cpp" , 3126);
3127	unsigned *lastId =
3128	(unsigned )__kmp_allocate((maxIndex + 1) sizeof(unsigned))___kmp_allocate(((maxIndex + 1) * sizeof(unsigned)), "openmp/runtime/src/kmp_affinity.cpp" , 3128);
3129
3130	bool assign_thread_ids = false;
3131	unsigned threadIdCt;
3132	unsigned index;
3133
3134	restart_radix_check:
3135	threadIdCt = 0;
3136
3137	// Initialize the counter arrays with data from threadInfo[0].
3138	if (assign_thread_ids) {
3139	if (threadInfo[0][threadIdIndex1] == UINT_MAX(2147483647 *2U +1U)) {
3140	threadInfo[0][threadIdIndex1] = threadIdCt++;
3141	} else if (threadIdCt <= threadInfo[0][threadIdIndex1]) {
3142	threadIdCt = threadInfo[0][threadIdIndex1] + 1;
3143	}
3144	}
3145	for (index = 0; index <= maxIndex; index++) {
3146	counts[index] = 1;
3147	maxCt[index] = 1;
3148	totals[index] = 1;
3149	lastId[index] = threadInfo[0][index];
3150	;
3151	}
3152
3153	// Run through the rest of the OS procs.
3154	for (i = 1; i < num_avail; i++) {
3155	// Find the most significant index whose id differs from the id for the
3156	// previous OS proc.
3157	for (index = maxIndex; index >= threadIdIndex1; index--) {
3158	if (assign_thread_ids && (index == threadIdIndex1)) {
3159	// Auto-assign the thread id field if it wasn't specified.
3160	if (threadInfo[i][threadIdIndex1] == UINT_MAX(2147483647 *2U +1U)) {
3161	threadInfo[i][threadIdIndex1] = threadIdCt++;
3162	}
3163	// Apparently the thread id field was specified for some entries and not
3164	// others. Start the thread id counter off at the next higher thread id.
3165	else if (threadIdCt <= threadInfo[i][threadIdIndex1]) {
3166	threadIdCt = threadInfo[i][threadIdIndex1] + 1;
3167	}
3168	}
3169	if (threadInfo[i][index] != lastId[index]) {
3170	// Run through all indices which are less significant, and reset the
3171	// counts to 1. At all levels up to and including index, we need to
3172	// increment the totals and record the last id.
3173	unsigned index2;
3174	for (index2 = threadIdIndex1; index2 < index; index2++) {
3175	totals[index2]++;
3176	if (counts[index2] > maxCt[index2]) {
3177	maxCt[index2] = counts[index2];
3178	}
3179	counts[index2] = 1;
3180	lastId[index2] = threadInfo[i][index2];
3181	}
3182	counts[index]++;
3183	totals[index]++;
3184	lastId[index] = threadInfo[i][index];
3185
3186	if (assign_thread_ids && (index > threadIdIndex1)) {
3187
3188	#if KMP_MIC0 && REDUCE_TEAM_SIZE
3189	// The default team size is the total #threads in the machine
3190	// minus 1 thread for every core that has 3 or more threads.
3191	teamSize += (threadIdCt <= 2) ? (threadIdCt) : (threadIdCt - 1);
3192	#endif // KMP_MIC && REDUCE_TEAM_SIZE
3193
3194	// Restart the thread counter, as we are on a new core.
3195	threadIdCt = 0;
3196
3197	// Auto-assign the thread id field if it wasn't specified.
3198	if (threadInfo[i][threadIdIndex1] == UINT_MAX(2147483647 *2U +1U)) {
3199	threadInfo[i][threadIdIndex1] = threadIdCt++;
3200	}
3201
3202	// Apparently the thread id field was specified for some entries and
3203	// not others. Start the thread id counter off at the next higher
3204	// thread id.
3205	else if (threadIdCt <= threadInfo[i][threadIdIndex1]) {
3206	threadIdCt = threadInfo[i][threadIdIndex1] + 1;
3207	}
3208	}
3209	break;
3210	}
3211	}
3212	if (index < threadIdIndex1) {
3213	// If thread ids were specified, it is an error if they are not unique.
3214	// Also, check that we waven't already restarted the loop (to be safe -
3215	// shouldn't need to).
3216	if ((threadInfo[i][threadIdIndex1] != UINT_MAX(2147483647 *2U +1U)) \|\| assign_thread_ids) {
3217	__kmp_free(lastId)___kmp_free((lastId), "openmp/runtime/src/kmp_affinity.cpp", 3217 );
3218	__kmp_free(totals)___kmp_free((totals), "openmp/runtime/src/kmp_affinity.cpp", 3218 );
3219	__kmp_free(maxCt)___kmp_free((maxCt), "openmp/runtime/src/kmp_affinity.cpp", 3219 );
3220	__kmp_free(counts)___kmp_free((counts), "openmp/runtime/src/kmp_affinity.cpp", 3220 );
3221	CLEANUP_THREAD_INFOfor (i = 0; i <= num_records; i++) { ___kmp_free((threadInfo [i]), "openmp/runtime/src/kmp_affinity.cpp", 3221); } ___kmp_free ((threadInfo), "openmp/runtime/src/kmp_affinity.cpp", 3221);;
3222	*msg_id = kmp_i18n_str_PhysicalIDsNotUnique;
3223	return false;
3224	}
3225
3226	// If the thread ids were not specified and we see entries entries that
3227	// are duplicates, start the loop over and assign the thread ids manually.
3228	assign_thread_ids = true;
3229	goto restart_radix_check;
3230	}
3231	}
3232
3233	#if KMP_MIC0 && REDUCE_TEAM_SIZE
3234	// The default team size is the total #threads in the machine
3235	// minus 1 thread for every core that has 3 or more threads.
3236	teamSize += (threadIdCt <= 2) ? (threadIdCt) : (threadIdCt - 1);
3237	#endif // KMP_MIC && REDUCE_TEAM_SIZE
3238
3239	for (index = threadIdIndex1; index <= maxIndex; index++) {
3240	if (counts[index] > maxCt[index]) {
3241	maxCt[index] = counts[index];
3242	}
3243	}
3244
3245	__kmp_nThreadsPerCore = maxCt[threadIdIndex1];
3246	nCoresPerPkg = maxCt[coreIdIndex2];
3247	nPackages = totals[pkgIdIndex3];
3248
3249	// When affinity is off, this routine will still be called to set
3250	// __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
3251	// Make sure all these vars are set correctly, and return now if affinity is
3252	// not enabled.
3253	__kmp_ncores = totals[coreIdIndex2];
3254	if (!KMP_AFFINITY_CAPABLE()(__kmp_affin_mask_size > 0)) {
3255	KMP_ASSERT(__kmp_affinity.type == affinity_none)if (!(__kmp_affinity.type == affinity_none)) { __kmp_debug_assert ("__kmp_affinity.type == affinity_none", "openmp/runtime/src/kmp_affinity.cpp" , 3255); };
3256	return true;
3257	}
3258
3259	#if KMP_MIC0 && REDUCE_TEAM_SIZE
3260	// Set the default team size.
3261	if ((__kmp_dflt_team_nth == 0) && (teamSize > 0)) {
3262	__kmp_dflt_team_nth = teamSize;
3263	KA_TRACE(20, ("__kmp_affinity_create_cpuinfo_map: setting "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_affinity_create_cpuinfo_map: setting " "__kmp_dflt_team_nth = %d\n", __kmp_dflt_team_nth); }
3264	"__kmp_dflt_team_nth = %d\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_affinity_create_cpuinfo_map: setting " "__kmp_dflt_team_nth = %d\n", __kmp_dflt_team_nth); }
3265	__kmp_dflt_team_nth))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_affinity_create_cpuinfo_map: setting " "__kmp_dflt_team_nth = %d\n", __kmp_dflt_team_nth); };
3266	}
3267	#endif // KMP_MIC && REDUCE_TEAM_SIZE
3268
3269	KMP_DEBUG_ASSERT(num_avail == (unsigned)__kmp_avail_proc)if (!(num_avail == (unsigned)__kmp_avail_proc)) { __kmp_debug_assert ("num_avail == (unsigned)__kmp_avail_proc", "openmp/runtime/src/kmp_affinity.cpp" , 3269); };
3270
3271	// Count the number of levels which have more nodes at that level than at the
3272	// parent's level (with there being an implicit root node of the top level).
3273	// This is equivalent to saying that there is at least one node at this level
3274	// which has a sibling. These levels are in the map, and the package level is
3275	// always in the map.
3276	bool inMap = (bool )__kmp_allocate((maxIndex + 1) * sizeof(bool))___kmp_allocate(((maxIndex + 1) * sizeof(bool)), "openmp/runtime/src/kmp_affinity.cpp" , 3276);
3277	for (index = threadIdIndex1; index < maxIndex; index++) {
3278	KMP_ASSERT(totals[index] >= totals[index + 1])if (!(totals[index] >= totals[index + 1])) { __kmp_debug_assert ("totals[index] >= totals[index + 1]", "openmp/runtime/src/kmp_affinity.cpp" , 3278); };
3279	inMap[index] = (totals[index] > totals[index + 1]);
3280	}
3281	inMap[maxIndex] = (totals[maxIndex] > 1);
3282	inMap[pkgIdIndex3] = true;
3283	inMap[coreIdIndex2] = true;
3284	inMap[threadIdIndex1] = true;
3285
3286	int depth = 0;
3287	int idx = 0;
3288	kmp_hw_t types[KMP_HW_LAST];
3289	int pkgLevel = -1;
3290	int coreLevel = -1;
3291	int threadLevel = -1;
3292	for (index = threadIdIndex1; index <= maxIndex; index++) {
3293	if (inMap[index]) {
3294	depth++;
3295	}
3296	}
3297	if (inMap[pkgIdIndex3]) {
3298	pkgLevel = idx;
3299	types[idx++] = KMP_HW_SOCKET;
3300	}
3301	if (inMap[coreIdIndex2]) {
3302	coreLevel = idx;
3303	types[idx++] = KMP_HW_CORE;
3304	}
3305	if (inMap[threadIdIndex1]) {
3306	threadLevel = idx;
3307	types[idx++] = KMP_HW_THREAD;
3308	}
3309	KMP_ASSERT(depth > 0)if (!(depth > 0)) { __kmp_debug_assert("depth > 0", "openmp/runtime/src/kmp_affinity.cpp" , 3309); };
3310
3311	// Construct the data structure that is to be returned.
3312	__kmp_topology = kmp_topology_t::allocate(num_avail, depth, types);
3313
3314	for (i = 0; i < num_avail; ++i) {
3315	unsigned os = threadInfo[i][osIdIndex0];
3316	int src_index;
3317	kmp_hw_thread_t &hw_thread = __kmp_topology->at(i);
3318	hw_thread.clear();
3319	hw_thread.os_id = os;
3320
3321	idx = 0;
	Value stored to 'idx' is never read
3322	for (src_index = maxIndex; src_index >= threadIdIndex1; src_index--) {
3323	if (!inMap[src_index]) {
3324	continue;
3325	}
3326	if (src_index == pkgIdIndex3) {
3327	hw_thread.ids[pkgLevel] = threadInfo[i][src_index];
3328	} else if (src_index == coreIdIndex2) {
3329	hw_thread.ids[coreLevel] = threadInfo[i][src_index];
3330	} else if (src_index == threadIdIndex1) {
3331	hw_thread.ids[threadLevel] = threadInfo[i][src_index];
3332	}
3333	}
3334	}
3335
3336	__kmp_free(inMap)___kmp_free((inMap), "openmp/runtime/src/kmp_affinity.cpp", 3336 );
3337	__kmp_free(lastId)___kmp_free((lastId), "openmp/runtime/src/kmp_affinity.cpp", 3337 );
3338	__kmp_free(totals)___kmp_free((totals), "openmp/runtime/src/kmp_affinity.cpp", 3338 );
3339	__kmp_free(maxCt)___kmp_free((maxCt), "openmp/runtime/src/kmp_affinity.cpp", 3339 );
3340	__kmp_free(counts)___kmp_free((counts), "openmp/runtime/src/kmp_affinity.cpp", 3340 );
3341	CLEANUP_THREAD_INFOfor (i = 0; i <= num_records; i++) { ___kmp_free((threadInfo [i]), "openmp/runtime/src/kmp_affinity.cpp", 3341); } ___kmp_free ((threadInfo), "openmp/runtime/src/kmp_affinity.cpp", 3341);;
3342	__kmp_topology->sort_ids();
3343	if (!__kmp_topology->check_ids()) {
3344	kmp_topology_t::deallocate(__kmp_topology);
3345	__kmp_topology = nullptr;
3346	*msg_id = kmp_i18n_str_PhysicalIDsNotUnique;
3347	return false;
3348	}
3349	return true;
3350	}
3351
3352	// Create and return a table of affinity masks, indexed by OS thread ID.
3353	// This routine handles OR'ing together all the affinity masks of threads
3354	// that are sufficiently close, if granularity > fine.
3355	static void __kmp_create_os_id_masks(unsigned *numUnique,
3356	kmp_affinity_t &affinity) {
3357	// First form a table of affinity masks in order of OS thread id.
3358	int maxOsId;
3359	int i;
3360	int numAddrs = __kmp_topology->get_num_hw_threads();
3361	int depth = __kmp_topology->get_depth();
3362	const char *env_var = affinity.env_var;
3363	KMP_ASSERT(numAddrs)if (!(numAddrs)) { __kmp_debug_assert("numAddrs", "openmp/runtime/src/kmp_affinity.cpp" , 3363); };
3364	KMP_ASSERT(depth)if (!(depth)) { __kmp_debug_assert("depth", "openmp/runtime/src/kmp_affinity.cpp" , 3364); };
3365
3366	maxOsId = 0;
3367	for (i = numAddrs - 1;; --i) {
3368	int osId = __kmp_topology->at(i).os_id;
3369	if (osId > maxOsId) {
3370	maxOsId = osId;
3371	}
3372	if (i == 0)
3373	break;
3374	}
3375	affinity.num_os_id_masks = maxOsId + 1;
3376	KMP_CPU_ALLOC_ARRAY(affinity.os_id_masks, affinity.num_os_id_masks)(affinity.os_id_masks = __kmp_affinity_dispatch->allocate_mask_array (affinity.num_os_id_masks));
3377	KMP_ASSERT(affinity.gran_levels >= 0)if (!(affinity.gran_levels >= 0)) { __kmp_debug_assert("affinity.gran_levels >= 0" , "openmp/runtime/src/kmp_affinity.cpp", 3377); };
3378	if (affinity.flags.verbose && (affinity.gran_levels > 0)) {
3379	KMP_INFORM(ThreadsMigrate, env_var, affinity.gran_levels)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_ThreadsMigrate , env_var, affinity.gran_levels), __kmp_msg_null);
3380	}
3381	if (affinity.gran_levels >= (int)depth) {
3382	KMP_AFF_WARNING(affinity, AffThreadsMayMigrate)if (affinity.flags.verbose \|\| (affinity.flags.warnings && (affinity.type != affinity_none))) { __kmp_msg(kmp_ms_warning , __kmp_msg_format(kmp_i18n_msg_AffThreadsMayMigrate), __kmp_msg_null ); };
3383	}
3384
3385	// Run through the table, forming the masks for all threads on each core.
3386	// Threads on the same core will have identical kmp_hw_thread_t objects, not
3387	// considering the last level, which must be the thread id. All threads on a
3388	// core will appear consecutively.
3389	int unique = 0;
3390	int j = 0; // index of 1st thread on core
3391	int leader = 0;
3392	kmp_affin_mask_t *sum;
3393	KMP_CPU_ALLOC_ON_STACK(sum)(sum = __kmp_affinity_dispatch->allocate_mask());
3394	KMP_CPU_ZERO(sum)(sum)->zero();
3395	KMP_CPU_SET(__kmp_topology->at(0).os_id, sum)(sum)->set(__kmp_topology->at(0).os_id);
3396	for (i = 1; i < numAddrs; i++) {
3397	// If this thread is sufficiently close to the leader (within the
3398	// granularity setting), then set the bit for this os thread in the
3399	// affinity mask for this group, and go on to the next thread.
3400	if (__kmp_topology->is_close(leader, i, affinity.gran_levels)) {
3401	KMP_CPU_SET(__kmp_topology->at(i).os_id, sum)(sum)->set(__kmp_topology->at(i).os_id);
3402	continue;
3403	}
3404
3405	// For every thread in this group, copy the mask to the thread's entry in
3406	// the OS Id mask table. Mark the first address as a leader.
3407	for (; j < i; j++) {
3408	int osId = __kmp_topology->at(j).os_id;
3409	KMP_DEBUG_ASSERT(osId <= maxOsId)if (!(osId <= maxOsId)) { __kmp_debug_assert("osId <= maxOsId" , "openmp/runtime/src/kmp_affinity.cpp", 3409); };
3410	kmp_affin_mask_t *mask = KMP_CPU_INDEX(affinity.os_id_masks, osId)__kmp_affinity_dispatch->index_mask_array(affinity.os_id_masks , osId);
3411	KMP_CPU_COPY(mask, sum)(mask)->copy(sum);
3412	__kmp_topology->at(j).leader = (j == leader);
3413	}
3414	unique++;
3415
3416	// Start a new mask.
3417	leader = i;
3418	KMP_CPU_ZERO(sum)(sum)->zero();
3419	KMP_CPU_SET(__kmp_topology->at(i).os_id, sum)(sum)->set(__kmp_topology->at(i).os_id);
3420	}
3421
3422	// For every thread in last group, copy the mask to the thread's
3423	// entry in the OS Id mask table.
3424	for (; j < i; j++) {
3425	int osId = __kmp_topology->at(j).os_id;
3426	KMP_DEBUG_ASSERT(osId <= maxOsId)if (!(osId <= maxOsId)) { __kmp_debug_assert("osId <= maxOsId" , "openmp/runtime/src/kmp_affinity.cpp", 3426); };
3427	kmp_affin_mask_t *mask = KMP_CPU_INDEX(affinity.os_id_masks, osId)__kmp_affinity_dispatch->index_mask_array(affinity.os_id_masks , osId);
3428	KMP_CPU_COPY(mask, sum)(mask)->copy(sum);
3429	__kmp_topology->at(j).leader = (j == leader);
3430	}
3431	unique++;
3432	KMP_CPU_FREE_FROM_STACK(sum)__kmp_affinity_dispatch->deallocate_mask(sum);
3433
3434	*numUnique = unique;
3435	}
3436
3437	// Stuff for the affinity proclist parsers. It's easier to declare these vars
3438	// as file-static than to try and pass them through the calling sequence of
3439	// the recursive-descent OMP_PLACES parser.
3440	static kmp_affin_mask_t *newMasks;
3441	static int numNewMasks;
3442	static int nextNewMask;
3443
3444	#define ADD_MASK(_mask) \
3445	{ \
3446	if (nextNewMask >= numNewMasks) { \
3447	int i; \
3448	numNewMasks *= 2; \
3449	kmp_affin_mask_t *temp; \
3450	KMP_CPU_INTERNAL_ALLOC_ARRAY(temp, numNewMasks)(temp = __kmp_affinity_dispatch->allocate_mask_array(numNewMasks )); \
3451	for (i = 0; i < numNewMasks / 2; i++) { \
3452	kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i)__kmp_affinity_dispatch->index_mask_array(newMasks, i); \
3453	kmp_affin_mask_t *dest = KMP_CPU_INDEX(temp, i)__kmp_affinity_dispatch->index_mask_array(temp, i); \
3454	KMP_CPU_COPY(dest, src)(dest)->copy(src); \
3455	} \
3456	KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks / 2)__kmp_affinity_dispatch->deallocate_mask_array(newMasks); \
3457	newMasks = temp; \
3458	} \
3459	KMP_CPU_COPY(KMP_CPU_INDEX(newMasks, nextNewMask), (_mask))(__kmp_affinity_dispatch->index_mask_array(newMasks, nextNewMask ))->copy((_mask)); \
3460	nextNewMask++; \
3461	}
3462
3463	#define ADD_MASK_OSID(_osId, _osId2Mask, _maxOsId) \
3464	{ \
3465	if (((_osId) > _maxOsId) \|\| \
3466	(!KMP_CPU_ISSET((_osId), KMP_CPU_INDEX((_osId2Mask), (_osId)))(__kmp_affinity_dispatch->index_mask_array((_osId2Mask), ( _osId)))->is_set((_osId)))) { \
3467	KMP_AFF_WARNING(affinity, AffIgnoreInvalidProcID, _osId)if (affinity.flags.verbose \|\| (affinity.flags.warnings && (affinity.type != affinity_none))) { __kmp_msg(kmp_ms_warning , __kmp_msg_format(kmp_i18n_msg_AffIgnoreInvalidProcID, _osId ), __kmp_msg_null); }; \
3468	} else { \
3469	ADD_MASK(KMP_CPU_INDEX(_osId2Mask, (_osId))__kmp_affinity_dispatch->index_mask_array(_osId2Mask, (_osId ))); \
3470	} \
3471	}
3472
3473	// Re-parse the proclist (for the explicit affinity type), and form the list
3474	// of affinity newMasks indexed by gtid.
3475	static void __kmp_affinity_process_proclist(kmp_affinity_t &affinity) {
3476	int i;
3477	kmp_affin_mask_t **out_masks = &affinity.masks;
3478	unsigned *out_numMasks = &affinity.num_masks;
3479	const char *proclist = affinity.proclist;
3480	kmp_affin_mask_t *osId2Mask = affinity.os_id_masks;
3481	int maxOsId = affinity.num_os_id_masks - 1;
3482	const char *scan = proclist;
3483	const char *next = proclist;
3484
3485	// We use malloc() for the temporary mask vector, so that we can use
3486	// realloc() to extend it.
3487	numNewMasks = 2;
3488	KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks)(newMasks = __kmp_affinity_dispatch->allocate_mask_array(numNewMasks ));
3489	nextNewMask = 0;
3490	kmp_affin_mask_t *sumMask;
3491	KMP_CPU_ALLOC(sumMask)(sumMask = __kmp_affinity_dispatch->allocate_mask());
3492	int setSize = 0;
3493
3494	for (;;) {
3495	int start, end, stride;
3496
3497	SKIP_WS(scan){ while ((scan) == ' ' \|\| (scan) == '\t') (scan)++; };
3498	next = scan;
3499	if (*next == '\0') {
3500	break;
3501	}
3502
3503	if (*next == '{') {
3504	int num;
3505	setSize = 0;
3506	next++; // skip '{'
3507	SKIP_WS(next){ while ((next) == ' ' \|\| (next) == '\t') (next)++; };
3508	scan = next;
3509
3510	// Read the first integer in the set.
3511	KMP_ASSERT2((next >= '0') && (next <= '9'), "bad proclist")if (!((next >= '0') && (next <= '9'))) { __kmp_debug_assert (("bad proclist"), "openmp/runtime/src/kmp_affinity.cpp", 3511 ); };
3512	SKIP_DIGITS(next){ while ((next) >= '0' && (next) <= '9') (next )++; };
3513	num = __kmp_str_to_int(scan, *next);
3514	KMP_ASSERT2(num >= 0, "bad explicit proc list")if (!(num >= 0)) { __kmp_debug_assert(("bad explicit proc list" ), "openmp/runtime/src/kmp_affinity.cpp", 3514); };
3515
3516	// Copy the mask for that osId to the sum (union) mask.
3517	if ((num > maxOsId) \|\|
3518	(!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num))(__kmp_affinity_dispatch->index_mask_array(osId2Mask, num) )->is_set(num))) {
3519	KMP_AFF_WARNING(affinity, AffIgnoreInvalidProcID, num)if (affinity.flags.verbose \|\| (affinity.flags.warnings && (affinity.type != affinity_none))) { __kmp_msg(kmp_ms_warning , __kmp_msg_format(kmp_i18n_msg_AffIgnoreInvalidProcID, num), __kmp_msg_null); };
3520	KMP_CPU_ZERO(sumMask)(sumMask)->zero();
3521	} else {
3522	KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num))(sumMask)->copy(__kmp_affinity_dispatch->index_mask_array (osId2Mask, num));
3523	setSize = 1;
3524	}
3525
3526	for (;;) {
3527	// Check for end of set.
3528	SKIP_WS(next){ while ((next) == ' ' \|\| (next) == '\t') (next)++; };
3529	if (*next == '}') {
3530	next++; // skip '}'
3531	break;
3532	}
3533
3534	// Skip optional comma.
3535	if (*next == ',') {
3536	next++;
3537	}
3538	SKIP_WS(next){ while ((next) == ' ' \|\| (next) == '\t') (next)++; };
3539
3540	// Read the next integer in the set.
3541	scan = next;
3542	KMP_ASSERT2((next >= '0') && (next <= '9'), "bad explicit proc list")if (!((next >= '0') && (next <= '9'))) { __kmp_debug_assert (("bad explicit proc list"), "openmp/runtime/src/kmp_affinity.cpp" , 3542); };
3543
3544	SKIP_DIGITS(next){ while ((next) >= '0' && (next) <= '9') (next )++; };
3545	num = __kmp_str_to_int(scan, *next);
3546	KMP_ASSERT2(num >= 0, "bad explicit proc list")if (!(num >= 0)) { __kmp_debug_assert(("bad explicit proc list" ), "openmp/runtime/src/kmp_affinity.cpp", 3546); };
3547
3548	// Add the mask for that osId to the sum mask.
3549	if ((num > maxOsId) \|\|
3550	(!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num))(__kmp_affinity_dispatch->index_mask_array(osId2Mask, num) )->is_set(num))) {
3551	KMP_AFF_WARNING(affinity, AffIgnoreInvalidProcID, num)if (affinity.flags.verbose \|\| (affinity.flags.warnings && (affinity.type != affinity_none))) { __kmp_msg(kmp_ms_warning , __kmp_msg_format(kmp_i18n_msg_AffIgnoreInvalidProcID, num), __kmp_msg_null); };
3552	} else {
3553	KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num))(sumMask)->bitwise_or(__kmp_affinity_dispatch->index_mask_array (osId2Mask, num));
3554	setSize++;
3555	}
3556	}
3557	if (setSize > 0) {
3558	ADD_MASK(sumMask);
3559	}
3560
3561	SKIP_WS(next){ while ((next) == ' ' \|\| (next) == '\t') (next)++; };
3562	if (*next == ',') {
3563	next++;
3564	}
3565	scan = next;
3566	continue;
3567	}
3568
3569	// Read the first integer.
3570	KMP_ASSERT2((next >= '0') && (next <= '9'), "bad explicit proc list")if (!((next >= '0') && (next <= '9'))) { __kmp_debug_assert (("bad explicit proc list"), "openmp/runtime/src/kmp_affinity.cpp" , 3570); };
3571	SKIP_DIGITS(next){ while ((next) >= '0' && (next) <= '9') (next )++; };
3572	start = __kmp_str_to_int(scan, *next);
3573	KMP_ASSERT2(start >= 0, "bad explicit proc list")if (!(start >= 0)) { __kmp_debug_assert(("bad explicit proc list" ), "openmp/runtime/src/kmp_affinity.cpp", 3573); };
3574	SKIP_WS(next){ while ((next) == ' ' \|\| (next) == '\t') (next)++; };
3575
3576	// If this isn't a range, then add a mask to the list and go on.
3577	if (*next != '-') {
3578	ADD_MASK_OSID(start, osId2Mask, maxOsId);
3579
3580	// Skip optional comma.
3581	if (*next == ',') {
3582	next++;
3583	}
3584	scan = next;
3585	continue;
3586	}
3587
3588	// This is a range. Skip over the '-' and read in the 2nd int.
3589	next++; // skip '-'
3590	SKIP_WS(next){ while ((next) == ' ' \|\| (next) == '\t') (next)++; };
3591	scan = next;
3592	KMP_ASSERT2((next >= '0') && (next <= '9'), "bad explicit proc list")if (!((next >= '0') && (next <= '9'))) { __kmp_debug_assert (("bad explicit proc list"), "openmp/runtime/src/kmp_affinity.cpp" , 3592); };
3593	SKIP_DIGITS(next){ while ((next) >= '0' && (next) <= '9') (next )++; };
3594	end = __kmp_str_to_int(scan, *next);
3595	KMP_ASSERT2(end >= 0, "bad explicit proc list")if (!(end >= 0)) { __kmp_debug_assert(("bad explicit proc list" ), "openmp/runtime/src/kmp_affinity.cpp", 3595); };
3596
3597	// Check for a stride parameter
3598	stride = 1;
3599	SKIP_WS(next){ while ((next) == ' ' \|\| (next) == '\t') (next)++; };
3600	if (*next == ':') {
3601	// A stride is specified. Skip over the ':" and read the 3rd int.
3602	int sign = +1;
3603	next++; // skip ':'
3604	SKIP_WS(next){ while ((next) == ' ' \|\| (next) == '\t') (next)++; };
3605	scan = next;
3606	if (*next == '-') {
3607	sign = -1;
3608	next++;
3609	SKIP_WS(next){ while ((next) == ' ' \|\| (next) == '\t') (next)++; };
3610	scan = next;
3611	}
3612	KMP_ASSERT2((next >= '0') && (next <= '9'), "bad explicit proc list")if (!((next >= '0') && (next <= '9'))) { __kmp_debug_assert (("bad explicit proc list"), "openmp/runtime/src/kmp_affinity.cpp" , 3612); };
3613	SKIP_DIGITS(next){ while ((next) >= '0' && (next) <= '9') (next )++; };
3614	stride = __kmp_str_to_int(scan, *next);
3615	KMP_ASSERT2(stride >= 0, "bad explicit proc list")if (!(stride >= 0)) { __kmp_debug_assert(("bad explicit proc list" ), "openmp/runtime/src/kmp_affinity.cpp", 3615); };
3616	stride *= sign;
3617	}
3618
3619	// Do some range checks.
3620	KMP_ASSERT2(stride != 0, "bad explicit proc list")if (!(stride != 0)) { __kmp_debug_assert(("bad explicit proc list" ), "openmp/runtime/src/kmp_affinity.cpp", 3620); };
3621	if (stride > 0) {
3622	KMP_ASSERT2(start <= end, "bad explicit proc list")if (!(start <= end)) { __kmp_debug_assert(("bad explicit proc list" ), "openmp/runtime/src/kmp_affinity.cpp", 3622); };
3623	} else {
3624	KMP_ASSERT2(start >= end, "bad explicit proc list")if (!(start >= end)) { __kmp_debug_assert(("bad explicit proc list" ), "openmp/runtime/src/kmp_affinity.cpp", 3624); };
3625	}
3626	KMP_ASSERT2((end - start) / stride <= 65536, "bad explicit proc list")if (!((end - start) / stride <= 65536)) { __kmp_debug_assert (("bad explicit proc list"), "openmp/runtime/src/kmp_affinity.cpp" , 3626); };
3627
3628	// Add the mask for each OS proc # to the list.
3629	if (stride > 0) {
3630	do {
3631	ADD_MASK_OSID(start, osId2Mask, maxOsId);
3632	start += stride;
3633	} while (start <= end);
3634	} else {
3635	do {
3636	ADD_MASK_OSID(start, osId2Mask, maxOsId);
3637	start += stride;
3638	} while (start >= end);
3639	}
3640
3641	// Skip optional comma.
3642	SKIP_WS(next){ while ((next) == ' ' \|\| (next) == '\t') (next)++; };
3643	if (*next == ',') {
3644	next++;
3645	}
3646	scan = next;
3647	}
3648
3649	*out_numMasks = nextNewMask;
3650	if (nextNewMask == 0) {
3651	*out_masks = NULL__null;
3652	KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks)__kmp_affinity_dispatch->deallocate_mask_array(newMasks);
3653	return;
3654	}
3655	KMP_CPU_ALLOC_ARRAY((out_masks), nextNewMask)((out_masks) = __kmp_affinity_dispatch->allocate_mask_array (nextNewMask));
3656	for (i = 0; i < nextNewMask; i++) {
3657	kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i)__kmp_affinity_dispatch->index_mask_array(newMasks, i);
3658	kmp_affin_mask_t dest = KMP_CPU_INDEX((out_masks), i)__kmp_affinity_dispatch->index_mask_array((*out_masks), i);
3659	KMP_CPU_COPY(dest, src)(dest)->copy(src);
3660	}
3661	KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks)__kmp_affinity_dispatch->deallocate_mask_array(newMasks);
3662	KMP_CPU_FREE(sumMask)__kmp_affinity_dispatch->deallocate_mask(sumMask);
3663	}
3664
3665	/*-----------------------------------------------------------------------------
3666	Re-parse the OMP_PLACES proc id list, forming the newMasks for the different
3667	places. Again, Here is the grammar:
3668
3669	place_list := place
3670	place_list := place , place_list
3671	place := num
3672	place := place : num
3673	place := place : num : signed
3674	place := { subplacelist }
3675	place := ! place // (lowest priority)
3676	subplace_list := subplace
3677	subplace_list := subplace , subplace_list
3678	subplace := num
3679	subplace := num : num
3680	subplace := num : num : signed
3681	signed := num
3682	signed := + signed
3683	signed := - signed
3684	-----------------------------------------------------------------------------*/
3685	static void __kmp_process_subplace_list(const char **scan,
3686	kmp_affinity_t &affinity, int maxOsId,
3687	kmp_affin_mask_t *tempMask,
3688	int *setSize) {
3689	const char *next;
3690	kmp_affin_mask_t *osId2Mask = affinity.os_id_masks;
3691
3692	for (;;) {
3693	int start, count, stride, i;
3694
3695	// Read in the starting proc id
3696	SKIP_WS(scan){ while ((scan) == ' ' \|\| (scan) == '\t') (scan)++; };
3697	KMP_ASSERT2((scan >= '0') && (scan <= '9'), "bad explicit places list")if (!((scan >= '0') && (scan <= '9'))) { __kmp_debug_assert (("bad explicit places list"), "openmp/runtime/src/kmp_affinity.cpp" , 3697); };
3698	next = *scan;
3699	SKIP_DIGITS(next){ while ((next) >= '0' && (next) <= '9') (next )++; };
3700	start = __kmp_str_to_int(scan, next);
3701	KMP_ASSERT(start >= 0)if (!(start >= 0)) { __kmp_debug_assert("start >= 0", "openmp/runtime/src/kmp_affinity.cpp" , 3701); };
3702	*scan = next;
3703
3704	// valid follow sets are ',' ':' and '}'
3705	SKIP_WS(scan){ while ((scan) == ' ' \|\| (scan) == '\t') (scan)++; };
3706	if (scan == '}' \|\| scan == ',') {
3707	if ((start > maxOsId) \|\|
3708	(!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start))(__kmp_affinity_dispatch->index_mask_array(osId2Mask, start ))->is_set(start))) {
3709	KMP_AFF_WARNING(affinity, AffIgnoreInvalidProcID, start)if (affinity.flags.verbose \|\| (affinity.flags.warnings && (affinity.type != affinity_none))) { __kmp_msg(kmp_ms_warning , __kmp_msg_format(kmp_i18n_msg_AffIgnoreInvalidProcID, start ), __kmp_msg_null); };
3710	} else {
3711	KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start))(tempMask)->bitwise_or(__kmp_affinity_dispatch->index_mask_array (osId2Mask, start));
3712	(*setSize)++;
3713	}
3714	if (**scan == '}') {
3715	break;
3716	}
3717	(*scan)++; // skip ','
3718	continue;
3719	}
3720	KMP_ASSERT2(scan == ':', "bad explicit places list")if (!(scan == ':')) { __kmp_debug_assert(("bad explicit places list" ), "openmp/runtime/src/kmp_affinity.cpp", 3720); };
3721	(*scan)++; // skip ':'
3722
3723	// Read count parameter
3724	SKIP_WS(scan){ while ((scan) == ' ' \|\| (scan) == '\t') (scan)++; };
3725	KMP_ASSERT2((scan >= '0') && (scan <= '9'), "bad explicit places list")if (!((scan >= '0') && (scan <= '9'))) { __kmp_debug_assert (("bad explicit places list"), "openmp/runtime/src/kmp_affinity.cpp" , 3725); };
3726	next = *scan;
3727	SKIP_DIGITS(next){ while ((next) >= '0' && (next) <= '9') (next )++; };
3728	count = __kmp_str_to_int(scan, next);
3729	KMP_ASSERT(count >= 0)if (!(count >= 0)) { __kmp_debug_assert("count >= 0", "openmp/runtime/src/kmp_affinity.cpp" , 3729); };
3730	*scan = next;
3731
3732	// valid follow sets are ',' ':' and '}'
3733	SKIP_WS(scan){ while ((scan) == ' ' \|\| (scan) == '\t') (scan)++; };
3734	if (scan == '}' \|\| scan == ',') {
3735	for (i = 0; i < count; i++) {
3736	if ((start > maxOsId) \|\|
3737	(!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start))(__kmp_affinity_dispatch->index_mask_array(osId2Mask, start ))->is_set(start))) {
3738	KMP_AFF_WARNING(affinity, AffIgnoreInvalidProcID, start)if (affinity.flags.verbose \|\| (affinity.flags.warnings && (affinity.type != affinity_none))) { __kmp_msg(kmp_ms_warning , __kmp_msg_format(kmp_i18n_msg_AffIgnoreInvalidProcID, start ), __kmp_msg_null); };
3739	break; // don't proliferate warnings for large count
3740	} else {
3741	KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start))(tempMask)->bitwise_or(__kmp_affinity_dispatch->index_mask_array (osId2Mask, start));
3742	start++;
3743	(*setSize)++;
3744	}
3745	}
3746	if (**scan == '}') {
3747	break;
3748	}
3749	(*scan)++; // skip ','
3750	continue;
3751	}
3752	KMP_ASSERT2(scan == ':', "bad explicit places list")if (!(scan == ':')) { __kmp_debug_assert(("bad explicit places list" ), "openmp/runtime/src/kmp_affinity.cpp", 3752); };
3753	(*scan)++; // skip ':'
3754
3755	// Read stride parameter
3756	int sign = +1;
3757	for (;;) {
3758	SKIP_WS(scan){ while ((scan) == ' ' \|\| (scan) == '\t') (scan)++; };
3759	if (**scan == '+') {
3760	(*scan)++; // skip '+'
3761	continue;
3762	}
3763	if (**scan == '-') {
3764	sign *= -1;
3765	(*scan)++; // skip '-'
3766	continue;
3767	}
3768	break;
3769	}
3770	SKIP_WS(scan){ while ((scan) == ' ' \|\| (scan) == '\t') (scan)++; };
3771	KMP_ASSERT2((scan >= '0') && (scan <= '9'), "bad explicit places list")if (!((scan >= '0') && (scan <= '9'))) { __kmp_debug_assert (("bad explicit places list"), "openmp/runtime/src/kmp_affinity.cpp" , 3771); };
3772	next = *scan;
3773	SKIP_DIGITS(next){ while ((next) >= '0' && (next) <= '9') (next )++; };
3774	stride = __kmp_str_to_int(scan, next);
3775	KMP_ASSERT(stride >= 0)if (!(stride >= 0)) { __kmp_debug_assert("stride >= 0", "openmp/runtime/src/kmp_affinity.cpp", 3775); };
3776	*scan = next;
3777	stride *= sign;
3778
3779	// valid follow sets are ',' and '}'
3780	SKIP_WS(scan){ while ((scan) == ' ' \|\| (scan) == '\t') (scan)++; };
3781	if (scan == '}' \|\| scan == ',') {
3782	for (i = 0; i < count; i++) {
3783	if ((start > maxOsId) \|\|
3784	(!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start))(__kmp_affinity_dispatch->index_mask_array(osId2Mask, start ))->is_set(start))) {
3785	KMP_AFF_WARNING(affinity, AffIgnoreInvalidProcID, start)if (affinity.flags.verbose \|\| (affinity.flags.warnings && (affinity.type != affinity_none))) { __kmp_msg(kmp_ms_warning , __kmp_msg_format(kmp_i18n_msg_AffIgnoreInvalidProcID, start ), __kmp_msg_null); };
3786	break; // don't proliferate warnings for large count
3787	} else {
3788	KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start))(tempMask)->bitwise_or(__kmp_affinity_dispatch->index_mask_array (osId2Mask, start));
3789	start += stride;
3790	(*setSize)++;
3791	}
3792	}
3793	if (**scan == '}') {
3794	break;
3795	}
3796	(*scan)++; // skip ','
3797	continue;
3798	}
3799
3800	KMP_ASSERT2(0, "bad explicit places list")if (!(0)) { __kmp_debug_assert(("bad explicit places list"), "openmp/runtime/src/kmp_affinity.cpp" , 3800); };
3801	}
3802	}
3803
3804	static void __kmp_process_place(const char **scan, kmp_affinity_t &affinity,
3805	int maxOsId, kmp_affin_mask_t *tempMask,
3806	int *setSize) {
3807	const char *next;
3808	kmp_affin_mask_t *osId2Mask = affinity.os_id_masks;
3809
3810	// valid follow sets are '{' '!' and num
3811	SKIP_WS(scan){ while ((scan) == ' ' \|\| (scan) == '\t') (scan)++; };
3812	if (**scan == '{') {
3813	(*scan)++; // skip '{'
3814	__kmp_process_subplace_list(scan, affinity, maxOsId, tempMask, setSize);
3815	KMP_ASSERT2(scan == '}', "bad explicit places list")if (!(scan == '}')) { __kmp_debug_assert(("bad explicit places list" ), "openmp/runtime/src/kmp_affinity.cpp", 3815); };
3816	(*scan)++; // skip '}'
3817	} else if (**scan == '!') {
3818	(*scan)++; // skip '!'
3819	__kmp_process_place(scan, affinity, maxOsId, tempMask, setSize);
3820	KMP_CPU_COMPLEMENT(maxOsId, tempMask)(tempMask)->bitwise_not();
3821	} else if ((scan >= '0') && (scan <= '9')) {
3822	next = *scan;
3823	SKIP_DIGITS(next){ while ((next) >= '0' && (next) <= '9') (next )++; };
3824	int num = __kmp_str_to_int(scan, next);
3825	KMP_ASSERT(num >= 0)if (!(num >= 0)) { __kmp_debug_assert("num >= 0", "openmp/runtime/src/kmp_affinity.cpp" , 3825); };
3826	if ((num > maxOsId) \|\|
3827	(!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num))(__kmp_affinity_dispatch->index_mask_array(osId2Mask, num) )->is_set(num))) {
3828	KMP_AFF_WARNING(affinity, AffIgnoreInvalidProcID, num)if (affinity.flags.verbose \|\| (affinity.flags.warnings && (affinity.type != affinity_none))) { __kmp_msg(kmp_ms_warning , __kmp_msg_format(kmp_i18n_msg_AffIgnoreInvalidProcID, num), __kmp_msg_null); };
3829	} else {
3830	KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num))(tempMask)->bitwise_or(__kmp_affinity_dispatch->index_mask_array (osId2Mask, num));
3831	(*setSize)++;
3832	}
3833	*scan = next; // skip num
3834	} else {
3835	KMP_ASSERT2(0, "bad explicit places list")if (!(0)) { __kmp_debug_assert(("bad explicit places list"), "openmp/runtime/src/kmp_affinity.cpp" , 3835); };
3836	}
3837	}
3838
3839	// static void
3840	void __kmp_affinity_process_placelist(kmp_affinity_t &affinity) {
3841	int i, j, count, stride, sign;
3842	kmp_affin_mask_t **out_masks = &affinity.masks;
3843	unsigned *out_numMasks = &affinity.num_masks;
3844	const char *placelist = affinity.proclist;
3845	kmp_affin_mask_t *osId2Mask = affinity.os_id_masks;
3846	int maxOsId = affinity.num_os_id_masks - 1;
3847	const char *scan = placelist;
3848	const char *next = placelist;
3849
3850	numNewMasks = 2;
3851	KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks)(newMasks = __kmp_affinity_dispatch->allocate_mask_array(numNewMasks ));
3852	nextNewMask = 0;
3853
3854	// tempMask is modified based on the previous or initial
3855	// place to form the current place
3856	// previousMask contains the previous place
3857	kmp_affin_mask_t *tempMask;
3858	kmp_affin_mask_t *previousMask;
3859	KMP_CPU_ALLOC(tempMask)(tempMask = __kmp_affinity_dispatch->allocate_mask());
3860	KMP_CPU_ZERO(tempMask)(tempMask)->zero();
3861	KMP_CPU_ALLOC(previousMask)(previousMask = __kmp_affinity_dispatch->allocate_mask());
3862	KMP_CPU_ZERO(previousMask)(previousMask)->zero();
3863	int setSize = 0;
3864
3865	for (;;) {
3866	__kmp_process_place(&scan, affinity, maxOsId, tempMask, &setSize);
3867
3868	// valid follow sets are ',' ':' and EOL
3869	SKIP_WS(scan){ while ((scan) == ' ' \|\| (scan) == '\t') (scan)++; };
3870	if (scan == '\0' \|\| scan == ',') {
3871	if (setSize > 0) {
3872	ADD_MASK(tempMask);
3873	}
3874	KMP_CPU_ZERO(tempMask)(tempMask)->zero();
3875	setSize = 0;
3876	if (*scan == '\0') {
3877	break;
3878	}
3879	scan++; // skip ','
3880	continue;
3881	}
3882
3883	KMP_ASSERT2(scan == ':', "bad explicit places list")if (!(scan == ':')) { __kmp_debug_assert(("bad explicit places list" ), "openmp/runtime/src/kmp_affinity.cpp", 3883); };
3884	scan++; // skip ':'
3885
3886	// Read count parameter
3887	SKIP_WS(scan){ while ((scan) == ' ' \|\| (scan) == '\t') (scan)++; };
3888	KMP_ASSERT2((scan >= '0') && (scan <= '9'), "bad explicit places list")if (!((scan >= '0') && (scan <= '9'))) { __kmp_debug_assert (("bad explicit places list"), "openmp/runtime/src/kmp_affinity.cpp" , 3888); };
3889	next = scan;
3890	SKIP_DIGITS(next){ while ((next) >= '0' && (next) <= '9') (next )++; };
3891	count = __kmp_str_to_int(scan, *next);
3892	KMP_ASSERT(count >= 0)if (!(count >= 0)) { __kmp_debug_assert("count >= 0", "openmp/runtime/src/kmp_affinity.cpp" , 3892); };
3893	scan = next;
3894
3895	// valid follow sets are ',' ':' and EOL
3896	SKIP_WS(scan){ while ((scan) == ' ' \|\| (scan) == '\t') (scan)++; };
3897	if (scan == '\0' \|\| scan == ',') {
3898	stride = +1;
3899	} else {
3900	KMP_ASSERT2(scan == ':', "bad explicit places list")if (!(scan == ':')) { __kmp_debug_assert(("bad explicit places list" ), "openmp/runtime/src/kmp_affinity.cpp", 3900); };
3901	scan++; // skip ':'
3902
3903	// Read stride parameter
3904	sign = +1;
3905	for (;;) {
3906	SKIP_WS(scan){ while ((scan) == ' ' \|\| (scan) == '\t') (scan)++; };
3907	if (*scan == '+') {
3908	scan++; // skip '+'
3909	continue;
3910	}
3911	if (*scan == '-') {
3912	sign *= -1;
3913	scan++; // skip '-'
3914	continue;
3915	}
3916	break;
3917	}
3918	SKIP_WS(scan){ while ((scan) == ' ' \|\| (scan) == '\t') (scan)++; };
3919	KMP_ASSERT2((scan >= '0') && (scan <= '9'), "bad explicit places list")if (!((scan >= '0') && (scan <= '9'))) { __kmp_debug_assert (("bad explicit places list"), "openmp/runtime/src/kmp_affinity.cpp" , 3919); };
3920	next = scan;
3921	SKIP_DIGITS(next){ while ((next) >= '0' && (next) <= '9') (next )++; };
3922	stride = __kmp_str_to_int(scan, *next);
3923	KMP_DEBUG_ASSERT(stride >= 0)if (!(stride >= 0)) { __kmp_debug_assert("stride >= 0", "openmp/runtime/src/kmp_affinity.cpp", 3923); };
3924	scan = next;
3925	stride *= sign;
3926	}
3927
3928	// Add places determined by initial_place : count : stride
3929	for (i = 0; i < count; i++) {
3930	if (setSize == 0) {
3931	break;
3932	}
3933	// Add the current place, then build the next place (tempMask) from that
3934	KMP_CPU_COPY(previousMask, tempMask)(previousMask)->copy(tempMask);
3935	ADD_MASK(previousMask);
3936	KMP_CPU_ZERO(tempMask)(tempMask)->zero();
3937	setSize = 0;
3938	KMP_CPU_SET_ITERATE(j, previousMask)for (j = (previousMask)->begin(); (int)j != (previousMask) ->end(); j = (previousMask)->next(j)) {
3939	if (!KMP_CPU_ISSET(j, previousMask)(previousMask)->is_set(j)) {
3940	continue;
3941	}
3942	if ((j + stride > maxOsId) \|\| (j + stride < 0) \|\|
3943	(!KMP_CPU_ISSET(j, __kmp_affin_fullMask)(__kmp_affin_fullMask)->is_set(j)) \|\|
3944	(!KMP_CPU_ISSET(j + stride,(__kmp_affinity_dispatch->index_mask_array(osId2Mask, j + stride ))->is_set(j + stride)
3945	KMP_CPU_INDEX(osId2Mask, j + stride))(__kmp_affinity_dispatch->index_mask_array(osId2Mask, j + stride ))->is_set(j + stride))) {
3946	if (i < count - 1) {
3947	KMP_AFF_WARNING(affinity, AffIgnoreInvalidProcID, j + stride)if (affinity.flags.verbose \|\| (affinity.flags.warnings && (affinity.type != affinity_none))) { __kmp_msg(kmp_ms_warning , __kmp_msg_format(kmp_i18n_msg_AffIgnoreInvalidProcID, j + stride ), __kmp_msg_null); };
3948	}
3949	continue;
3950	}
3951	KMP_CPU_SET(j + stride, tempMask)(tempMask)->set(j + stride);
3952	setSize++;
3953	}
3954	}
3955	KMP_CPU_ZERO(tempMask)(tempMask)->zero();
3956	setSize = 0;
3957
3958	// valid follow sets are ',' and EOL
3959	SKIP_WS(scan){ while ((scan) == ' ' \|\| (scan) == '\t') (scan)++; };
3960	if (*scan == '\0') {
3961	break;
3962	}
3963	if (*scan == ',') {
3964	scan++; // skip ','
3965	continue;
3966	}
3967
3968	KMP_ASSERT2(0, "bad explicit places list")if (!(0)) { __kmp_debug_assert(("bad explicit places list"), "openmp/runtime/src/kmp_affinity.cpp" , 3968); };
3969	}
3970
3971	*out_numMasks = nextNewMask;
3972	if (nextNewMask == 0) {
3973	*out_masks = NULL__null;
3974	KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks)__kmp_affinity_dispatch->deallocate_mask_array(newMasks);
3975	return;
3976	}
3977	KMP_CPU_ALLOC_ARRAY((out_masks), nextNewMask)((out_masks) = __kmp_affinity_dispatch->allocate_mask_array (nextNewMask));
3978	KMP_CPU_FREE(tempMask)__kmp_affinity_dispatch->deallocate_mask(tempMask);
3979	KMP_CPU_FREE(previousMask)__kmp_affinity_dispatch->deallocate_mask(previousMask);
3980	for (i = 0; i < nextNewMask; i++) {
3981	kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i)__kmp_affinity_dispatch->index_mask_array(newMasks, i);
3982	kmp_affin_mask_t dest = KMP_CPU_INDEX((out_masks), i)__kmp_affinity_dispatch->index_mask_array((*out_masks), i);
3983	KMP_CPU_COPY(dest, src)(dest)->copy(src);
3984	}
3985	KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks)__kmp_affinity_dispatch->deallocate_mask_array(newMasks);
3986	}
3987
3988	#undef ADD_MASK
3989	#undef ADD_MASK_OSID
3990
3991	// This function figures out the deepest level at which there is at least one
3992	// cluster/core with more than one processing unit bound to it.
3993	static int __kmp_affinity_find_core_level(int nprocs, int bottom_level) {
3994	int core_level = 0;
3995
3996	for (int i = 0; i < nprocs; i++) {
3997	const kmp_hw_thread_t &hw_thread = __kmp_topology->at(i);
3998	for (int j = bottom_level; j > 0; j--) {
3999	if (hw_thread.ids[j] > 0) {
4000	if (core_level < (j - 1)) {
4001	core_level = j - 1;
4002	}
4003	}
4004	}
4005	}
4006	return core_level;
4007	}
4008
4009	// This function counts number of clusters/cores at given level.
4010	static int __kmp_affinity_compute_ncores(int nprocs, int bottom_level,
4011	int core_level) {
4012	return __kmp_topology->get_count(core_level);
4013	}
4014	// This function finds to which cluster/core given processing unit is bound.
4015	static int __kmp_affinity_find_core(int proc, int bottom_level,
4016	int core_level) {
4017	int core = 0;
4018	KMP_DEBUG_ASSERT(proc >= 0 && proc < __kmp_topology->get_num_hw_threads())if (!(proc >= 0 && proc < __kmp_topology->get_num_hw_threads ())) { __kmp_debug_assert("proc >= 0 && proc < __kmp_topology->get_num_hw_threads()" , "openmp/runtime/src/kmp_affinity.cpp", 4018); };
4019	for (int i = 0; i <= proc; ++i) {
4020	if (i + 1 <= proc) {
4021	for (int j = 0; j <= core_level; ++j) {
4022	if (__kmp_topology->at(i + 1).sub_ids[j] !=
4023	__kmp_topology->at(i).sub_ids[j]) {
4024	core++;
4025	break;
4026	}
4027	}
4028	}
4029	}
4030	return core;
4031	}
4032
4033	// This function finds maximal number of processing units bound to a
4034	// cluster/core at given level.
4035	static int __kmp_affinity_max_proc_per_core(int nprocs, int bottom_level,
4036	int core_level) {
4037	if (core_level >= bottom_level)
4038	return 1;
4039	int thread_level = __kmp_topology->get_level(KMP_HW_THREAD);
4040	return __kmp_topology->calculate_ratio(thread_level, core_level);
4041	}
4042
4043	static int *procarr = NULL__null;
4044	static int __kmp_aff_depth = 0;
4045
4046	// Create a one element mask array (set of places) which only contains the
4047	// initial process's affinity mask
4048	static void __kmp_create_affinity_none_places(kmp_affinity_t &affinity) {
4049	KMP_ASSERT(__kmp_affin_fullMask != NULL)if (!(__kmp_affin_fullMask != __null)) { __kmp_debug_assert("__kmp_affin_fullMask != NULL" , "openmp/runtime/src/kmp_affinity.cpp", 4049); };
4050	KMP_ASSERT(affinity.type == affinity_none)if (!(affinity.type == affinity_none)) { __kmp_debug_assert("affinity.type == affinity_none" , "openmp/runtime/src/kmp_affinity.cpp", 4050); };
4051	affinity.num_masks = 1;
4052	KMP_CPU_ALLOC_ARRAY(affinity.masks, affinity.num_masks)(affinity.masks = __kmp_affinity_dispatch->allocate_mask_array (affinity.num_masks));
4053	kmp_affin_mask_t *dest = KMP_CPU_INDEX(affinity.masks, 0)__kmp_affinity_dispatch->index_mask_array(affinity.masks, 0 );
4054	KMP_CPU_COPY(dest, __kmp_affin_fullMask)(dest)->copy(__kmp_affin_fullMask);
4055	}
4056
4057	static void __kmp_aux_affinity_initialize_masks(kmp_affinity_t &affinity) {
4058	// Create the "full" mask - this defines all of the processors that we
4059	// consider to be in the machine model. If respect is set, then it is the
4060	// initialization thread's affinity mask. Otherwise, it is all processors that
4061	// we know about on the machine.
4062	int verbose = affinity.flags.verbose;
4063	const char *env_var = affinity.env_var;
4064
4065	// Already initialized
4066	if (__kmp_affin_fullMask && __kmp_affin_origMask)
4067	return;
4068
4069	if (__kmp_affin_fullMask == NULL__null) {
4070	KMP_CPU_ALLOC(__kmp_affin_fullMask)(__kmp_affin_fullMask = __kmp_affinity_dispatch->allocate_mask ());
4071	}
4072	if (__kmp_affin_origMask == NULL__null) {
4073	KMP_CPU_ALLOC(__kmp_affin_origMask)(__kmp_affin_origMask = __kmp_affinity_dispatch->allocate_mask ());
4074	}
4075	if (KMP_AFFINITY_CAPABLE()(__kmp_affin_mask_size > 0)) {
4076	__kmp_get_system_affinity(__kmp_affin_fullMask, TRUE)(__kmp_affin_fullMask)->get_system_affinity((!0));
4077	// Make a copy before possible expanding to the entire machine mask
4078	__kmp_affin_origMask->copy(__kmp_affin_fullMask);
4079	if (affinity.flags.respect) {
4080	// Count the number of available processors.
4081	unsigned i;
4082	__kmp_avail_proc = 0;
4083	KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask)for (i = (__kmp_affin_fullMask)->begin(); (int)i != (__kmp_affin_fullMask )->end(); i = (__kmp_affin_fullMask)->next(i)) {
4084	if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)(__kmp_affin_fullMask)->is_set(i)) {
4085	continue;
4086	}
4087	__kmp_avail_proc++;
4088	}
4089	if (__kmp_avail_proc > __kmp_xproc) {
4090	KMP_AFF_WARNING(affinity, ErrorInitializeAffinity)if (affinity.flags.verbose \|\| (affinity.flags.warnings && (affinity.type != affinity_none))) { __kmp_msg(kmp_ms_warning , __kmp_msg_format(kmp_i18n_msg_ErrorInitializeAffinity), __kmp_msg_null ); };
4091	affinity.type = affinity_none;
4092	KMP_AFFINITY_DISABLE()(__kmp_affin_mask_size = 0);
4093	return;
4094	}
4095
4096	if (verbose) {
4097	char buf[KMP_AFFIN_MASK_PRINT_LEN1024];
4098	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN1024,
4099	__kmp_affin_fullMask);
4100	KMP_INFORM(InitOSProcSetRespect, env_var, buf)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_InitOSProcSetRespect , env_var, buf), __kmp_msg_null);
4101	}
4102	} else {
4103	if (verbose) {
4104	char buf[KMP_AFFIN_MASK_PRINT_LEN1024];
4105	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN1024,
4106	__kmp_affin_fullMask);
4107	KMP_INFORM(InitOSProcSetNotRespect, env_var, buf)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_InitOSProcSetNotRespect , env_var, buf), __kmp_msg_null);
4108	}
4109	__kmp_avail_proc =
4110	__kmp_affinity_entire_machine_mask(__kmp_affin_fullMask);
4111	#if KMP_OS_WINDOWS0
4112	if (__kmp_num_proc_groups <= 1) {
4113	// Copy expanded full mask if topology has single processor group
4114	__kmp_affin_origMask->copy(__kmp_affin_fullMask);
4115	}
4116	// Set the process affinity mask since threads' affinity
4117	// masks must be subset of process mask in Windows* OS
4118	__kmp_affin_fullMask->set_process_affinity(true);
4119	#endif
4120	}
4121	}
4122	}
4123
4124	static bool __kmp_aux_affinity_initialize_topology(kmp_affinity_t &affinity) {
4125	bool success = false;
4126	const char *env_var = affinity.env_var;
4127	kmp_i18n_id_t msg_id = kmp_i18n_null;
4128	int verbose = affinity.flags.verbose;
4129
4130	// For backward compatibility, setting KMP_CPUINFO_FILE =>
4131	// KMP_TOPOLOGY_METHOD=cpuinfo
4132	if ((__kmp_cpuinfo_file != NULL__null) &&
4133	(__kmp_affinity_top_method == affinity_top_method_all)) {
4134	__kmp_affinity_top_method = affinity_top_method_cpuinfo;
4135	}
4136
4137	if (__kmp_affinity_top_method == affinity_top_method_all) {
4138	// In the default code path, errors are not fatal - we just try using
4139	// another method. We only emit a warning message if affinity is on, or the
4140	// verbose flag is set, an the nowarnings flag was not set.
4141	#if KMP_USE_HWLOC0
4142	if (!success &&
4143	__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) {
4144	if (!__kmp_hwloc_error) {
4145	success = __kmp_affinity_create_hwloc_map(&msg_id);
4146	if (!success && verbose) {
4147	KMP_INFORM(AffIgnoringHwloc, env_var)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffIgnoringHwloc , env_var), __kmp_msg_null);
4148	}
4149	} else if (verbose) {
4150	KMP_INFORM(AffIgnoringHwloc, env_var)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffIgnoringHwloc , env_var), __kmp_msg_null);
4151	}
4152	}
4153	#endif
4154
4155	#if KMP_ARCH_X860 \|\| KMP_ARCH_X86_641
4156	if (!success) {
4157	success = __kmp_affinity_create_x2apicid_map(&msg_id);
4158	if (!success && verbose && msg_id != kmp_i18n_null) {
4159	KMP_INFORM(AffInfoStr, env_var, __kmp_i18n_catgets(msg_id))__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffInfoStr , env_var, __kmp_i18n_catgets(msg_id)), __kmp_msg_null);
4160	}
4161	}
4162	if (!success) {
4163	success = __kmp_affinity_create_apicid_map(&msg_id);
4164	if (!success && verbose && msg_id != kmp_i18n_null) {
4165	KMP_INFORM(AffInfoStr, env_var, __kmp_i18n_catgets(msg_id))__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffInfoStr , env_var, __kmp_i18n_catgets(msg_id)), __kmp_msg_null);
4166	}
4167	}
4168	#endif /* KMP_ARCH_X86 \|\| KMP_ARCH_X86_64 */
4169
4170	#if KMP_OS_LINUX1
4171	if (!success) {
4172	int line = 0;
4173	success = __kmp_affinity_create_cpuinfo_map(&line, &msg_id);
4174	if (!success && verbose && msg_id != kmp_i18n_null) {
4175	KMP_INFORM(AffInfoStr, env_var, __kmp_i18n_catgets(msg_id))__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffInfoStr , env_var, __kmp_i18n_catgets(msg_id)), __kmp_msg_null);
4176	}
4177	}
4178	#endif /* KMP_OS_LINUX */
4179
4180	#if KMP_GROUP_AFFINITY0
4181	if (!success && (__kmp_num_proc_groups > 1)) {
4182	success = __kmp_affinity_create_proc_group_map(&msg_id);
4183	if (!success && verbose && msg_id != kmp_i18n_null) {
4184	KMP_INFORM(AffInfoStr, env_var, __kmp_i18n_catgets(msg_id))__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffInfoStr , env_var, __kmp_i18n_catgets(msg_id)), __kmp_msg_null);
4185	}
4186	}
4187	#endif /* KMP_GROUP_AFFINITY */
4188
4189	if (!success) {
4190	success = __kmp_affinity_create_flat_map(&msg_id);
4191	if (!success && verbose && msg_id != kmp_i18n_null) {
4192	KMP_INFORM(AffInfoStr, env_var, __kmp_i18n_catgets(msg_id))__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffInfoStr , env_var, __kmp_i18n_catgets(msg_id)), __kmp_msg_null);
4193	}
4194	KMP_ASSERT(success)if (!(success)) { __kmp_debug_assert("success", "openmp/runtime/src/kmp_affinity.cpp" , 4194); };
4195	}
4196	}
4197
4198	// If the user has specified that a paricular topology discovery method is to be
4199	// used, then we abort if that method fails. The exception is group affinity,
4200	// which might have been implicitly set.
4201	#if KMP_USE_HWLOC0
4202	else if (__kmp_affinity_top_method == affinity_top_method_hwloc) {
4203	KMP_ASSERT(__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC)if (!(__kmp_affinity_dispatch->get_api_type() == KMPAffinity ::HWLOC)) { __kmp_debug_assert("__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC" , "openmp/runtime/src/kmp_affinity.cpp", 4203); };
4204	success = __kmp_affinity_create_hwloc_map(&msg_id);
4205	if (!success) {
4206	KMP_ASSERT(msg_id != kmp_i18n_null)if (!(msg_id != kmp_i18n_null)) { __kmp_debug_assert("msg_id != kmp_i18n_null" , "openmp/runtime/src/kmp_affinity.cpp", 4206); };
4207	KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id))__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_MsgExiting, __kmp_i18n_catgets (msg_id)), __kmp_msg_null);
4208	}
4209	}
4210	#endif // KMP_USE_HWLOC
4211
4212	#if KMP_ARCH_X860 \|\| KMP_ARCH_X86_641
4213	else if (__kmp_affinity_top_method == affinity_top_method_x2apicid \|\|
4214	__kmp_affinity_top_method == affinity_top_method_x2apicid_1f) {
4215	success = __kmp_affinity_create_x2apicid_map(&msg_id);
4216	if (!success) {
4217	KMP_ASSERT(msg_id != kmp_i18n_null)if (!(msg_id != kmp_i18n_null)) { __kmp_debug_assert("msg_id != kmp_i18n_null" , "openmp/runtime/src/kmp_affinity.cpp", 4217); };
4218	KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id))__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_MsgExiting, __kmp_i18n_catgets (msg_id)), __kmp_msg_null);
4219	}
4220	} else if (__kmp_affinity_top_method == affinity_top_method_apicid) {
4221	success = __kmp_affinity_create_apicid_map(&msg_id);
4222	if (!success) {
4223	KMP_ASSERT(msg_id != kmp_i18n_null)if (!(msg_id != kmp_i18n_null)) { __kmp_debug_assert("msg_id != kmp_i18n_null" , "openmp/runtime/src/kmp_affinity.cpp", 4223); };
4224	KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id))__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_MsgExiting, __kmp_i18n_catgets (msg_id)), __kmp_msg_null);
4225	}
4226	}
4227	#endif /* KMP_ARCH_X86 \|\| KMP_ARCH_X86_64 */
4228
4229	else if (__kmp_affinity_top_method == affinity_top_method_cpuinfo) {
4230	int line = 0;
4231	success = __kmp_affinity_create_cpuinfo_map(&line, &msg_id);
4232	if (!success) {
4233	KMP_ASSERT(msg_id != kmp_i18n_null)if (!(msg_id != kmp_i18n_null)) { __kmp_debug_assert("msg_id != kmp_i18n_null" , "openmp/runtime/src/kmp_affinity.cpp", 4233); };
4234	const char *filename = __kmp_cpuinfo_get_filename();
4235	if (line > 0) {
4236	KMP_FATAL(FileLineMsgExiting, filename, line,__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_FileLineMsgExiting, filename, line, __kmp_i18n_catgets(msg_id)), __kmp_msg_null)
4237	__kmp_i18n_catgets(msg_id))__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_FileLineMsgExiting, filename, line, __kmp_i18n_catgets(msg_id)), __kmp_msg_null);
4238	} else {
4239	KMP_FATAL(FileMsgExiting, filename, __kmp_i18n_catgets(msg_id))__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_FileMsgExiting, filename , __kmp_i18n_catgets(msg_id)), __kmp_msg_null);
4240	}
4241	}
4242	}
4243
4244	#if KMP_GROUP_AFFINITY0
4245	else if (__kmp_affinity_top_method == affinity_top_method_group) {
4246	success = __kmp_affinity_create_proc_group_map(&msg_id);
4247	KMP_ASSERT(success)if (!(success)) { __kmp_debug_assert("success", "openmp/runtime/src/kmp_affinity.cpp" , 4247); };
4248	if (!success) {
4249	KMP_ASSERT(msg_id != kmp_i18n_null)if (!(msg_id != kmp_i18n_null)) { __kmp_debug_assert("msg_id != kmp_i18n_null" , "openmp/runtime/src/kmp_affinity.cpp", 4249); };
4250	KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id))__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_MsgExiting, __kmp_i18n_catgets (msg_id)), __kmp_msg_null);
4251	}
4252	}
4253	#endif /* KMP_GROUP_AFFINITY */
4254
4255	else if (__kmp_affinity_top_method == affinity_top_method_flat) {
4256	success = __kmp_affinity_create_flat_map(&msg_id);
4257	// should not fail
4258	KMP_ASSERT(success)if (!(success)) { __kmp_debug_assert("success", "openmp/runtime/src/kmp_affinity.cpp" , 4258); };
4259	}
4260
4261	// Early exit if topology could not be created
4262	if (!__kmp_topology) {
4263	if (KMP_AFFINITY_CAPABLE()(__kmp_affin_mask_size > 0)) {
4264	KMP_AFF_WARNING(affinity, ErrorInitializeAffinity)if (affinity.flags.verbose \|\| (affinity.flags.warnings && (affinity.type != affinity_none))) { __kmp_msg(kmp_ms_warning , __kmp_msg_format(kmp_i18n_msg_ErrorInitializeAffinity), __kmp_msg_null ); };
4265	}
4266	if (nPackages > 0 && nCoresPerPkg > 0 && __kmp_nThreadsPerCore > 0 &&
4267	__kmp_ncores > 0) {
4268	__kmp_topology = kmp_topology_t::allocate(0, 0, NULL__null);
4269	__kmp_topology->canonicalize(nPackages, nCoresPerPkg,
4270	__kmp_nThreadsPerCore, __kmp_ncores);
4271	if (verbose) {
4272	__kmp_topology->print(env_var);
4273	}
4274	}
4275	return false;
4276	}
4277
4278	// Canonicalize, print (if requested), apply KMP_HW_SUBSET
4279	__kmp_topology->canonicalize();
4280	if (verbose)
4281	__kmp_topology->print(env_var);
4282	bool filtered = __kmp_topology->filter_hw_subset();
4283	if (filtered) {
4284	#if KMP_OS_WINDOWS0
4285	// Copy filtered full mask if topology has single processor group
4286	if (__kmp_num_proc_groups <= 1)
4287	#endif
4288	__kmp_affin_origMask->copy(__kmp_affin_fullMask);
4289	}
4290	if (filtered && verbose)
4291	__kmp_topology->print("KMP_HW_SUBSET");
4292	return success;
4293	}
4294
4295	static void __kmp_aux_affinity_initialize(kmp_affinity_t &affinity) {
4296	bool is_regular_affinity = (&affinity == &__kmp_affinity);
4297	bool is_hidden_helper_affinity = (&affinity == &__kmp_hh_affinity);
4298	const char *env_var = affinity.env_var;
4299
4300	if (affinity.flags.initialized) {
4301	KMP_ASSERT(__kmp_affin_fullMask != NULL)if (!(__kmp_affin_fullMask != __null)) { __kmp_debug_assert("__kmp_affin_fullMask != NULL" , "openmp/runtime/src/kmp_affinity.cpp", 4301); };
4302	return;
4303	}
4304
4305	if (is_regular_affinity && (!__kmp_affin_fullMask \|\| !__kmp_affin_origMask))
4306	__kmp_aux_affinity_initialize_masks(affinity);
4307
4308	if (is_regular_affinity && !__kmp_topology) {
4309	bool success = __kmp_aux_affinity_initialize_topology(affinity);
4310	if (success) {
4311	// Initialize other data structures which depend on the topology
4312	machine_hierarchy.init(__kmp_topology->get_num_hw_threads());
4313	KMP_ASSERT(__kmp_avail_proc == __kmp_topology->get_num_hw_threads())if (!(__kmp_avail_proc == __kmp_topology->get_num_hw_threads ())) { __kmp_debug_assert("__kmp_avail_proc == __kmp_topology->get_num_hw_threads()" , "openmp/runtime/src/kmp_affinity.cpp", 4313); };
4314	} else {
4315	affinity.type = affinity_none;
4316	KMP_AFFINITY_DISABLE()(__kmp_affin_mask_size = 0);
4317	}
4318	}
4319
4320	// If KMP_AFFINITY=none, then only create the single "none" place
4321	// which is the process's initial affinity mask or the number of
4322	// hardware threads depending on respect,norespect
4323	if (affinity.type == affinity_none) {
4324	__kmp_create_affinity_none_places(affinity);
4325	#if KMP_USE_HIER_SCHED0
4326	__kmp_dispatch_set_hierarchy_values();
4327	#endif
4328	affinity.flags.initialized = TRUE(!0);
4329	return;
4330	}
4331
4332	__kmp_topology->set_granularity(affinity);
4333	int depth = __kmp_topology->get_depth();
4334
4335	// Create the table of masks, indexed by thread Id.
4336	unsigned numUnique;
4337	__kmp_create_os_id_masks(&numUnique, affinity);
4338	if (affinity.gran_levels == 0) {
4339	KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc)if (!((int)numUnique == __kmp_avail_proc)) { __kmp_debug_assert ("(int)numUnique == __kmp_avail_proc", "openmp/runtime/src/kmp_affinity.cpp" , 4339); };
4340	}
4341
4342	switch (affinity.type) {
4343
4344	case affinity_explicit:
4345	KMP_DEBUG_ASSERT(affinity.proclist != NULL)if (!(affinity.proclist != __null)) { __kmp_debug_assert("affinity.proclist != __null" , "openmp/runtime/src/kmp_affinity.cpp", 4345); };
4346	if (is_hidden_helper_affinity \|\|
4347	__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel) {
4348	__kmp_affinity_process_proclist(affinity);
4349	} else {
4350	__kmp_affinity_process_placelist(affinity);
4351	}
4352	if (affinity.num_masks == 0) {
4353	KMP_AFF_WARNING(affinity, AffNoValidProcID)if (affinity.flags.verbose \|\| (affinity.flags.warnings && (affinity.type != affinity_none))) { __kmp_msg(kmp_ms_warning , __kmp_msg_format(kmp_i18n_msg_AffNoValidProcID), __kmp_msg_null ); };
4354	affinity.type = affinity_none;
4355	__kmp_create_affinity_none_places(affinity);
4356	affinity.flags.initialized = TRUE(!0);
4357	return;
4358	}
4359	break;
4360
4361	// The other affinity types rely on sorting the hardware threads according to
4362	// some permutation of the machine topology tree. Set affinity.compact
4363	// and affinity.offset appropriately, then jump to a common code
4364	// fragment to do the sort and create the array of affinity masks.
4365	case affinity_logical:
4366	affinity.compact = 0;
4367	if (affinity.offset) {
4368	affinity.offset =
4369	__kmp_nThreadsPerCore * affinity.offset % __kmp_avail_proc;
4370	}
4371	goto sortTopology;
4372
4373	case affinity_physical:
4374	if (__kmp_nThreadsPerCore > 1) {
4375	affinity.compact = 1;
4376	if (affinity.compact >= depth) {
4377	affinity.compact = 0;
4378	}
4379	} else {
4380	affinity.compact = 0;
4381	}
4382	if (affinity.offset) {
4383	affinity.offset =
4384	__kmp_nThreadsPerCore * affinity.offset % __kmp_avail_proc;
4385	}
4386	goto sortTopology;
4387
4388	case affinity_scatter:
4389	if (affinity.compact >= depth) {
4390	affinity.compact = 0;
4391	} else {
4392	affinity.compact = depth - 1 - affinity.compact;
4393	}
4394	goto sortTopology;
4395
4396	case affinity_compact:
4397	if (affinity.compact >= depth) {
4398	affinity.compact = depth - 1;
4399	}
4400	goto sortTopology;
4401
4402	case affinity_balanced:
4403	if (depth <= 1 \|\| is_hidden_helper_affinity) {
4404	KMP_AFF_WARNING(affinity, AffBalancedNotAvail, env_var)if (affinity.flags.verbose \|\| (affinity.flags.warnings && (affinity.type != affinity_none))) { __kmp_msg(kmp_ms_warning , __kmp_msg_format(kmp_i18n_msg_AffBalancedNotAvail, env_var) , __kmp_msg_null); };
4405	affinity.type = affinity_none;
4406	__kmp_create_affinity_none_places(affinity);
4407	affinity.flags.initialized = TRUE(!0);
4408	return;
4409	} else if (!__kmp_topology->is_uniform()) {
4410	// Save the depth for further usage
4411	__kmp_aff_depth = depth;
4412
4413	int core_level =
4414	__kmp_affinity_find_core_level(__kmp_avail_proc, depth - 1);
4415	int ncores = __kmp_affinity_compute_ncores(__kmp_avail_proc, depth - 1,
4416	core_level);
4417	int maxprocpercore = __kmp_affinity_max_proc_per_core(
4418	__kmp_avail_proc, depth - 1, core_level);
4419
4420	int nproc = ncores * maxprocpercore;
4421	if ((nproc < 2) \|\| (nproc < __kmp_avail_proc)) {
4422	KMP_AFF_WARNING(affinity, AffBalancedNotAvail, env_var)if (affinity.flags.verbose \|\| (affinity.flags.warnings && (affinity.type != affinity_none))) { __kmp_msg(kmp_ms_warning , __kmp_msg_format(kmp_i18n_msg_AffBalancedNotAvail, env_var) , __kmp_msg_null); };
4423	affinity.type = affinity_none;
4424	affinity.flags.initialized = TRUE(!0);
4425	return;
4426	}
4427
4428	procarr = (int )__kmp_allocate(sizeof(int) nproc)___kmp_allocate((sizeof(int) * nproc), "openmp/runtime/src/kmp_affinity.cpp" , 4428);
4429	for (int i = 0; i < nproc; i++) {
4430	procarr[i] = -1;
4431	}
4432
4433	int lastcore = -1;
4434	int inlastcore = 0;
4435	for (int i = 0; i < __kmp_avail_proc; i++) {
4436	int proc = __kmp_topology->at(i).os_id;
4437	int core = __kmp_affinity_find_core(i, depth - 1, core_level);
4438
4439	if (core == lastcore) {
4440	inlastcore++;
4441	} else {
4442	inlastcore = 0;
4443	}
4444	lastcore = core;
4445
4446	procarr[core * maxprocpercore + inlastcore] = proc;
4447	}
4448	}
4449	if (affinity.compact >= depth) {
4450	affinity.compact = depth - 1;
4451	}
4452
4453	sortTopology:
4454	// Allocate the gtid->affinity mask table.
4455	if (affinity.flags.dups) {
4456	affinity.num_masks = __kmp_avail_proc;
4457	} else {
4458	affinity.num_masks = numUnique;
4459	}
4460
4461	if ((__kmp_nested_proc_bind.bind_types[0] != proc_bind_intel) &&
4462	(__kmp_affinity_num_places > 0) &&
4463	((unsigned)__kmp_affinity_num_places < affinity.num_masks) &&
4464	!is_hidden_helper_affinity) {
4465	affinity.num_masks = __kmp_affinity_num_places;
4466	}
4467
4468	KMP_CPU_ALLOC_ARRAY(affinity.masks, affinity.num_masks)(affinity.masks = __kmp_affinity_dispatch->allocate_mask_array (affinity.num_masks));
4469
4470	// Sort the topology table according to the current setting of
4471	// affinity.compact, then fill out affinity.masks.
4472	__kmp_topology->sort_compact(affinity);
4473	{
4474	int i;
4475	unsigned j;
4476	int num_hw_threads = __kmp_topology->get_num_hw_threads();
4477	for (i = 0, j = 0; i < num_hw_threads; i++) {
4478	if ((!affinity.flags.dups) && (!__kmp_topology->at(i).leader)) {
4479	continue;
4480	}
4481	int osId = __kmp_topology->at(i).os_id;
4482
4483	kmp_affin_mask_t *src = KMP_CPU_INDEX(affinity.os_id_masks, osId)__kmp_affinity_dispatch->index_mask_array(affinity.os_id_masks , osId);
4484	kmp_affin_mask_t *dest = KMP_CPU_INDEX(affinity.masks, j)__kmp_affinity_dispatch->index_mask_array(affinity.masks, j );
4485	KMP_ASSERT(KMP_CPU_ISSET(osId, src))if (!((src)->is_set(osId))) { __kmp_debug_assert("KMP_CPU_ISSET(osId, src)" , "openmp/runtime/src/kmp_affinity.cpp", 4485); };
4486	KMP_CPU_COPY(dest, src)(dest)->copy(src);
4487	if (++j >= affinity.num_masks) {
4488	break;
4489	}
4490	}
4491	KMP_DEBUG_ASSERT(j == affinity.num_masks)if (!(j == affinity.num_masks)) { __kmp_debug_assert("j == affinity.num_masks" , "openmp/runtime/src/kmp_affinity.cpp", 4491); };
4492	}
4493	// Sort the topology back using ids
4494	__kmp_topology->sort_ids();
4495	break;
4496
4497	default:
4498	KMP_ASSERT2(0, "Unexpected affinity setting")if (!(0)) { __kmp_debug_assert(("Unexpected affinity setting" ), "openmp/runtime/src/kmp_affinity.cpp", 4498); };
4499	}
4500	affinity.flags.initialized = TRUE(!0);
4501	}
4502
4503	void __kmp_affinity_initialize(kmp_affinity_t &affinity) {
4504	// Much of the code above was written assuming that if a machine was not
4505	// affinity capable, then affinity type == affinity_none.
4506	// We now explicitly represent this as affinity type == affinity_disabled.
4507	// There are too many checks for affinity type == affinity_none in this code.
4508	// Instead of trying to change them all, check if
4509	// affinity type == affinity_disabled, and if so, slam it with affinity_none,
4510	// call the real initialization routine, then restore affinity type to
4511	// affinity_disabled.
4512	int disabled = (affinity.type == affinity_disabled);
4513	if (!KMP_AFFINITY_CAPABLE()(__kmp_affin_mask_size > 0))
4514	KMP_ASSERT(disabled)if (!(disabled)) { __kmp_debug_assert("disabled", "openmp/runtime/src/kmp_affinity.cpp" , 4514); };
4515	if (disabled)
4516	affinity.type = affinity_none;
4517	__kmp_aux_affinity_initialize(affinity);
4518	if (disabled)
4519	affinity.type = affinity_disabled;
4520	}
4521
4522	void __kmp_affinity_uninitialize(void) {
4523	for (kmp_affinity_t *affinity : __kmp_affinities) {
4524	if (affinity->masks != NULL__null)
4525	KMP_CPU_FREE_ARRAY(affinity->masks, affinity->num_masks)__kmp_affinity_dispatch->deallocate_mask_array(affinity-> masks);
4526	if (affinity->os_id_masks != NULL__null)
4527	KMP_CPU_FREE_ARRAY(affinity->os_id_masks, affinity->num_os_id_masks)__kmp_affinity_dispatch->deallocate_mask_array(affinity-> os_id_masks);
4528	if (affinity->proclist != NULL__null)
4529	__kmp_free(affinity->proclist)___kmp_free((affinity->proclist), "openmp/runtime/src/kmp_affinity.cpp" , 4529);
4530	*affinity = KMP_AFFINITY_INIT(affinity->env_var){ nullptr, affinity_default, KMP_HW_UNKNOWN, -1, 0, 0, {(!0), 0, (!0), (2), 0, 0}, 0, nullptr, 0, nullptr, affinity->env_var };
4531	}
4532	if (__kmp_affin_origMask != NULL__null) {
4533	if (KMP_AFFINITY_CAPABLE()(__kmp_affin_mask_size > 0)) {
4534	__kmp_set_system_affinity(__kmp_affin_origMask, FALSE)(__kmp_affin_origMask)->set_system_affinity(0);
4535	}
4536	KMP_CPU_FREE(__kmp_affin_origMask)__kmp_affinity_dispatch->deallocate_mask(__kmp_affin_origMask );
4537	__kmp_affin_origMask = NULL__null;
4538	}
4539	__kmp_affinity_num_places = 0;
4540	if (procarr != NULL__null) {
4541	__kmp_free(procarr)___kmp_free((procarr), "openmp/runtime/src/kmp_affinity.cpp", 4541);
4542	procarr = NULL__null;
4543	}
4544	#if KMP_USE_HWLOC0
4545	if (__kmp_hwloc_topology != NULL__null) {
4546	hwloc_topology_destroy(__kmp_hwloc_topology);
4547	__kmp_hwloc_topology = NULL__null;
4548	}
4549	#endif
4550	if (__kmp_hw_subset) {
4551	kmp_hw_subset_t::deallocate(__kmp_hw_subset);
4552	__kmp_hw_subset = nullptr;
4553	}
4554	if (__kmp_topology) {
4555	kmp_topology_t::deallocate(__kmp_topology);
4556	__kmp_topology = nullptr;
4557	}
4558	KMPAffinity::destroy_api();
4559	}
4560
4561	static void __kmp_select_mask_by_gtid(int gtid, const kmp_affinity_t *affinity,
4562	int place, kmp_affin_mask_t *mask) {
4563	int mask_idx;
4564	bool is_hidden_helper = KMP_HIDDEN_HELPER_THREAD(gtid)((gtid) >= 1 && (gtid) <= __kmp_hidden_helper_threads_num );
4565	if (is_hidden_helper)
4566	// The first gtid is the regular primary thread, the second gtid is the main
4567	// thread of hidden team which does not participate in task execution.
4568	mask_idx = gtid - 2;
4569	else
4570	mask_idx = __kmp_adjust_gtid_for_hidden_helpers(gtid);
4571	KMP_DEBUG_ASSERT(affinity->num_masks > 0)if (!(affinity->num_masks > 0)) { __kmp_debug_assert("affinity->num_masks > 0" , "openmp/runtime/src/kmp_affinity.cpp", 4571); };
4572	*place = (mask_idx + affinity->offset) % affinity->num_masks;
4573	mask = KMP_CPU_INDEX(affinity->masks, place)__kmp_affinity_dispatch->index_mask_array(affinity->masks , *place);
4574	}
4575
4576	void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
4577	if (!KMP_AFFINITY_CAPABLE()(__kmp_affin_mask_size > 0)) {
4578	return;
4579	}
4580
4581	kmp_info_t th = (kmp_info_t )TCR_SYNC_PTR(__kmp_threads[gtid])((void *)(__kmp_threads[gtid]));
4582	if (th->th.th_affin_mask == NULL__null) {
4583	KMP_CPU_ALLOC(th->th.th_affin_mask)(th->th.th_affin_mask = __kmp_affinity_dispatch->allocate_mask ());
4584	} else {
4585	KMP_CPU_ZERO(th->th.th_affin_mask)(th->th.th_affin_mask)->zero();
4586	}
4587
4588	// Copy the thread mask to the kmp_info_t structure. If
4589	// __kmp_affinity.type == affinity_none, copy the "full" mask, i.e.
4590	// one that has all of the OS proc ids set, or if
4591	// __kmp_affinity.flags.respect is set, then the full mask is the
4592	// same as the mask of the initialization thread.
4593	kmp_affin_mask_t *mask;
4594	int i;
4595	const kmp_affinity_t *affinity;
4596	const char *env_var;
4597	bool is_hidden_helper = KMP_HIDDEN_HELPER_THREAD(gtid)((gtid) >= 1 && (gtid) <= __kmp_hidden_helper_threads_num );
4598
4599	if (is_hidden_helper)
4600	affinity = &__kmp_hh_affinity;
4601	else
4602	affinity = &__kmp_affinity;
4603	env_var = affinity->env_var;
4604
4605	if (KMP_AFFINITY_NON_PROC_BIND((__kmp_nested_proc_bind.bind_types[0] == proc_bind_false \|\| __kmp_nested_proc_bind .bind_types[0] == proc_bind_intel) && (__kmp_affinity .num_masks > 0 \|\| __kmp_affinity.type == affinity_balanced )) \|\| is_hidden_helper) {
4606	if ((affinity->type == affinity_none) \|\|
4607	(affinity->type == affinity_balanced) \|\|
4608	KMP_HIDDEN_HELPER_MAIN_THREAD(gtid)((gtid) == 1 && (gtid) <= __kmp_hidden_helper_threads_num )) {
4609	#if KMP_GROUP_AFFINITY0
4610	if (__kmp_num_proc_groups > 1) {
4611	return;
4612	}
4613	#endif
4614	KMP_ASSERT(__kmp_affin_fullMask != NULL)if (!(__kmp_affin_fullMask != __null)) { __kmp_debug_assert("__kmp_affin_fullMask != NULL" , "openmp/runtime/src/kmp_affinity.cpp", 4614); };
4615	i = 0;
4616	mask = __kmp_affin_fullMask;
4617	} else {
4618	__kmp_select_mask_by_gtid(gtid, affinity, &i, &mask);
4619	}
4620	} else {
4621	if (!isa_root \|\| __kmp_nested_proc_bind.bind_types[0] == proc_bind_false) {
4622	#if KMP_GROUP_AFFINITY0
4623	if (__kmp_num_proc_groups > 1) {
4624	return;
4625	}
4626	#endif
4627	KMP_ASSERT(__kmp_affin_fullMask != NULL)if (!(__kmp_affin_fullMask != __null)) { __kmp_debug_assert("__kmp_affin_fullMask != NULL" , "openmp/runtime/src/kmp_affinity.cpp", 4627); };
4628	i = KMP_PLACE_ALL(-1);
4629	mask = __kmp_affin_fullMask;
4630	} else {
4631	__kmp_select_mask_by_gtid(gtid, affinity, &i, &mask);
4632	}
4633	}
4634
4635	th->th.th_current_place = i;
4636	if (isa_root && !is_hidden_helper) {
4637	th->th.th_new_place = i;
4638	th->th.th_first_place = 0;
4639	th->th.th_last_place = affinity->num_masks - 1;
4640	} else if (KMP_AFFINITY_NON_PROC_BIND((__kmp_nested_proc_bind.bind_types[0] == proc_bind_false \|\| __kmp_nested_proc_bind .bind_types[0] == proc_bind_intel) && (__kmp_affinity .num_masks > 0 \|\| __kmp_affinity.type == affinity_balanced ))) {
4641	// When using a Non-OMP_PROC_BIND affinity method,
4642	// set all threads' place-partition-var to the entire place list
4643	th->th.th_first_place = 0;
4644	th->th.th_last_place = affinity->num_masks - 1;
4645	}
4646
4647	if (i == KMP_PLACE_ALL(-1)) {
4648	KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to all places\n",if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_affinity_set_init_mask: binding T#%d to all places\n" , gtid); }
4649	gtid))if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_affinity_set_init_mask: binding T#%d to all places\n" , gtid); };
4650	} else {
4651	KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n",if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n" , gtid, i); }
4652	gtid, i))if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n" , gtid, i); };
4653	}
4654
4655	KMP_CPU_COPY(th->th.th_affin_mask, mask)(th->th.th_affin_mask)->copy(mask);
4656
4657	/* to avoid duplicate printing (will be correctly printed on barrier) */
4658	if (affinity->flags.verbose &&
4659	(affinity->type == affinity_none \|\|
4660	(i != KMP_PLACE_ALL(-1) && affinity->type != affinity_balanced)) &&
4661	!KMP_HIDDEN_HELPER_MAIN_THREAD(gtid)((gtid) == 1 && (gtid) <= __kmp_hidden_helper_threads_num )) {
4662	char buf[KMP_AFFIN_MASK_PRINT_LEN1024];
4663	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN1024,
4664	th->th.th_affin_mask);
4665	KMP_INFORM(BoundToOSProcSet, env_var, (kmp_int32)getpid(), __kmp_gettid(),__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_BoundToOSProcSet , env_var, (kmp_int32)getpid(), syscall(186), gtid, buf), __kmp_msg_null )
4666	gtid, buf)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_BoundToOSProcSet , env_var, (kmp_int32)getpid(), syscall(186), gtid, buf), __kmp_msg_null );
4667	}
4668
4669	#if KMP_OS_WINDOWS0
4670	// On Windows* OS, the process affinity mask might have changed. If the user
4671	// didn't request affinity and this call fails, just continue silently.
4672	// See CQ171393.
4673	if (affinity->type == affinity_none) {
4674	__kmp_set_system_affinity(th->th.th_affin_mask, FALSE)(th->th.th_affin_mask)->set_system_affinity(0);
4675	} else
4676	#endif
4677	__kmp_set_system_affinity(th->th.th_affin_mask, TRUE)(th->th.th_affin_mask)->set_system_affinity((!0));
4678	}
4679
4680	void __kmp_affinity_set_place(int gtid) {
4681	// Hidden helper threads should not be affected by OMP_PLACES/OMP_PROC_BIND
4682	if (!KMP_AFFINITY_CAPABLE()(__kmp_affin_mask_size > 0) \|\| KMP_HIDDEN_HELPER_THREAD(gtid)((gtid) >= 1 && (gtid) <= __kmp_hidden_helper_threads_num )) {
4683	return;
4684	}
4685
4686	kmp_info_t th = (kmp_info_t )TCR_SYNC_PTR(__kmp_threads[gtid])((void *)(__kmp_threads[gtid]));
4687
4688	KA_TRACE(100, ("__kmp_affinity_set_place: binding T#%d to place %d (current "if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_affinity_set_place: binding T#%d to place %d (current " "place = %d)\n", gtid, th->th.th_new_place, th->th.th_current_place ); }
4689	"place = %d)\n",if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_affinity_set_place: binding T#%d to place %d (current " "place = %d)\n", gtid, th->th.th_new_place, th->th.th_current_place ); }
4690	gtid, th->th.th_new_place, th->th.th_current_place))if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_affinity_set_place: binding T#%d to place %d (current " "place = %d)\n", gtid, th->th.th_new_place, th->th.th_current_place ); };
4691
4692	// Check that the new place is within this thread's partition.
4693	KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL)if (!(th->th.th_affin_mask != __null)) { __kmp_debug_assert ("th->th.th_affin_mask != __null", "openmp/runtime/src/kmp_affinity.cpp" , 4693); };
4694	KMP_ASSERT(th->th.th_new_place >= 0)if (!(th->th.th_new_place >= 0)) { __kmp_debug_assert("th->th.th_new_place >= 0" , "openmp/runtime/src/kmp_affinity.cpp", 4694); };
4695	KMP_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity.num_masks)if (!((unsigned)th->th.th_new_place <= __kmp_affinity.num_masks )) { __kmp_debug_assert("(unsigned)th->th.th_new_place <= __kmp_affinity.num_masks" , "openmp/runtime/src/kmp_affinity.cpp", 4695); };
4696	if (th->th.th_first_place <= th->th.th_last_place) {
4697	KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place) &&if (!((th->th.th_new_place >= th->th.th_first_place) && (th->th.th_new_place <= th->th.th_last_place ))) { __kmp_debug_assert("(th->th.th_new_place >= th->th.th_first_place) && (th->th.th_new_place <= th->th.th_last_place)" , "openmp/runtime/src/kmp_affinity.cpp", 4698); }
4698	(th->th.th_new_place <= th->th.th_last_place))if (!((th->th.th_new_place >= th->th.th_first_place) && (th->th.th_new_place <= th->th.th_last_place ))) { __kmp_debug_assert("(th->th.th_new_place >= th->th.th_first_place) && (th->th.th_new_place <= th->th.th_last_place)" , "openmp/runtime/src/kmp_affinity.cpp", 4698); };
4699	} else {
4700	KMP_ASSERT((th->th.th_new_place <= th->th.th_first_place) \|\|if (!((th->th.th_new_place <= th->th.th_first_place) \|\| (th->th.th_new_place >= th->th.th_last_place))) { __kmp_debug_assert("(th->th.th_new_place <= th->th.th_first_place) \|\| (th->th.th_new_place >= th->th.th_last_place)" , "openmp/runtime/src/kmp_affinity.cpp", 4701); }
4701	(th->th.th_new_place >= th->th.th_last_place))if (!((th->th.th_new_place <= th->th.th_first_place) \|\| (th->th.th_new_place >= th->th.th_last_place))) { __kmp_debug_assert("(th->th.th_new_place <= th->th.th_first_place) \|\| (th->th.th_new_place >= th->th.th_last_place)" , "openmp/runtime/src/kmp_affinity.cpp", 4701); };
4702	}
4703
4704	// Copy the thread mask to the kmp_info_t structure,
4705	// and set this thread's affinity.
4706	kmp_affin_mask_t *mask =
4707	KMP_CPU_INDEX(__kmp_affinity.masks, th->th.th_new_place)__kmp_affinity_dispatch->index_mask_array(__kmp_affinity.masks , th->th.th_new_place);
4708	KMP_CPU_COPY(th->th.th_affin_mask, mask)(th->th.th_affin_mask)->copy(mask);
4709	th->th.th_current_place = th->th.th_new_place;
4710
4711	if (__kmp_affinity.flags.verbose) {
4712	char buf[KMP_AFFIN_MASK_PRINT_LEN1024];
4713	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN1024,
4714	th->th.th_affin_mask);
4715	KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", (kmp_int32)getpid(),__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_BoundToOSProcSet , "OMP_PROC_BIND", (kmp_int32)getpid(), syscall(186), gtid, buf ), __kmp_msg_null)
4716	__kmp_gettid(), gtid, buf)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_BoundToOSProcSet , "OMP_PROC_BIND", (kmp_int32)getpid(), syscall(186), gtid, buf ), __kmp_msg_null);
4717	}
4718	__kmp_set_system_affinity(th->th.th_affin_mask, TRUE)(th->th.th_affin_mask)->set_system_affinity((!0));
4719	}
4720
4721	int __kmp_aux_set_affinity(void **mask) {
4722	int gtid;
4723	kmp_info_t *th;
4724	int retval;
4725
4726	if (!KMP_AFFINITY_CAPABLE()(__kmp_affin_mask_size > 0)) {
4727	return -1;
4728	}
4729
4730	gtid = __kmp_entry_gtid()__kmp_get_global_thread_id_reg();
4731	KA_TRACE(if (kmp_a_debug >= 1000) { __kmp_debug_printf (""); { char buf[1024]; __kmp_affinity_print_mask(buf, 1024, (kmp_affin_mask_t )(mask)); __kmp_debug_printf( "kmp_set_affinity: setting affinity mask for thread %d = %s\n" , gtid, buf); }; }
4732	1000, (""); {if (kmp_a_debug >= 1000) { __kmp_debug_printf (""); { char buf[1024]; __kmp_affinity_print_mask(buf, 1024, (kmp_affin_mask_t )(mask)); __kmp_debug_printf( "kmp_set_affinity: setting affinity mask for thread %d = %s\n" , gtid, buf); }; }
4733	char buf[KMP_AFFIN_MASK_PRINT_LEN];if (kmp_a_debug >= 1000) { __kmp_debug_printf (""); { char buf[1024]; __kmp_affinity_print_mask(buf, 1024, (kmp_affin_mask_t )(mask)); __kmp_debug_printf( "kmp_set_affinity: setting affinity mask for thread %d = %s\n" , gtid, buf); }; }
4734	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,if (kmp_a_debug >= 1000) { __kmp_debug_printf (""); { char buf[1024]; __kmp_affinity_print_mask(buf, 1024, (kmp_affin_mask_t )(mask)); __kmp_debug_printf( "kmp_set_affinity: setting affinity mask for thread %d = %s\n" , gtid, buf); }; }
4735	(kmp_affin_mask_t )(mask));if (kmp_a_debug >= 1000) { __kmp_debug_printf (""); { char buf[1024]; __kmp_affinity_print_mask(buf, 1024, (kmp_affin_mask_t )(mask)); __kmp_debug_printf( "kmp_set_affinity: setting affinity mask for thread %d = %s\n" , gtid, buf); }; }
4736	__kmp_debug_printf(if (kmp_a_debug >= 1000) { __kmp_debug_printf (""); { char buf[1024]; __kmp_affinity_print_mask(buf, 1024, (kmp_affin_mask_t )(mask)); __kmp_debug_printf( "kmp_set_affinity: setting affinity mask for thread %d = %s\n" , gtid, buf); }; }
4737	"kmp_set_affinity: setting affinity mask for thread %d = %s\n",if (kmp_a_debug >= 1000) { __kmp_debug_printf (""); { char buf[1024]; __kmp_affinity_print_mask(buf, 1024, (kmp_affin_mask_t )(mask)); __kmp_debug_printf( "kmp_set_affinity: setting affinity mask for thread %d = %s\n" , gtid, buf); }; }
4738	gtid, buf);if (kmp_a_debug >= 1000) { __kmp_debug_printf (""); { char buf[1024]; __kmp_affinity_print_mask(buf, 1024, (kmp_affin_mask_t )(mask)); __kmp_debug_printf( "kmp_set_affinity: setting affinity mask for thread %d = %s\n" , gtid, buf); }; }
4739	})if (kmp_a_debug >= 1000) { __kmp_debug_printf (""); { char buf[1024]; __kmp_affinity_print_mask(buf, 1024, (kmp_affin_mask_t )(mask)); __kmp_debug_printf( "kmp_set_affinity: setting affinity mask for thread %d = %s\n" , gtid, buf); }; };
4740
4741	if (__kmp_env_consistency_check) {
4742	if ((mask == NULL__null) \|\| (*mask == NULL__null)) {
4743	KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity")__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_AffinityInvalidMask , "kmp_set_affinity"), __kmp_msg_null);
4744	} else {
4745	unsigned proc;
4746	int num_procs = 0;
4747
4748	KMP_CPU_SET_ITERATE(proc, ((kmp_affin_mask_t )(mask)))for (proc = (((kmp_affin_mask_t )(mask)))->begin(); (int )proc != (((kmp_affin_mask_t )(mask)))->end(); proc = (( (kmp_affin_mask_t )(mask)))->next(proc)) {
4749	if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)(__kmp_affin_fullMask)->is_set(proc)) {
4750	KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity")__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_AffinityInvalidMask , "kmp_set_affinity"), __kmp_msg_null);
4751	}
4752	if (!KMP_CPU_ISSET(proc, (kmp_affin_mask_t )(mask))((kmp_affin_mask_t )(mask))->is_set(proc)) {
4753	continue;
4754	}
4755	num_procs++;
4756	}
4757	if (num_procs == 0) {
4758	KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity")__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_AffinityInvalidMask , "kmp_set_affinity"), __kmp_msg_null);
4759	}
4760
4761	#if KMP_GROUP_AFFINITY0
4762	if (__kmp_get_proc_group((kmp_affin_mask_t )(mask))((kmp_affin_mask_t )(mask))->get_proc_group() < 0) {
4763	KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity")__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_AffinityInvalidMask , "kmp_set_affinity"), __kmp_msg_null);
4764	}
4765	#endif /* KMP_GROUP_AFFINITY */
4766	}
4767	}
4768
4769	th = __kmp_threads[gtid];
4770	KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL)if (!(th->th.th_affin_mask != __null)) { __kmp_debug_assert ("th->th.th_affin_mask != __null", "openmp/runtime/src/kmp_affinity.cpp" , 4770); };
4771	retval = __kmp_set_system_affinity((kmp_affin_mask_t )(mask), FALSE)((kmp_affin_mask_t )(mask))->set_system_affinity(0);
4772	if (retval == 0) {
4773	KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t )(mask))(th->th.th_affin_mask)->copy((kmp_affin_mask_t )(mask ));
4774	}
4775
4776	th->th.th_current_place = KMP_PLACE_UNDEFINED(-2);
4777	th->th.th_new_place = KMP_PLACE_UNDEFINED(-2);
4778	th->th.th_first_place = 0;
4779	th->th.th_last_place = __kmp_affinity.num_masks - 1;
4780
4781	// Turn off 4.0 affinity for the current tread at this parallel level.
4782	th->th.th_current_task->td_icvs.proc_bind = proc_bind_false;
4783
4784	return retval;
4785	}
4786
4787	int __kmp_aux_get_affinity(void **mask) {
4788	int gtid;
4789	int retval;
4790	#if KMP_OS_WINDOWS0 \|\| KMP_DEBUG1
4791	kmp_info_t *th;
4792	#endif
4793	if (!KMP_AFFINITY_CAPABLE()(__kmp_affin_mask_size > 0)) {
4794	return -1;
4795	}
4796
4797	gtid = __kmp_entry_gtid()__kmp_get_global_thread_id_reg();
4798	#if KMP_OS_WINDOWS0 \|\| KMP_DEBUG1
4799	th = __kmp_threads[gtid];
4800	#else
4801	(void)gtid; // unused variable
4802	#endif
4803	KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL)if (!(th->th.th_affin_mask != __null)) { __kmp_debug_assert ("th->th.th_affin_mask != __null", "openmp/runtime/src/kmp_affinity.cpp" , 4803); };
4804
4805	KA_TRACE(if (kmp_a_debug >= 1000) { __kmp_debug_printf (""); { char buf[1024]; __kmp_affinity_print_mask(buf, 1024, th->th.th_affin_mask ); __kmp_printf( "kmp_get_affinity: stored affinity mask for thread %d = %s\n" , gtid, buf); }; }
4806	1000, (""); {if (kmp_a_debug >= 1000) { __kmp_debug_printf (""); { char buf[1024]; __kmp_affinity_print_mask(buf, 1024, th->th.th_affin_mask ); __kmp_printf( "kmp_get_affinity: stored affinity mask for thread %d = %s\n" , gtid, buf); }; }
4807	char buf[KMP_AFFIN_MASK_PRINT_LEN];if (kmp_a_debug >= 1000) { __kmp_debug_printf (""); { char buf[1024]; __kmp_affinity_print_mask(buf, 1024, th->th.th_affin_mask ); __kmp_printf( "kmp_get_affinity: stored affinity mask for thread %d = %s\n" , gtid, buf); }; }
4808	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,if (kmp_a_debug >= 1000) { __kmp_debug_printf (""); { char buf[1024]; __kmp_affinity_print_mask(buf, 1024, th->th.th_affin_mask ); __kmp_printf( "kmp_get_affinity: stored affinity mask for thread %d = %s\n" , gtid, buf); }; }
4809	th->th.th_affin_mask);if (kmp_a_debug >= 1000) { __kmp_debug_printf (""); { char buf[1024]; __kmp_affinity_print_mask(buf, 1024, th->th.th_affin_mask ); __kmp_printf( "kmp_get_affinity: stored affinity mask for thread %d = %s\n" , gtid, buf); }; }
4810	__kmp_printf(if (kmp_a_debug >= 1000) { __kmp_debug_printf (""); { char buf[1024]; __kmp_affinity_print_mask(buf, 1024, th->th.th_affin_mask ); __kmp_printf( "kmp_get_affinity: stored affinity mask for thread %d = %s\n" , gtid, buf); }; }
4811	"kmp_get_affinity: stored affinity mask for thread %d = %s\n", gtid,if (kmp_a_debug >= 1000) { __kmp_debug_printf (""); { char buf[1024]; __kmp_affinity_print_mask(buf, 1024, th->th.th_affin_mask ); __kmp_printf( "kmp_get_affinity: stored affinity mask for thread %d = %s\n" , gtid, buf); }; }
4812	buf);if (kmp_a_debug >= 1000) { __kmp_debug_printf (""); { char buf[1024]; __kmp_affinity_print_mask(buf, 1024, th->th.th_affin_mask ); __kmp_printf( "kmp_get_affinity: stored affinity mask for thread %d = %s\n" , gtid, buf); }; }
4813	})if (kmp_a_debug >= 1000) { __kmp_debug_printf (""); { char buf[1024]; __kmp_affinity_print_mask(buf, 1024, th->th.th_affin_mask ); __kmp_printf( "kmp_get_affinity: stored affinity mask for thread %d = %s\n" , gtid, buf); }; };
4814
4815	if (__kmp_env_consistency_check) {
4816	if ((mask == NULL__null) \|\| (*mask == NULL__null)) {
4817	KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity")__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_AffinityInvalidMask , "kmp_get_affinity"), __kmp_msg_null);
4818	}
4819	}
4820
4821	#if !KMP_OS_WINDOWS0
4822
4823	retval = __kmp_get_system_affinity((kmp_affin_mask_t )(mask), FALSE)((kmp_affin_mask_t )(mask))->get_system_affinity(0);
4824	KA_TRACE(if (kmp_a_debug >= 1000) { __kmp_debug_printf (""); { char buf[1024]; __kmp_affinity_print_mask(buf, 1024, (kmp_affin_mask_t )(mask)); __kmp_printf( "kmp_get_affinity: system affinity mask for thread %d = %s\n" , gtid, buf); }; }
4825	1000, (""); {if (kmp_a_debug >= 1000) { __kmp_debug_printf (""); { char buf[1024]; __kmp_affinity_print_mask(buf, 1024, (kmp_affin_mask_t )(mask)); __kmp_printf( "kmp_get_affinity: system affinity mask for thread %d = %s\n" , gtid, buf); }; }
4826	char buf[KMP_AFFIN_MASK_PRINT_LEN];if (kmp_a_debug >= 1000) { __kmp_debug_printf (""); { char buf[1024]; __kmp_affinity_print_mask(buf, 1024, (kmp_affin_mask_t )(mask)); __kmp_printf( "kmp_get_affinity: system affinity mask for thread %d = %s\n" , gtid, buf); }; }
4827	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,if (kmp_a_debug >= 1000) { __kmp_debug_printf (""); { char buf[1024]; __kmp_affinity_print_mask(buf, 1024, (kmp_affin_mask_t )(mask)); __kmp_printf( "kmp_get_affinity: system affinity mask for thread %d = %s\n" , gtid, buf); }; }
4828	(kmp_affin_mask_t )(mask));if (kmp_a_debug >= 1000) { __kmp_debug_printf (""); { char buf[1024]; __kmp_affinity_print_mask(buf, 1024, (kmp_affin_mask_t )(mask)); __kmp_printf( "kmp_get_affinity: system affinity mask for thread %d = %s\n" , gtid, buf); }; }
4829	__kmp_printf(if (kmp_a_debug >= 1000) { __kmp_debug_printf (""); { char buf[1024]; __kmp_affinity_print_mask(buf, 1024, (kmp_affin_mask_t )(mask)); __kmp_printf( "kmp_get_affinity: system affinity mask for thread %d = %s\n" , gtid, buf); }; }
4830	"kmp_get_affinity: system affinity mask for thread %d = %s\n", gtid,if (kmp_a_debug >= 1000) { __kmp_debug_printf (""); { char buf[1024]; __kmp_affinity_print_mask(buf, 1024, (kmp_affin_mask_t )(mask)); __kmp_printf( "kmp_get_affinity: system affinity mask for thread %d = %s\n" , gtid, buf); }; }
4831	buf);if (kmp_a_debug >= 1000) { __kmp_debug_printf (""); { char buf[1024]; __kmp_affinity_print_mask(buf, 1024, (kmp_affin_mask_t )(mask)); __kmp_printf( "kmp_get_affinity: system affinity mask for thread %d = %s\n" , gtid, buf); }; }
4832	})if (kmp_a_debug >= 1000) { __kmp_debug_printf (""); { char buf[1024]; __kmp_affinity_print_mask(buf, 1024, (kmp_affin_mask_t )(mask)); __kmp_printf( "kmp_get_affinity: system affinity mask for thread %d = %s\n" , gtid, buf); }; };
4833	return retval;
4834
4835	#else
4836	(void)retval;
4837
4838	KMP_CPU_COPY((kmp_affin_mask_t )(mask), th->th.th_affin_mask)((kmp_affin_mask_t )(mask))->copy(th->th.th_affin_mask );
4839	return 0;
4840
4841	#endif /* KMP_OS_WINDOWS */
4842	}
4843
4844	int __kmp_aux_get_affinity_max_proc() {
4845	if (!KMP_AFFINITY_CAPABLE()(__kmp_affin_mask_size > 0)) {
4846	return 0;
4847	}
4848	#if KMP_GROUP_AFFINITY0
4849	if (__kmp_num_proc_groups > 1) {
4850	return (int)(__kmp_num_proc_groups * sizeof(DWORD_PTR) * CHAR_BIT8);
4851	}
4852	#endif
4853	return __kmp_xproc;
4854	}
4855
4856	int __kmp_aux_set_affinity_mask_proc(int proc, void **mask) {
4857	if (!KMP_AFFINITY_CAPABLE()(__kmp_affin_mask_size > 0)) {
4858	return -1;
4859	}
4860
4861	KA_TRACE(if (kmp_a_debug >= 1000) { __kmp_debug_printf (""); { int gtid = __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask (buf, 1024, (kmp_affin_mask_t )(mask)); __kmp_debug_printf( "kmp_set_affinity_mask_proc: setting proc %d in " "affinity mask for thread %d = %s\n" , proc, gtid, buf); }; }
4862	1000, (""); {if (kmp_a_debug >= 1000) { __kmp_debug_printf (""); { int gtid = __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask (buf, 1024, (kmp_affin_mask_t )(mask)); __kmp_debug_printf( "kmp_set_affinity_mask_proc: setting proc %d in " "affinity mask for thread %d = %s\n" , proc, gtid, buf); }; }
4863	int gtid = __kmp_entry_gtid();if (kmp_a_debug >= 1000) { __kmp_debug_printf (""); { int gtid = __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask (buf, 1024, (kmp_affin_mask_t )(mask)); __kmp_debug_printf( "kmp_set_affinity_mask_proc: setting proc %d in " "affinity mask for thread %d = %s\n" , proc, gtid, buf); }; }
4864	char buf[KMP_AFFIN_MASK_PRINT_LEN];if (kmp_a_debug >= 1000) { __kmp_debug_printf (""); { int gtid = __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask (buf, 1024, (kmp_affin_mask_t )(mask)); __kmp_debug_printf( "kmp_set_affinity_mask_proc: setting proc %d in " "affinity mask for thread %d = %s\n" , proc, gtid, buf); }; }
4865	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,if (kmp_a_debug >= 1000) { __kmp_debug_printf (""); { int gtid = __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask (buf, 1024, (kmp_affin_mask_t )(mask)); __kmp_debug_printf( "kmp_set_affinity_mask_proc: setting proc %d in " "affinity mask for thread %d = %s\n" , proc, gtid, buf); }; }
4866	(kmp_affin_mask_t )(mask));if (kmp_a_debug >= 1000) { __kmp_debug_printf (""); { int gtid = __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask (buf, 1024, (kmp_affin_mask_t )(mask)); __kmp_debug_printf( "kmp_set_affinity_mask_proc: setting proc %d in " "affinity mask for thread %d = %s\n" , proc, gtid, buf); }; }
4867	__kmp_debug_printf("kmp_set_affinity_mask_proc: setting proc %d in "if (kmp_a_debug >= 1000) { __kmp_debug_printf (""); { int gtid = __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask (buf, 1024, (kmp_affin_mask_t )(mask)); __kmp_debug_printf( "kmp_set_affinity_mask_proc: setting proc %d in " "affinity mask for thread %d = %s\n" , proc, gtid, buf); }; }
4868	"affinity mask for thread %d = %s\n",if (kmp_a_debug >= 1000) { __kmp_debug_printf (""); { int gtid = __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask (buf, 1024, (kmp_affin_mask_t )(mask)); __kmp_debug_printf( "kmp_set_affinity_mask_proc: setting proc %d in " "affinity mask for thread %d = %s\n" , proc, gtid, buf); }; }
4869	proc, gtid, buf);if (kmp_a_debug >= 1000) { __kmp_debug_printf (""); { int gtid = __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask (buf, 1024, (kmp_affin_mask_t )(mask)); __kmp_debug_printf( "kmp_set_affinity_mask_proc: setting proc %d in " "affinity mask for thread %d = %s\n" , proc, gtid, buf); }; }
4870	})if (kmp_a_debug >= 1000) { __kmp_debug_printf (""); { int gtid = __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask (buf, 1024, (kmp_affin_mask_t )(mask)); __kmp_debug_printf( "kmp_set_affinity_mask_proc: setting proc %d in " "affinity mask for thread %d = %s\n" , proc, gtid, buf); }; };
4871
4872	if (__kmp_env_consistency_check) {
4873	if ((mask == NULL__null) \|\| (*mask == NULL__null)) {
4874	KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc")__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_AffinityInvalidMask , "kmp_set_affinity_mask_proc"), __kmp_msg_null);
4875	}
4876	}
4877
4878	if ((proc < 0) \|\| (proc >= __kmp_aux_get_affinity_max_proc())) {
4879	return -1;
4880	}
4881	if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)(__kmp_affin_fullMask)->is_set(proc)) {
4882	return -2;
4883	}
4884
4885	KMP_CPU_SET(proc, (kmp_affin_mask_t )(mask))((kmp_affin_mask_t )(mask))->set(proc);
4886	return 0;
4887	}
4888
4889	int __kmp_aux_unset_affinity_mask_proc(int proc, void **mask) {
4890	if (!KMP_AFFINITY_CAPABLE()(__kmp_affin_mask_size > 0)) {
4891	return -1;
4892	}
4893
4894	KA_TRACE(if (kmp_a_debug >= 1000) { __kmp_debug_printf (""); { int gtid = __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask (buf, 1024, (kmp_affin_mask_t )(mask)); __kmp_debug_printf( "kmp_unset_affinity_mask_proc: unsetting proc %d in " "affinity mask for thread %d = %s\n" , proc, gtid, buf); }; }
4895	1000, (""); {if (kmp_a_debug >= 1000) { __kmp_debug_printf (""); { int gtid = __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask (buf, 1024, (kmp_affin_mask_t )(mask)); __kmp_debug_printf( "kmp_unset_affinity_mask_proc: unsetting proc %d in " "affinity mask for thread %d = %s\n" , proc, gtid, buf); }; }
4896	int gtid = __kmp_entry_gtid();if (kmp_a_debug >= 1000) { __kmp_debug_printf (""); { int gtid = __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask (buf, 1024, (kmp_affin_mask_t )(mask)); __kmp_debug_printf( "kmp_unset_affinity_mask_proc: unsetting proc %d in " "affinity mask for thread %d = %s\n" , proc, gtid, buf); }; }
4897	char buf[KMP_AFFIN_MASK_PRINT_LEN];if (kmp_a_debug >= 1000) { __kmp_debug_printf (""); { int gtid = __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask (buf, 1024, (kmp_affin_mask_t )(mask)); __kmp_debug_printf( "kmp_unset_affinity_mask_proc: unsetting proc %d in " "affinity mask for thread %d = %s\n" , proc, gtid, buf); }; }
4898	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,if (kmp_a_debug >= 1000) { __kmp_debug_printf (""); { int gtid = __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask (buf, 1024, (kmp_affin_mask_t )(mask)); __kmp_debug_printf( "kmp_unset_affinity_mask_proc: unsetting proc %d in " "affinity mask for thread %d = %s\n" , proc, gtid, buf); }; }
4899	(kmp_affin_mask_t )(mask));if (kmp_a_debug >= 1000) { __kmp_debug_printf (""); { int gtid = __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask (buf, 1024, (kmp_affin_mask_t )(mask)); __kmp_debug_printf( "kmp_unset_affinity_mask_proc: unsetting proc %d in " "affinity mask for thread %d = %s\n" , proc, gtid, buf); }; }
4900	__kmp_debug_printf("kmp_unset_affinity_mask_proc: unsetting proc %d in "if (kmp_a_debug >= 1000) { __kmp_debug_printf (""); { int gtid = __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask (buf, 1024, (kmp_affin_mask_t )(mask)); __kmp_debug_printf( "kmp_unset_affinity_mask_proc: unsetting proc %d in " "affinity mask for thread %d = %s\n" , proc, gtid, buf); }; }
4901	"affinity mask for thread %d = %s\n",if (kmp_a_debug >= 1000) { __kmp_debug_printf (""); { int gtid = __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask (buf, 1024, (kmp_affin_mask_t )(mask)); __kmp_debug_printf( "kmp_unset_affinity_mask_proc: unsetting proc %d in " "affinity mask for thread %d = %s\n" , proc, gtid, buf); }; }
4902	proc, gtid, buf);if (kmp_a_debug >= 1000) { __kmp_debug_printf (""); { int gtid = __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask (buf, 1024, (kmp_affin_mask_t )(mask)); __kmp_debug_printf( "kmp_unset_affinity_mask_proc: unsetting proc %d in " "affinity mask for thread %d = %s\n" , proc, gtid, buf); }; }
4903	})if (kmp_a_debug >= 1000) { __kmp_debug_printf (""); { int gtid = __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask (buf, 1024, (kmp_affin_mask_t )(mask)); __kmp_debug_printf( "kmp_unset_affinity_mask_proc: unsetting proc %d in " "affinity mask for thread %d = %s\n" , proc, gtid, buf); }; };
4904
4905	if (__kmp_env_consistency_check) {
4906	if ((mask == NULL__null) \|\| (*mask == NULL__null)) {
4907	KMP_FATAL(AffinityInvalidMask, "kmp_unset_affinity_mask_proc")__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_AffinityInvalidMask , "kmp_unset_affinity_mask_proc"), __kmp_msg_null);
4908	}
4909	}
4910
4911	if ((proc < 0) \|\| (proc >= __kmp_aux_get_affinity_max_proc())) {
4912	return -1;
4913	}
4914	if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)(__kmp_affin_fullMask)->is_set(proc)) {
4915	return -2;
4916	}
4917
4918	KMP_CPU_CLR(proc, (kmp_affin_mask_t )(mask))((kmp_affin_mask_t )(mask))->clear(proc);
4919	return 0;
4920	}
4921
4922	int __kmp_aux_get_affinity_mask_proc(int proc, void **mask) {
4923	if (!KMP_AFFINITY_CAPABLE()(__kmp_affin_mask_size > 0)) {
4924	return -1;
4925	}
4926
4927	KA_TRACE(if (kmp_a_debug >= 1000) { __kmp_debug_printf (""); { int gtid = __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask (buf, 1024, (kmp_affin_mask_t )(mask)); __kmp_debug_printf( "kmp_get_affinity_mask_proc: getting proc %d in " "affinity mask for thread %d = %s\n" , proc, gtid, buf); }; }
4928	1000, (""); {if (kmp_a_debug >= 1000) { __kmp_debug_printf (""); { int gtid = __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask (buf, 1024, (kmp_affin_mask_t )(mask)); __kmp_debug_printf( "kmp_get_affinity_mask_proc: getting proc %d in " "affinity mask for thread %d = %s\n" , proc, gtid, buf); }; }
4929	int gtid = __kmp_entry_gtid();if (kmp_a_debug >= 1000) { __kmp_debug_printf (""); { int gtid = __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask (buf, 1024, (kmp_affin_mask_t )(mask)); __kmp_debug_printf( "kmp_get_affinity_mask_proc: getting proc %d in " "affinity mask for thread %d = %s\n" , proc, gtid, buf); }; }
4930	char buf[KMP_AFFIN_MASK_PRINT_LEN];if (kmp_a_debug >= 1000) { __kmp_debug_printf (""); { int gtid = __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask (buf, 1024, (kmp_affin_mask_t )(mask)); __kmp_debug_printf( "kmp_get_affinity_mask_proc: getting proc %d in " "affinity mask for thread %d = %s\n" , proc, gtid, buf); }; }
4931	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,if (kmp_a_debug >= 1000) { __kmp_debug_printf (""); { int gtid = __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask (buf, 1024, (kmp_affin_mask_t )(mask)); __kmp_debug_printf( "kmp_get_affinity_mask_proc: getting proc %d in " "affinity mask for thread %d = %s\n" , proc, gtid, buf); }; }
4932	(kmp_affin_mask_t )(mask));if (kmp_a_debug >= 1000) { __kmp_debug_printf (""); { int gtid = __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask (buf, 1024, (kmp_affin_mask_t )(mask)); __kmp_debug_printf( "kmp_get_affinity_mask_proc: getting proc %d in " "affinity mask for thread %d = %s\n" , proc, gtid, buf); }; }
4933	__kmp_debug_printf("kmp_get_affinity_mask_proc: getting proc %d in "if (kmp_a_debug >= 1000) { __kmp_debug_printf (""); { int gtid = __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask (buf, 1024, (kmp_affin_mask_t )(mask)); __kmp_debug_printf( "kmp_get_affinity_mask_proc: getting proc %d in " "affinity mask for thread %d = %s\n" , proc, gtid, buf); }; }
4934	"affinity mask for thread %d = %s\n",if (kmp_a_debug >= 1000) { __kmp_debug_printf (""); { int gtid = __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask (buf, 1024, (kmp_affin_mask_t )(mask)); __kmp_debug_printf( "kmp_get_affinity_mask_proc: getting proc %d in " "affinity mask for thread %d = %s\n" , proc, gtid, buf); }; }
4935	proc, gtid, buf);if (kmp_a_debug >= 1000) { __kmp_debug_printf (""); { int gtid = __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask (buf, 1024, (kmp_affin_mask_t )(mask)); __kmp_debug_printf( "kmp_get_affinity_mask_proc: getting proc %d in " "affinity mask for thread %d = %s\n" , proc, gtid, buf); }; }
4936	})if (kmp_a_debug >= 1000) { __kmp_debug_printf (""); { int gtid = __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask (buf, 1024, (kmp_affin_mask_t )(mask)); __kmp_debug_printf( "kmp_get_affinity_mask_proc: getting proc %d in " "affinity mask for thread %d = %s\n" , proc, gtid, buf); }; };
4937
4938	if (__kmp_env_consistency_check) {
4939	if ((mask == NULL__null) \|\| (*mask == NULL__null)) {
4940	KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity_mask_proc")__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_AffinityInvalidMask , "kmp_get_affinity_mask_proc"), __kmp_msg_null);
4941	}
4942	}
4943
4944	if ((proc < 0) \|\| (proc >= __kmp_aux_get_affinity_max_proc())) {
4945	return -1;
4946	}
4947	if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)(__kmp_affin_fullMask)->is_set(proc)) {
4948	return 0;
4949	}
4950
4951	return KMP_CPU_ISSET(proc, (kmp_affin_mask_t )(mask))((kmp_affin_mask_t )(mask))->is_set(proc);
4952	}
4953
4954	// Dynamic affinity settings - Affinity balanced
4955	void __kmp_balanced_affinity(kmp_info_t *th, int nthreads) {
4956	KMP_DEBUG_ASSERT(th)if (!(th)) { __kmp_debug_assert("th", "openmp/runtime/src/kmp_affinity.cpp" , 4956); };
4957	bool fine_gran = true;
4958	int tid = th->th.th_info.ds.ds_tid;
4959	const char *env_var = "KMP_AFFINITY";
4960
4961	// Do not perform balanced affinity for the hidden helper threads
4962	if (KMP_HIDDEN_HELPER_THREAD(__kmp_gtid_from_thread(th))((__kmp_gtid_from_thread(th)) >= 1 && (__kmp_gtid_from_thread (th)) <= __kmp_hidden_helper_threads_num))
4963	return;
4964
4965	switch (__kmp_affinity.gran) {
4966	case KMP_HW_THREAD:
4967	break;
4968	case KMP_HW_CORE:
4969	if (__kmp_nThreadsPerCore > 1) {
4970	fine_gran = false;
4971	}
4972	break;
4973	case KMP_HW_SOCKET:
4974	if (nCoresPerPkg > 1) {
4975	fine_gran = false;
4976	}
4977	break;
4978	default:
4979	fine_gran = false;
4980	}
4981
4982	if (__kmp_topology->is_uniform()) {
4983	int coreID;
4984	int threadID;
4985	// Number of hyper threads per core in HT machine
4986	int __kmp_nth_per_core = __kmp_avail_proc / __kmp_ncores;
4987	// Number of cores
4988	int ncores = __kmp_ncores;
4989	if ((nPackages > 1) && (__kmp_nth_per_core <= 1)) {
4990	__kmp_nth_per_core = __kmp_avail_proc / nPackages;
4991	ncores = nPackages;
4992	}
4993	// How many threads will be bound to each core
4994	int chunk = nthreads / ncores;
4995	// How many cores will have an additional thread bound to it - "big cores"
4996	int big_cores = nthreads % ncores;
4997	// Number of threads on the big cores
4998	int big_nth = (chunk + 1) * big_cores;
4999	if (tid < big_nth) {
5000	coreID = tid / (chunk + 1);
5001	threadID = (tid % (chunk + 1)) % __kmp_nth_per_core;
5002	} else { // tid >= big_nth
5003	coreID = (tid - big_cores) / chunk;
5004	threadID = ((tid - big_cores) % chunk) % __kmp_nth_per_core;
5005	}
5006	KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(),if (!((__kmp_affin_mask_size > 0))) { __kmp_debug_assert(( "Illegal set affinity operation when not capable"), "openmp/runtime/src/kmp_affinity.cpp" , 5007); }
5007	"Illegal set affinity operation when not capable")if (!((__kmp_affin_mask_size > 0))) { __kmp_debug_assert(( "Illegal set affinity operation when not capable"), "openmp/runtime/src/kmp_affinity.cpp" , 5007); };
5008
5009	kmp_affin_mask_t *mask = th->th.th_affin_mask;
5010	KMP_CPU_ZERO(mask)(mask)->zero();
5011
5012	if (fine_gran) {
5013	int osID =
5014	__kmp_topology->at(coreID * __kmp_nth_per_core + threadID).os_id;
5015	KMP_CPU_SET(osID, mask)(mask)->set(osID);
5016	} else {
5017	for (int i = 0; i < __kmp_nth_per_core; i++) {
5018	int osID;
5019	osID = __kmp_topology->at(coreID * __kmp_nth_per_core + i).os_id;
5020	KMP_CPU_SET(osID, mask)(mask)->set(osID);
5021	}
5022	}
5023	if (__kmp_affinity.flags.verbose) {
5024	char buf[KMP_AFFIN_MASK_PRINT_LEN1024];
5025	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN1024, mask);
5026	KMP_INFORM(BoundToOSProcSet, env_var, (kmp_int32)getpid(), __kmp_gettid(),__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_BoundToOSProcSet , env_var, (kmp_int32)getpid(), syscall(186), tid, buf), __kmp_msg_null )
5027	tid, buf)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_BoundToOSProcSet , env_var, (kmp_int32)getpid(), syscall(186), tid, buf), __kmp_msg_null );
5028	}
5029	__kmp_set_system_affinity(mask, TRUE)(mask)->set_system_affinity((!0));
5030	} else { // Non-uniform topology
5031
5032	kmp_affin_mask_t *mask = th->th.th_affin_mask;
5033	KMP_CPU_ZERO(mask)(mask)->zero();
5034
5035	int core_level =
5036	__kmp_affinity_find_core_level(__kmp_avail_proc, __kmp_aff_depth - 1);
5037	int ncores = __kmp_affinity_compute_ncores(__kmp_avail_proc,
5038	__kmp_aff_depth - 1, core_level);
5039	int nth_per_core = __kmp_affinity_max_proc_per_core(
5040	__kmp_avail_proc, __kmp_aff_depth - 1, core_level);
5041
5042	// For performance gain consider the special case nthreads ==
5043	// __kmp_avail_proc
5044	if (nthreads == __kmp_avail_proc) {
5045	if (fine_gran) {
5046	int osID = __kmp_topology->at(tid).os_id;
5047	KMP_CPU_SET(osID, mask)(mask)->set(osID);
5048	} else {
5049	int core =
5050	__kmp_affinity_find_core(tid, __kmp_aff_depth - 1, core_level);
5051	for (int i = 0; i < __kmp_avail_proc; i++) {
5052	int osID = __kmp_topology->at(i).os_id;
5053	if (__kmp_affinity_find_core(i, __kmp_aff_depth - 1, core_level) ==
5054	core) {
5055	KMP_CPU_SET(osID, mask)(mask)->set(osID);
5056	}
5057	}
5058	}
5059	} else if (nthreads <= ncores) {
5060
5061	int core = 0;
5062	for (int i = 0; i < ncores; i++) {
5063	// Check if this core from procarr[] is in the mask
5064	int in_mask = 0;
5065	for (int j = 0; j < nth_per_core; j++) {
5066	if (procarr[i * nth_per_core + j] != -1) {
5067	in_mask = 1;
5068	break;
5069	}
5070	}
5071	if (in_mask) {
5072	if (tid == core) {
5073	for (int j = 0; j < nth_per_core; j++) {
5074	int osID = procarr[i * nth_per_core + j];
5075	if (osID != -1) {
5076	KMP_CPU_SET(osID, mask)(mask)->set(osID);
5077	// For fine granularity it is enough to set the first available
5078	// osID for this core
5079	if (fine_gran) {
5080	break;
5081	}
5082	}
5083	}
5084	break;
5085	} else {
5086	core++;
5087	}
5088	}
5089	}
5090	} else { // nthreads > ncores
5091	// Array to save the number of processors at each core
5092	int nproc_at_core = (int )KMP_ALLOCA(sizeof(int) * ncores)__builtin_alloca (sizeof(int) * ncores);
5093	// Array to save the number of cores with "x" available processors;
5094	int *ncores_with_x_procs =
5095	(int )KMP_ALLOCA(sizeof(int) (nth_per_core + 1))__builtin_alloca (sizeof(int) * (nth_per_core + 1));
5096	// Array to save the number of cores with # procs from x to nth_per_core
5097	int *ncores_with_x_to_max_procs =
5098	(int )KMP_ALLOCA(sizeof(int) (nth_per_core + 1))__builtin_alloca (sizeof(int) * (nth_per_core + 1));
5099
5100	for (int i = 0; i <= nth_per_core; i++) {
5101	ncores_with_x_procs[i] = 0;
5102	ncores_with_x_to_max_procs[i] = 0;
5103	}
5104
5105	for (int i = 0; i < ncores; i++) {
5106	int cnt = 0;
5107	for (int j = 0; j < nth_per_core; j++) {
5108	if (procarr[i * nth_per_core + j] != -1) {
5109	cnt++;
5110	}
5111	}
5112	nproc_at_core[i] = cnt;
5113	ncores_with_x_procs[cnt]++;
5114	}
5115
5116	for (int i = 0; i <= nth_per_core; i++) {
5117	for (int j = i; j <= nth_per_core; j++) {
5118	ncores_with_x_to_max_procs[i] += ncores_with_x_procs[j];
5119	}
5120	}
5121
5122	// Max number of processors
5123	int nproc = nth_per_core * ncores;
5124	// An array to keep number of threads per each context
5125	int newarr = (int )__kmp_allocate(sizeof(int) * nproc)___kmp_allocate((sizeof(int) * nproc), "openmp/runtime/src/kmp_affinity.cpp" , 5125);
5126	for (int i = 0; i < nproc; i++) {
5127	newarr[i] = 0;
5128	}
5129
5130	int nth = nthreads;
5131	int flag = 0;
5132	while (nth > 0) {
5133	for (int j = 1; j <= nth_per_core; j++) {
5134	int cnt = ncores_with_x_to_max_procs[j];
5135	for (int i = 0; i < ncores; i++) {
5136	// Skip the core with 0 processors
5137	if (nproc_at_core[i] == 0) {
5138	continue;
5139	}
5140	for (int k = 0; k < nth_per_core; k++) {
5141	if (procarr[i * nth_per_core + k] != -1) {
5142	if (newarr[i * nth_per_core + k] == 0) {
5143	newarr[i * nth_per_core + k] = 1;
5144	cnt--;
5145	nth--;
5146	break;
5147	} else {
5148	if (flag != 0) {
5149	newarr[i * nth_per_core + k]++;
5150	cnt--;
5151	nth--;
5152	break;
5153	}
5154	}
5155	}
5156	}
5157	if (cnt == 0 \|\| nth == 0) {
5158	break;
5159	}
5160	}
5161	if (nth == 0) {
5162	break;
5163	}
5164	}
5165	flag = 1;
5166	}
5167	int sum = 0;
5168	for (int i = 0; i < nproc; i++) {
5169	sum += newarr[i];
5170	if (sum > tid) {
5171	if (fine_gran) {
5172	int osID = procarr[i];
5173	KMP_CPU_SET(osID, mask)(mask)->set(osID);
5174	} else {
5175	int coreID = i / nth_per_core;
5176	for (int ii = 0; ii < nth_per_core; ii++) {
5177	int osID = procarr[coreID * nth_per_core + ii];
5178	if (osID != -1) {
5179	KMP_CPU_SET(osID, mask)(mask)->set(osID);
5180	}
5181	}
5182	}
5183	break;
5184	}
5185	}
5186	__kmp_free(newarr)___kmp_free((newarr), "openmp/runtime/src/kmp_affinity.cpp", 5186 );
5187	}
5188
5189	if (__kmp_affinity.flags.verbose) {
5190	char buf[KMP_AFFIN_MASK_PRINT_LEN1024];
5191	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN1024, mask);
5192	KMP_INFORM(BoundToOSProcSet, env_var, (kmp_int32)getpid(), __kmp_gettid(),__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_BoundToOSProcSet , env_var, (kmp_int32)getpid(), syscall(186), tid, buf), __kmp_msg_null )
5193	tid, buf)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_BoundToOSProcSet , env_var, (kmp_int32)getpid(), syscall(186), tid, buf), __kmp_msg_null );
5194	}
5195	__kmp_set_system_affinity(mask, TRUE)(mask)->set_system_affinity((!0));
5196	}
5197	}
5198
5199	#if KMP_OS_LINUX1 \|\| KMP_OS_FREEBSD0
5200	// We don't need this entry for Windows because
5201	// there is GetProcessAffinityMask() api
5202	//
5203	// The intended usage is indicated by these steps:
5204	// 1) The user gets the current affinity mask
5205	// 2) Then sets the affinity by calling this function
5206	// 3) Error check the return value
5207	// 4) Use non-OpenMP parallelization
5208	// 5) Reset the affinity to what was stored in step 1)
5209	#ifdef __cplusplus201703L
5210	extern "C"
5211	#endif
5212	int
5213	kmp_set_thread_affinity_mask_initial()
5214	// the function returns 0 on success,
5215	// -1 if we cannot bind thread
5216	// >0 (errno) if an error happened during binding
5217	{
5218	int gtid = __kmp_get_gtid()__kmp_get_global_thread_id();
5219	if (gtid < 0) {
5220	// Do not touch non-omp threads
5221	KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: "if (kmp_a_debug >= 30) { __kmp_debug_printf ("kmp_set_thread_affinity_mask_initial: " "non-omp thread, returning\n"); }
5222	"non-omp thread, returning\n"))if (kmp_a_debug >= 30) { __kmp_debug_printf ("kmp_set_thread_affinity_mask_initial: " "non-omp thread, returning\n"); };
5223	return -1;
5224	}
5225	if (!KMP_AFFINITY_CAPABLE()(__kmp_affin_mask_size > 0) \|\| !__kmp_init_middle) {
5226	KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: "if (kmp_a_debug >= 30) { __kmp_debug_printf ("kmp_set_thread_affinity_mask_initial: " "affinity not initialized, returning\n"); }
5227	"affinity not initialized, returning\n"))if (kmp_a_debug >= 30) { __kmp_debug_printf ("kmp_set_thread_affinity_mask_initial: " "affinity not initialized, returning\n"); };
5228	return -1;
5229	}
5230	KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: "if (kmp_a_debug >= 30) { __kmp_debug_printf ("kmp_set_thread_affinity_mask_initial: " "set full mask for thread %d\n", gtid); }
5231	"set full mask for thread %d\n",if (kmp_a_debug >= 30) { __kmp_debug_printf ("kmp_set_thread_affinity_mask_initial: " "set full mask for thread %d\n", gtid); }
5232	gtid))if (kmp_a_debug >= 30) { __kmp_debug_printf ("kmp_set_thread_affinity_mask_initial: " "set full mask for thread %d\n", gtid); };
5233	KMP_DEBUG_ASSERT(__kmp_affin_fullMask != NULL)if (!(__kmp_affin_fullMask != __null)) { __kmp_debug_assert("__kmp_affin_fullMask != __null" , "openmp/runtime/src/kmp_affinity.cpp", 5233); };
5234	return __kmp_set_system_affinity(__kmp_affin_fullMask, FALSE)(__kmp_affin_fullMask)->set_system_affinity(0);
5235	}
5236	#endif
5237
5238	#endif // KMP_AFFINITY_SUPPORTED