File: | projects/openmp/runtime/src/kmp_affinity.cpp |
Warning: | line 2901, column 5 Division by zero |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | /* | |||||
2 | * kmp_affinity.cpp -- affinity management | |||||
3 | */ | |||||
4 | ||||||
5 | //===----------------------------------------------------------------------===// | |||||
6 | // | |||||
7 | // The LLVM Compiler Infrastructure | |||||
8 | // | |||||
9 | // This file is dual licensed under the MIT and the University of Illinois Open | |||||
10 | // Source Licenses. See LICENSE.txt for details. | |||||
11 | // | |||||
12 | //===----------------------------------------------------------------------===// | |||||
13 | ||||||
14 | #include "kmp.h" | |||||
15 | #include "kmp_affinity.h" | |||||
16 | #include "kmp_i18n.h" | |||||
17 | #include "kmp_io.h" | |||||
18 | #include "kmp_str.h" | |||||
19 | #include "kmp_wrapper_getpid.h" | |||||
20 | #if KMP_USE_HIER_SCHED0 | |||||
21 | #include "kmp_dispatch_hier.h" | |||||
22 | #endif | |||||
23 | ||||||
24 | // Store the real or imagined machine hierarchy here | |||||
25 | static hierarchy_info machine_hierarchy; | |||||
26 | ||||||
27 | void __kmp_cleanup_hierarchy() { machine_hierarchy.fini(); } | |||||
28 | ||||||
29 | void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) { | |||||
30 | kmp_uint32 depth; | |||||
31 | // The test below is true if affinity is available, but set to "none". Need to | |||||
32 | // init on first use of hierarchical barrier. | |||||
33 | if (TCR_1(machine_hierarchy.uninitialized)(machine_hierarchy.uninitialized)) | |||||
34 | machine_hierarchy.init(NULL__null, nproc); | |||||
35 | ||||||
36 | // Adjust the hierarchy in case num threads exceeds original | |||||
37 | if (nproc > machine_hierarchy.base_num_threads) | |||||
38 | machine_hierarchy.resize(nproc); | |||||
39 | ||||||
40 | depth = machine_hierarchy.depth; | |||||
41 | KMP_DEBUG_ASSERT(depth > 0)if (!(depth > 0)) { __kmp_debug_assert("depth > 0", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 41); }; | |||||
42 | ||||||
43 | thr_bar->depth = depth; | |||||
44 | thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0] - 1; | |||||
45 | thr_bar->skip_per_level = machine_hierarchy.skipPerLevel; | |||||
46 | } | |||||
47 | ||||||
48 | #if KMP_AFFINITY_SUPPORTED1 | |||||
49 | ||||||
50 | bool KMPAffinity::picked_api = false; | |||||
51 | ||||||
52 | void *KMPAffinity::Mask::operator new(size_t n) { return __kmp_allocate(n)___kmp_allocate((n), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 52); } | |||||
53 | void *KMPAffinity::Mask::operator new[](size_t n) { return __kmp_allocate(n)___kmp_allocate((n), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 53); } | |||||
54 | void KMPAffinity::Mask::operator delete(void *p) { __kmp_free(p)___kmp_free((p), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 54); } | |||||
55 | void KMPAffinity::Mask::operator delete[](void *p) { __kmp_free(p)___kmp_free((p), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 55); } | |||||
56 | void *KMPAffinity::operator new(size_t n) { return __kmp_allocate(n)___kmp_allocate((n), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 56); } | |||||
57 | void KMPAffinity::operator delete(void *p) { __kmp_free(p)___kmp_free((p), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 57); } | |||||
58 | ||||||
59 | void KMPAffinity::pick_api() { | |||||
60 | KMPAffinity *affinity_dispatch; | |||||
61 | if (picked_api) | |||||
62 | return; | |||||
63 | #if KMP_USE_HWLOC0 | |||||
64 | // Only use Hwloc if affinity isn't explicitly disabled and | |||||
65 | // user requests Hwloc topology method | |||||
66 | if (__kmp_affinity_top_method == affinity_top_method_hwloc && | |||||
67 | __kmp_affinity_type != affinity_disabled) { | |||||
68 | affinity_dispatch = new KMPHwlocAffinity(); | |||||
69 | } else | |||||
70 | #endif | |||||
71 | { | |||||
72 | affinity_dispatch = new KMPNativeAffinity(); | |||||
73 | } | |||||
74 | __kmp_affinity_dispatch = affinity_dispatch; | |||||
75 | picked_api = true; | |||||
76 | } | |||||
77 | ||||||
78 | void KMPAffinity::destroy_api() { | |||||
79 | if (__kmp_affinity_dispatch != NULL__null) { | |||||
80 | delete __kmp_affinity_dispatch; | |||||
81 | __kmp_affinity_dispatch = NULL__null; | |||||
82 | picked_api = false; | |||||
83 | } | |||||
84 | } | |||||
85 | ||||||
86 | // Print the affinity mask to the character array in a pretty format. | |||||
87 | char *__kmp_affinity_print_mask(char *buf, int buf_len, | |||||
88 | kmp_affin_mask_t *mask) { | |||||
89 | KMP_ASSERT(buf_len >= 40)if (!(buf_len >= 40)) { __kmp_debug_assert("buf_len >= 40" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 89); }; | |||||
90 | char *scan = buf; | |||||
91 | char *end = buf + buf_len - 1; | |||||
92 | ||||||
93 | // Find first element / check for empty set. | |||||
94 | int i; | |||||
95 | i = mask->begin(); | |||||
96 | if (i == mask->end()) { | |||||
97 | KMP_SNPRINTFsnprintf(scan, end - scan + 1, "{<empty>}"); | |||||
98 | while (*scan != '\0') | |||||
99 | scan++; | |||||
100 | KMP_ASSERT(scan <= end)if (!(scan <= end)) { __kmp_debug_assert("scan <= end", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 100); }; | |||||
101 | return buf; | |||||
102 | } | |||||
103 | ||||||
104 | KMP_SNPRINTFsnprintf(scan, end - scan + 1, "{%d", i); | |||||
105 | while (*scan != '\0') | |||||
106 | scan++; | |||||
107 | i++; | |||||
108 | for (; i != mask->end(); i = mask->next(i)) { | |||||
109 | if (!KMP_CPU_ISSET(i, mask)(mask)->is_set(i)) { | |||||
110 | continue; | |||||
111 | } | |||||
112 | ||||||
113 | // Check for buffer overflow. A string of the form ",<n>" will have at most | |||||
114 | // 10 characters, plus we want to leave room to print ",...}" if the set is | |||||
115 | // too large to print for a total of 15 characters. We already left room for | |||||
116 | // '\0' in setting end. | |||||
117 | if (end - scan < 15) { | |||||
118 | break; | |||||
119 | } | |||||
120 | KMP_SNPRINTFsnprintf(scan, end - scan + 1, ",%-d", i); | |||||
121 | while (*scan != '\0') | |||||
122 | scan++; | |||||
123 | } | |||||
124 | if (i != mask->end()) { | |||||
125 | KMP_SNPRINTFsnprintf(scan, end - scan + 1, ",..."); | |||||
126 | while (*scan != '\0') | |||||
127 | scan++; | |||||
128 | } | |||||
129 | KMP_SNPRINTFsnprintf(scan, end - scan + 1, "}"); | |||||
130 | while (*scan != '\0') | |||||
131 | scan++; | |||||
132 | KMP_ASSERT(scan <= end)if (!(scan <= end)) { __kmp_debug_assert("scan <= end", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 132); }; | |||||
133 | return buf; | |||||
134 | } | |||||
135 | ||||||
136 | void __kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask) { | |||||
137 | KMP_CPU_ZERO(mask)(mask)->zero(); | |||||
138 | ||||||
139 | #if KMP_GROUP_AFFINITY0 | |||||
140 | ||||||
141 | if (__kmp_num_proc_groups > 1) { | |||||
142 | int group; | |||||
143 | KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL)if (!(__kmp_GetActiveProcessorCount != __null)) { __kmp_debug_assert ("__kmp_GetActiveProcessorCount != __null", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 143); }; | |||||
144 | for (group = 0; group < __kmp_num_proc_groups; group++) { | |||||
145 | int i; | |||||
146 | int num = __kmp_GetActiveProcessorCount(group); | |||||
147 | for (i = 0; i < num; i++) { | |||||
148 | KMP_CPU_SET(i + group * (CHAR_BIT * sizeof(DWORD_PTR)), mask)(mask)->set(i + group * (8 * sizeof(DWORD_PTR))); | |||||
149 | } | |||||
150 | } | |||||
151 | } else | |||||
152 | ||||||
153 | #endif /* KMP_GROUP_AFFINITY */ | |||||
154 | ||||||
155 | { | |||||
156 | int proc; | |||||
157 | for (proc = 0; proc < __kmp_xproc; proc++) { | |||||
158 | KMP_CPU_SET(proc, mask)(mask)->set(proc); | |||||
159 | } | |||||
160 | } | |||||
161 | } | |||||
162 | ||||||
163 | // When sorting by labels, __kmp_affinity_assign_child_nums() must first be | |||||
164 | // called to renumber the labels from [0..n] and place them into the child_num | |||||
165 | // vector of the address object. This is done in case the labels used for | |||||
166 | // the children at one node of the hierarchy differ from those used for | |||||
167 | // another node at the same level. Example: suppose the machine has 2 nodes | |||||
168 | // with 2 packages each. The first node contains packages 601 and 602, and | |||||
169 | // second node contains packages 603 and 604. If we try to sort the table | |||||
170 | // for "scatter" affinity, the table will still be sorted 601, 602, 603, 604 | |||||
171 | // because we are paying attention to the labels themselves, not the ordinal | |||||
172 | // child numbers. By using the child numbers in the sort, the result is | |||||
173 | // {0,0}=601, {0,1}=603, {1,0}=602, {1,1}=604. | |||||
174 | static void __kmp_affinity_assign_child_nums(AddrUnsPair *address2os, | |||||
175 | int numAddrs) { | |||||
176 | KMP_DEBUG_ASSERT(numAddrs > 0)if (!(numAddrs > 0)) { __kmp_debug_assert("numAddrs > 0" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 176); }; | |||||
177 | int depth = address2os->first.depth; | |||||
178 | unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned))___kmp_allocate((depth * sizeof(unsigned)), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 178); | |||||
179 | unsigned *lastLabel = (unsigned *)__kmp_allocate(depth * sizeof(unsigned))___kmp_allocate((depth * sizeof(unsigned)), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 179); | |||||
180 | int labCt; | |||||
181 | for (labCt = 0; labCt < depth; labCt++) { | |||||
182 | address2os[0].first.childNums[labCt] = counts[labCt] = 0; | |||||
183 | lastLabel[labCt] = address2os[0].first.labels[labCt]; | |||||
184 | } | |||||
185 | int i; | |||||
186 | for (i = 1; i < numAddrs; i++) { | |||||
187 | for (labCt = 0; labCt < depth; labCt++) { | |||||
188 | if (address2os[i].first.labels[labCt] != lastLabel[labCt]) { | |||||
189 | int labCt2; | |||||
190 | for (labCt2 = labCt + 1; labCt2 < depth; labCt2++) { | |||||
191 | counts[labCt2] = 0; | |||||
192 | lastLabel[labCt2] = address2os[i].first.labels[labCt2]; | |||||
193 | } | |||||
194 | counts[labCt]++; | |||||
195 | lastLabel[labCt] = address2os[i].first.labels[labCt]; | |||||
196 | break; | |||||
197 | } | |||||
198 | } | |||||
199 | for (labCt = 0; labCt < depth; labCt++) { | |||||
200 | address2os[i].first.childNums[labCt] = counts[labCt]; | |||||
201 | } | |||||
202 | for (; labCt < (int)Address::maxDepth; labCt++) { | |||||
203 | address2os[i].first.childNums[labCt] = 0; | |||||
204 | } | |||||
205 | } | |||||
206 | __kmp_free(lastLabel)___kmp_free((lastLabel), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 206); | |||||
207 | __kmp_free(counts)___kmp_free((counts), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 207); | |||||
208 | } | |||||
209 | ||||||
210 | // All of the __kmp_affinity_create_*_map() routines should set | |||||
211 | // __kmp_affinity_masks to a vector of affinity mask objects of length | |||||
212 | // __kmp_affinity_num_masks, if __kmp_affinity_type != affinity_none, and return | |||||
213 | // the number of levels in the machine topology tree (zero if | |||||
214 | // __kmp_affinity_type == affinity_none). | |||||
215 | // | |||||
216 | // All of the __kmp_affinity_create_*_map() routines should set | |||||
217 | // *__kmp_affin_fullMask to the affinity mask for the initialization thread. | |||||
218 | // They need to save and restore the mask, and it could be needed later, so | |||||
219 | // saving it is just an optimization to avoid calling kmp_get_system_affinity() | |||||
220 | // again. | |||||
221 | kmp_affin_mask_t *__kmp_affin_fullMask = NULL__null; | |||||
222 | ||||||
223 | static int nCoresPerPkg, nPackages; | |||||
224 | static int __kmp_nThreadsPerCore; | |||||
225 | #ifndef KMP_DFLT_NTH_CORES | |||||
226 | static int __kmp_ncores; | |||||
227 | #endif | |||||
228 | static int *__kmp_pu_os_idx = NULL__null; | |||||
229 | ||||||
230 | // __kmp_affinity_uniform_topology() doesn't work when called from | |||||
231 | // places which support arbitrarily many levels in the machine topology | |||||
232 | // map, i.e. the non-default cases in __kmp_affinity_create_cpuinfo_map() | |||||
233 | // __kmp_affinity_create_x2apicid_map(). | |||||
234 | inline static bool __kmp_affinity_uniform_topology() { | |||||
235 | return __kmp_avail_proc == (__kmp_nThreadsPerCore * nCoresPerPkg * nPackages); | |||||
236 | } | |||||
237 | ||||||
238 | // Print out the detailed machine topology map, i.e. the physical locations | |||||
239 | // of each OS proc. | |||||
240 | static void __kmp_affinity_print_topology(AddrUnsPair *address2os, int len, | |||||
241 | int depth, int pkgLevel, | |||||
242 | int coreLevel, int threadLevel) { | |||||
243 | int proc; | |||||
244 | ||||||
245 | KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_OSProcToPhysicalThreadMap , "KMP_AFFINITY"), __kmp_msg_null); | |||||
246 | for (proc = 0; proc < len; proc++) { | |||||
247 | int level; | |||||
248 | kmp_str_buf_t buf; | |||||
249 | __kmp_str_buf_init(&buf){ (&buf)->str = (&buf)->bulk; (&buf)->size = sizeof((&buf)->bulk); (&buf)->used = 0; (& buf)->bulk[0] = 0; }; | |||||
250 | for (level = 0; level < depth; level++) { | |||||
251 | if (level == threadLevel) { | |||||
252 | __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Thread)__kmp_i18n_catgets(kmp_i18n_str_Thread)); | |||||
253 | } else if (level == coreLevel) { | |||||
254 | __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Core)__kmp_i18n_catgets(kmp_i18n_str_Core)); | |||||
255 | } else if (level == pkgLevel) { | |||||
256 | __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Package)__kmp_i18n_catgets(kmp_i18n_str_Package)); | |||||
257 | } else if (level > pkgLevel) { | |||||
258 | __kmp_str_buf_print(&buf, "%s_%d ", KMP_I18N_STR(Node)__kmp_i18n_catgets(kmp_i18n_str_Node), | |||||
259 | level - pkgLevel - 1); | |||||
260 | } else { | |||||
261 | __kmp_str_buf_print(&buf, "L%d ", level); | |||||
262 | } | |||||
263 | __kmp_str_buf_print(&buf, "%d ", address2os[proc].first.labels[level]); | |||||
264 | } | |||||
265 | KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", address2os[proc].second,__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_OSProcMapToPack , "KMP_AFFINITY", address2os[proc].second, buf.str), __kmp_msg_null ) | |||||
266 | buf.str)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_OSProcMapToPack , "KMP_AFFINITY", address2os[proc].second, buf.str), __kmp_msg_null ); | |||||
267 | __kmp_str_buf_free(&buf); | |||||
268 | } | |||||
269 | } | |||||
270 | ||||||
271 | #if KMP_USE_HWLOC0 | |||||
272 | ||||||
273 | static void __kmp_affinity_print_hwloc_tp(AddrUnsPair *addrP, int len, | |||||
274 | int depth, int *levels) { | |||||
275 | int proc; | |||||
276 | kmp_str_buf_t buf; | |||||
277 | __kmp_str_buf_init(&buf){ (&buf)->str = (&buf)->bulk; (&buf)->size = sizeof((&buf)->bulk); (&buf)->used = 0; (& buf)->bulk[0] = 0; }; | |||||
278 | KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_OSProcToPhysicalThreadMap , "KMP_AFFINITY"), __kmp_msg_null); | |||||
279 | for (proc = 0; proc < len; proc++) { | |||||
280 | __kmp_str_buf_print(&buf, "%s %d ", KMP_I18N_STR(Package)__kmp_i18n_catgets(kmp_i18n_str_Package), | |||||
281 | addrP[proc].first.labels[0]); | |||||
282 | if (depth > 1) { | |||||
283 | int level = 1; // iterate over levels | |||||
284 | int label = 1; // iterate over labels | |||||
285 | if (__kmp_numa_detected) | |||||
286 | // node level follows package | |||||
287 | if (levels[level++] > 0) | |||||
288 | __kmp_str_buf_print(&buf, "%s %d ", KMP_I18N_STR(Node)__kmp_i18n_catgets(kmp_i18n_str_Node), | |||||
289 | addrP[proc].first.labels[label++]); | |||||
290 | if (__kmp_tile_depth > 0) | |||||
291 | // tile level follows node if any, or package | |||||
292 | if (levels[level++] > 0) | |||||
293 | __kmp_str_buf_print(&buf, "%s %d ", KMP_I18N_STR(Tile)__kmp_i18n_catgets(kmp_i18n_str_Tile), | |||||
294 | addrP[proc].first.labels[label++]); | |||||
295 | if (levels[level++] > 0) | |||||
296 | // core level follows | |||||
297 | __kmp_str_buf_print(&buf, "%s %d ", KMP_I18N_STR(Core)__kmp_i18n_catgets(kmp_i18n_str_Core), | |||||
298 | addrP[proc].first.labels[label++]); | |||||
299 | if (levels[level++] > 0) | |||||
300 | // thread level is the latest | |||||
301 | __kmp_str_buf_print(&buf, "%s %d ", KMP_I18N_STR(Thread)__kmp_i18n_catgets(kmp_i18n_str_Thread), | |||||
302 | addrP[proc].first.labels[label++]); | |||||
303 | KMP_DEBUG_ASSERT(label == depth)if (!(label == depth)) { __kmp_debug_assert("label == depth", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 303); }; | |||||
304 | } | |||||
305 | KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", addrP[proc].second, buf.str)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_OSProcMapToPack , "KMP_AFFINITY", addrP[proc].second, buf.str), __kmp_msg_null ); | |||||
306 | __kmp_str_buf_clear(&buf); | |||||
307 | } | |||||
308 | __kmp_str_buf_free(&buf); | |||||
309 | } | |||||
310 | ||||||
311 | static int nNodePerPkg, nTilePerPkg, nTilePerNode, nCorePerNode, nCorePerTile; | |||||
312 | ||||||
313 | // This function removes the topology levels that are radix 1 and don't offer | |||||
314 | // further information about the topology. The most common example is when you | |||||
315 | // have one thread context per core, we don't want the extra thread context | |||||
316 | // level if it offers no unique labels. So they are removed. | |||||
317 | // return value: the new depth of address2os | |||||
318 | static int __kmp_affinity_remove_radix_one_levels(AddrUnsPair *addrP, int nTh, | |||||
319 | int depth, int *levels) { | |||||
320 | int level; | |||||
321 | int i; | |||||
322 | int radix1_detected; | |||||
323 | int new_depth = depth; | |||||
324 | for (level = depth - 1; level > 0; --level) { | |||||
325 | // Detect if this level is radix 1 | |||||
326 | radix1_detected = 1; | |||||
327 | for (i = 1; i < nTh; ++i) { | |||||
328 | if (addrP[0].first.labels[level] != addrP[i].first.labels[level]) { | |||||
329 | // There are differing label values for this level so it stays | |||||
330 | radix1_detected = 0; | |||||
331 | break; | |||||
332 | } | |||||
333 | } | |||||
334 | if (!radix1_detected) | |||||
335 | continue; | |||||
336 | // Radix 1 was detected | |||||
337 | --new_depth; | |||||
338 | levels[level] = -1; // mark level as not present in address2os array | |||||
339 | if (level == new_depth) { | |||||
340 | // "turn off" deepest level, just decrement the depth that removes | |||||
341 | // the level from address2os array | |||||
342 | for (i = 0; i < nTh; ++i) { | |||||
343 | addrP[i].first.depth--; | |||||
344 | } | |||||
345 | } else { | |||||
346 | // For other levels, we move labels over and also reduce the depth | |||||
347 | int j; | |||||
348 | for (j = level; j < new_depth; ++j) { | |||||
349 | for (i = 0; i < nTh; ++i) { | |||||
350 | addrP[i].first.labels[j] = addrP[i].first.labels[j + 1]; | |||||
351 | addrP[i].first.depth--; | |||||
352 | } | |||||
353 | levels[j + 1] -= 1; | |||||
354 | } | |||||
355 | } | |||||
356 | } | |||||
357 | return new_depth; | |||||
358 | } | |||||
359 | ||||||
360 | // Returns the number of objects of type 'type' below 'obj' within the topology | |||||
361 | // tree structure. e.g., if obj is a HWLOC_OBJ_PACKAGE object, and type is | |||||
362 | // HWLOC_OBJ_PU, then this will return the number of PU's under the SOCKET | |||||
363 | // object. | |||||
364 | static int __kmp_hwloc_get_nobjs_under_obj(hwloc_obj_t obj, | |||||
365 | hwloc_obj_type_t type) { | |||||
366 | int retval = 0; | |||||
367 | hwloc_obj_t first; | |||||
368 | for (first = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, obj->type, | |||||
369 | obj->logical_index, type, 0); | |||||
370 | first != NULL__null && | |||||
371 | hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, obj->type, first) == | |||||
372 | obj; | |||||
373 | first = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, first->type, | |||||
374 | first)) { | |||||
375 | ++retval; | |||||
376 | } | |||||
377 | return retval; | |||||
378 | } | |||||
379 | ||||||
380 | static int __kmp_hwloc_count_children_by_depth(hwloc_topology_t t, | |||||
381 | hwloc_obj_t o, unsigned depth, | |||||
382 | hwloc_obj_t *f) { | |||||
383 | if (o->depth == depth) { | |||||
384 | if (*f == NULL__null) | |||||
385 | *f = o; // output first descendant found | |||||
386 | return 1; | |||||
387 | } | |||||
388 | int sum = 0; | |||||
389 | for (unsigned i = 0; i < o->arity; i++) | |||||
390 | sum += __kmp_hwloc_count_children_by_depth(t, o->children[i], depth, f); | |||||
391 | return sum; // will be 0 if no one found (as PU arity is 0) | |||||
392 | } | |||||
393 | ||||||
394 | static int __kmp_hwloc_count_children_by_type(hwloc_topology_t t, hwloc_obj_t o, | |||||
395 | hwloc_obj_type_t type, | |||||
396 | hwloc_obj_t *f) { | |||||
397 | if (!hwloc_compare_types(o->type, type)) { | |||||
398 | if (*f == NULL__null) | |||||
399 | *f = o; // output first descendant found | |||||
400 | return 1; | |||||
401 | } | |||||
402 | int sum = 0; | |||||
403 | for (unsigned i = 0; i < o->arity; i++) | |||||
404 | sum += __kmp_hwloc_count_children_by_type(t, o->children[i], type, f); | |||||
405 | return sum; // will be 0 if no one found (as PU arity is 0) | |||||
406 | } | |||||
407 | ||||||
408 | static int __kmp_hwloc_process_obj_core_pu(AddrUnsPair *addrPair, | |||||
409 | int &nActiveThreads, | |||||
410 | int &num_active_cores, | |||||
411 | hwloc_obj_t obj, int depth, | |||||
412 | int *labels) { | |||||
413 | hwloc_obj_t core = NULL__null; | |||||
414 | hwloc_topology_t &tp = __kmp_hwloc_topology; | |||||
415 | int NC = __kmp_hwloc_count_children_by_type(tp, obj, HWLOC_OBJ_CORE, &core); | |||||
416 | for (int core_id = 0; core_id < NC; ++core_id, core = core->next_cousin) { | |||||
417 | hwloc_obj_t pu = NULL__null; | |||||
418 | KMP_DEBUG_ASSERT(core != NULL)if (!(core != __null)) { __kmp_debug_assert("core != __null", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 418); }; | |||||
419 | int num_active_threads = 0; | |||||
420 | int NT = __kmp_hwloc_count_children_by_type(tp, core, HWLOC_OBJ_PU, &pu); | |||||
421 | // int NT = core->arity; pu = core->first_child; // faster? | |||||
422 | for (int pu_id = 0; pu_id < NT; ++pu_id, pu = pu->next_cousin) { | |||||
423 | KMP_DEBUG_ASSERT(pu != NULL)if (!(pu != __null)) { __kmp_debug_assert("pu != __null", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 423); }; | |||||
424 | if (!KMP_CPU_ISSET(pu->os_index, __kmp_affin_fullMask)(__kmp_affin_fullMask)->is_set(pu->os_index)) | |||||
425 | continue; // skip inactive (inaccessible) unit | |||||
426 | Address addr(depth + 2); | |||||
427 | KA_TRACE(20, ("Hwloc inserting %d (%d) %d (%d) %d (%d) into address2os\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("Hwloc inserting %d (%d) %d (%d) %d (%d) into address2os\n" , obj->os_index, obj->logical_index, core->os_index, core->logical_index, pu->os_index, pu->logical_index ); } | |||||
428 | obj->os_index, obj->logical_index, core->os_index,if (kmp_a_debug >= 20) { __kmp_debug_printf ("Hwloc inserting %d (%d) %d (%d) %d (%d) into address2os\n" , obj->os_index, obj->logical_index, core->os_index, core->logical_index, pu->os_index, pu->logical_index ); } | |||||
429 | core->logical_index, pu->os_index, pu->logical_index))if (kmp_a_debug >= 20) { __kmp_debug_printf ("Hwloc inserting %d (%d) %d (%d) %d (%d) into address2os\n" , obj->os_index, obj->logical_index, core->os_index, core->logical_index, pu->os_index, pu->logical_index ); }; | |||||
430 | for (int i = 0; i < depth; ++i) | |||||
431 | addr.labels[i] = labels[i]; // package, etc. | |||||
432 | addr.labels[depth] = core_id; // core | |||||
433 | addr.labels[depth + 1] = pu_id; // pu | |||||
434 | addrPair[nActiveThreads] = AddrUnsPair(addr, pu->os_index); | |||||
435 | __kmp_pu_os_idx[nActiveThreads] = pu->os_index; | |||||
436 | nActiveThreads++; | |||||
437 | ++num_active_threads; // count active threads per core | |||||
438 | } | |||||
439 | if (num_active_threads) { // were there any active threads on the core? | |||||
440 | ++__kmp_ncores; // count total active cores | |||||
441 | ++num_active_cores; // count active cores per socket | |||||
442 | if (num_active_threads > __kmp_nThreadsPerCore) | |||||
443 | __kmp_nThreadsPerCore = num_active_threads; // calc maximum | |||||
444 | } | |||||
445 | } | |||||
446 | return 0; | |||||
447 | } | |||||
448 | ||||||
449 | // Check if NUMA node detected below the package, | |||||
450 | // and if tile object is detected and return its depth | |||||
451 | static int __kmp_hwloc_check_numa() { | |||||
452 | hwloc_topology_t &tp = __kmp_hwloc_topology; | |||||
453 | hwloc_obj_t hT, hC, hL, hN, hS; // hwloc objects (pointers to) | |||||
454 | int depth; | |||||
455 | ||||||
456 | // Get some PU | |||||
457 | hT = hwloc_get_obj_by_type(tp, HWLOC_OBJ_PU, 0); | |||||
458 | if (hT == NULL__null) // something has gone wrong | |||||
459 | return 1; | |||||
460 | ||||||
461 | // check NUMA node below PACKAGE | |||||
462 | hN = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hT); | |||||
463 | hS = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hT); | |||||
464 | KMP_DEBUG_ASSERT(hS != NULL)if (!(hS != __null)) { __kmp_debug_assert("hS != __null", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 464); }; | |||||
465 | if (hN != NULL__null && hN->depth > hS->depth) { | |||||
466 | __kmp_numa_detected = TRUE(!0); // socket includes node(s) | |||||
467 | if (__kmp_affinity_gran == affinity_gran_node) { | |||||
468 | __kmp_affinity_gran == affinity_gran_numa; | |||||
469 | } | |||||
470 | } | |||||
471 | ||||||
472 | // check tile, get object by depth because of multiple caches possible | |||||
473 | depth = hwloc_get_cache_type_depth(tp, 2, HWLOC_OBJ_CACHE_UNIFIED); | |||||
474 | hL = hwloc_get_ancestor_obj_by_depth(tp, depth, hT); | |||||
475 | hC = NULL__null; // not used, but reset it here just in case | |||||
476 | if (hL != NULL__null && | |||||
477 | __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE, &hC) > 1) | |||||
478 | __kmp_tile_depth = depth; // tile consists of multiple cores | |||||
479 | return 0; | |||||
480 | } | |||||
481 | ||||||
482 | static int __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os, | |||||
483 | kmp_i18n_id_t *const msg_id) { | |||||
484 | hwloc_topology_t &tp = __kmp_hwloc_topology; // shortcut of a long name | |||||
485 | *address2os = NULL__null; | |||||
486 | *msg_id = kmp_i18n_null; | |||||
487 | ||||||
488 | // Save the affinity mask for the current thread. | |||||
489 | kmp_affin_mask_t *oldMask; | |||||
490 | KMP_CPU_ALLOC(oldMask)(oldMask = __kmp_affinity_dispatch->allocate_mask()); | |||||
491 | __kmp_get_system_affinity(oldMask, TRUE)(oldMask)->get_system_affinity((!0)); | |||||
492 | __kmp_hwloc_check_numa(); | |||||
493 | ||||||
494 | if (!KMP_AFFINITY_CAPABLE()(__kmp_affin_mask_size > 0)) { | |||||
495 | // Hack to try and infer the machine topology using only the data | |||||
496 | // available from cpuid on the current thread, and __kmp_xproc. | |||||
497 | KMP_ASSERT(__kmp_affinity_type == affinity_none)if (!(__kmp_affinity_type == affinity_none)) { __kmp_debug_assert ("__kmp_affinity_type == affinity_none", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 497); }; | |||||
498 | ||||||
499 | nCoresPerPkg = __kmp_hwloc_get_nobjs_under_obj( | |||||
500 | hwloc_get_obj_by_type(tp, HWLOC_OBJ_PACKAGE, 0), HWLOC_OBJ_CORE); | |||||
501 | __kmp_nThreadsPerCore = __kmp_hwloc_get_nobjs_under_obj( | |||||
502 | hwloc_get_obj_by_type(tp, HWLOC_OBJ_CORE, 0), HWLOC_OBJ_PU); | |||||
503 | __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore; | |||||
504 | nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg; | |||||
505 | if (__kmp_affinity_verbose) { | |||||
506 | KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffNotCapableUseLocCpuidL11 , "KMP_AFFINITY"), __kmp_msg_null); | |||||
507 | KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AvailableOSProc , "KMP_AFFINITY", __kmp_avail_proc), __kmp_msg_null); | |||||
508 | if (__kmp_affinity_uniform_topology()) { | |||||
509 | KMP_INFORM(Uniform, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Uniform , "KMP_AFFINITY"), __kmp_msg_null); | |||||
510 | } else { | |||||
511 | KMP_INFORM(NonUniform, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_NonUniform , "KMP_AFFINITY"), __kmp_msg_null); | |||||
512 | } | |||||
513 | KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Topology , "KMP_AFFINITY", nPackages, nCoresPerPkg, __kmp_nThreadsPerCore , __kmp_ncores), __kmp_msg_null) | |||||
514 | __kmp_nThreadsPerCore, __kmp_ncores)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Topology , "KMP_AFFINITY", nPackages, nCoresPerPkg, __kmp_nThreadsPerCore , __kmp_ncores), __kmp_msg_null); | |||||
515 | } | |||||
516 | KMP_CPU_FREE(oldMask)__kmp_affinity_dispatch->deallocate_mask(oldMask); | |||||
517 | return 0; | |||||
518 | } | |||||
519 | ||||||
520 | int depth = 3; | |||||
521 | int levels[5] = {0, 1, 2, 3, 4}; // package, [node,] [tile,] core, thread | |||||
522 | int labels[3] = {0}; // package [,node] [,tile] - head of lables array | |||||
523 | if (__kmp_numa_detected) | |||||
524 | ++depth; | |||||
525 | if (__kmp_tile_depth) | |||||
526 | ++depth; | |||||
527 | ||||||
528 | // Allocate the data structure to be returned. | |||||
529 | AddrUnsPair *retval = | |||||
530 | (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc)___kmp_allocate((sizeof(AddrUnsPair) * __kmp_avail_proc), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 530); | |||||
531 | KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL)if (!(__kmp_pu_os_idx == __null)) { __kmp_debug_assert("__kmp_pu_os_idx == __null" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 531); }; | |||||
532 | __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc)___kmp_allocate((sizeof(int) * __kmp_avail_proc), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 532); | |||||
533 | ||||||
534 | // When affinity is off, this routine will still be called to set | |||||
535 | // __kmp_ncores, as well as __kmp_nThreadsPerCore, | |||||
536 | // nCoresPerPkg, & nPackages. Make sure all these vars are set | |||||
537 | // correctly, and return if affinity is not enabled. | |||||
538 | ||||||
539 | hwloc_obj_t socket, node, tile; | |||||
540 | int nActiveThreads = 0; | |||||
541 | int socket_id = 0; | |||||
542 | // re-calculate globals to count only accessible resources | |||||
543 | __kmp_ncores = nPackages = nCoresPerPkg = __kmp_nThreadsPerCore = 0; | |||||
544 | nNodePerPkg = nTilePerPkg = nTilePerNode = nCorePerNode = nCorePerTile = 0; | |||||
545 | for (socket = hwloc_get_obj_by_type(tp, HWLOC_OBJ_PACKAGE, 0); socket != NULL__null; | |||||
546 | socket = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PACKAGE, socket), | |||||
547 | socket_id++) { | |||||
548 | labels[0] = socket_id; | |||||
549 | if (__kmp_numa_detected) { | |||||
550 | int NN; | |||||
551 | int n_active_nodes = 0; | |||||
552 | node = NULL__null; | |||||
553 | NN = __kmp_hwloc_count_children_by_type(tp, socket, HWLOC_OBJ_NUMANODE, | |||||
554 | &node); | |||||
555 | for (int node_id = 0; node_id < NN; ++node_id, node = node->next_cousin) { | |||||
556 | labels[1] = node_id; | |||||
557 | if (__kmp_tile_depth) { | |||||
558 | // NUMA + tiles | |||||
559 | int NT; | |||||
560 | int n_active_tiles = 0; | |||||
561 | tile = NULL__null; | |||||
562 | NT = __kmp_hwloc_count_children_by_depth(tp, node, __kmp_tile_depth, | |||||
563 | &tile); | |||||
564 | for (int tl_id = 0; tl_id < NT; ++tl_id, tile = tile->next_cousin) { | |||||
565 | labels[2] = tl_id; | |||||
566 | int n_active_cores = 0; | |||||
567 | __kmp_hwloc_process_obj_core_pu(retval, nActiveThreads, | |||||
568 | n_active_cores, tile, 3, labels); | |||||
569 | if (n_active_cores) { // were there any active cores on the socket? | |||||
570 | ++n_active_tiles; // count active tiles per node | |||||
571 | if (n_active_cores > nCorePerTile) | |||||
572 | nCorePerTile = n_active_cores; // calc maximum | |||||
573 | } | |||||
574 | } | |||||
575 | if (n_active_tiles) { // were there any active tiles on the socket? | |||||
576 | ++n_active_nodes; // count active nodes per package | |||||
577 | if (n_active_tiles > nTilePerNode) | |||||
578 | nTilePerNode = n_active_tiles; // calc maximum | |||||
579 | } | |||||
580 | } else { | |||||
581 | // NUMA, no tiles | |||||
582 | int n_active_cores = 0; | |||||
583 | __kmp_hwloc_process_obj_core_pu(retval, nActiveThreads, | |||||
584 | n_active_cores, node, 2, labels); | |||||
585 | if (n_active_cores) { // were there any active cores on the socket? | |||||
586 | ++n_active_nodes; // count active nodes per package | |||||
587 | if (n_active_cores > nCorePerNode) | |||||
588 | nCorePerNode = n_active_cores; // calc maximum | |||||
589 | } | |||||
590 | } | |||||
591 | } | |||||
592 | if (n_active_nodes) { // were there any active nodes on the socket? | |||||
593 | ++nPackages; // count total active packages | |||||
594 | if (n_active_nodes > nNodePerPkg) | |||||
595 | nNodePerPkg = n_active_nodes; // calc maximum | |||||
596 | } | |||||
597 | } else { | |||||
598 | if (__kmp_tile_depth) { | |||||
599 | // no NUMA, tiles | |||||
600 | int NT; | |||||
601 | int n_active_tiles = 0; | |||||
602 | tile = NULL__null; | |||||
603 | NT = __kmp_hwloc_count_children_by_depth(tp, socket, __kmp_tile_depth, | |||||
604 | &tile); | |||||
605 | for (int tl_id = 0; tl_id < NT; ++tl_id, tile = tile->next_cousin) { | |||||
606 | labels[1] = tl_id; | |||||
607 | int n_active_cores = 0; | |||||
608 | __kmp_hwloc_process_obj_core_pu(retval, nActiveThreads, | |||||
609 | n_active_cores, tile, 2, labels); | |||||
610 | if (n_active_cores) { // were there any active cores on the socket? | |||||
611 | ++n_active_tiles; // count active tiles per package | |||||
612 | if (n_active_cores > nCorePerTile) | |||||
613 | nCorePerTile = n_active_cores; // calc maximum | |||||
614 | } | |||||
615 | } | |||||
616 | if (n_active_tiles) { // were there any active tiles on the socket? | |||||
617 | ++nPackages; // count total active packages | |||||
618 | if (n_active_tiles > nTilePerPkg) | |||||
619 | nTilePerPkg = n_active_tiles; // calc maximum | |||||
620 | } | |||||
621 | } else { | |||||
622 | // no NUMA, no tiles | |||||
623 | int n_active_cores = 0; | |||||
624 | __kmp_hwloc_process_obj_core_pu(retval, nActiveThreads, n_active_cores, | |||||
625 | socket, 1, labels); | |||||
626 | if (n_active_cores) { // were there any active cores on the socket? | |||||
627 | ++nPackages; // count total active packages | |||||
628 | if (n_active_cores > nCoresPerPkg) | |||||
629 | nCoresPerPkg = n_active_cores; // calc maximum | |||||
630 | } | |||||
631 | } | |||||
632 | } | |||||
633 | } | |||||
634 | ||||||
635 | // If there's only one thread context to bind to, return now. | |||||
636 | KMP_DEBUG_ASSERT(nActiveThreads == __kmp_avail_proc)if (!(nActiveThreads == __kmp_avail_proc)) { __kmp_debug_assert ("nActiveThreads == __kmp_avail_proc", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 636); }; | |||||
637 | KMP_ASSERT(nActiveThreads > 0)if (!(nActiveThreads > 0)) { __kmp_debug_assert("nActiveThreads > 0" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 637); }; | |||||
638 | if (nActiveThreads == 1) { | |||||
639 | __kmp_ncores = nPackages = 1; | |||||
640 | __kmp_nThreadsPerCore = nCoresPerPkg = 1; | |||||
641 | if (__kmp_affinity_verbose) { | |||||
642 | char buf[KMP_AFFIN_MASK_PRINT_LEN1024]; | |||||
643 | __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN1024, oldMask); | |||||
644 | ||||||
645 | KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffUsingHwloc , "KMP_AFFINITY"), __kmp_msg_null); | |||||
646 | if (__kmp_affinity_respect_mask) { | |||||
647 | KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_InitOSProcSetRespect , "KMP_AFFINITY", buf), __kmp_msg_null); | |||||
648 | } else { | |||||
649 | KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_InitOSProcSetNotRespect , "KMP_AFFINITY", buf), __kmp_msg_null); | |||||
650 | } | |||||
651 | KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AvailableOSProc , "KMP_AFFINITY", __kmp_avail_proc), __kmp_msg_null); | |||||
652 | KMP_INFORM(Uniform, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Uniform , "KMP_AFFINITY"), __kmp_msg_null); | |||||
653 | KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Topology , "KMP_AFFINITY", nPackages, nCoresPerPkg, __kmp_nThreadsPerCore , __kmp_ncores), __kmp_msg_null) | |||||
654 | __kmp_nThreadsPerCore, __kmp_ncores)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Topology , "KMP_AFFINITY", nPackages, nCoresPerPkg, __kmp_nThreadsPerCore , __kmp_ncores), __kmp_msg_null); | |||||
655 | } | |||||
656 | ||||||
657 | if (__kmp_affinity_type == affinity_none) { | |||||
658 | __kmp_free(retval)___kmp_free((retval), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 658); | |||||
659 | KMP_CPU_FREE(oldMask)__kmp_affinity_dispatch->deallocate_mask(oldMask); | |||||
660 | return 0; | |||||
661 | } | |||||
662 | ||||||
663 | // Form an Address object which only includes the package level. | |||||
664 | Address addr(1); | |||||
665 | addr.labels[0] = retval[0].first.labels[0]; | |||||
666 | retval[0].first = addr; | |||||
667 | ||||||
668 | if (__kmp_affinity_gran_levels < 0) { | |||||
669 | __kmp_affinity_gran_levels = 0; | |||||
670 | } | |||||
671 | ||||||
672 | if (__kmp_affinity_verbose) { | |||||
673 | __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1); | |||||
674 | } | |||||
675 | ||||||
676 | *address2os = retval; | |||||
677 | KMP_CPU_FREE(oldMask)__kmp_affinity_dispatch->deallocate_mask(oldMask); | |||||
678 | return 1; | |||||
679 | } | |||||
680 | ||||||
681 | // Sort the table by physical Id. | |||||
682 | qsort(retval, nActiveThreads, sizeof(*retval), | |||||
683 | __kmp_affinity_cmp_Address_labels); | |||||
684 | ||||||
685 | // Check to see if the machine topology is uniform | |||||
686 | int nPUs = nPackages * __kmp_nThreadsPerCore; | |||||
687 | if (__kmp_numa_detected) { | |||||
688 | if (__kmp_tile_depth) { // NUMA + tiles | |||||
689 | nPUs *= (nNodePerPkg * nTilePerNode * nCorePerTile); | |||||
690 | } else { // NUMA, no tiles | |||||
691 | nPUs *= (nNodePerPkg * nCorePerNode); | |||||
692 | } | |||||
693 | } else { | |||||
694 | if (__kmp_tile_depth) { // no NUMA, tiles | |||||
695 | nPUs *= (nTilePerPkg * nCorePerTile); | |||||
696 | } else { // no NUMA, no tiles | |||||
697 | nPUs *= nCoresPerPkg; | |||||
698 | } | |||||
699 | } | |||||
700 | unsigned uniform = (nPUs == nActiveThreads); | |||||
701 | ||||||
702 | // Print the machine topology summary. | |||||
703 | if (__kmp_affinity_verbose) { | |||||
704 | char mask[KMP_AFFIN_MASK_PRINT_LEN1024]; | |||||
705 | __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN1024, oldMask); | |||||
706 | if (__kmp_affinity_respect_mask) { | |||||
707 | KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_InitOSProcSetRespect , "KMP_AFFINITY", mask), __kmp_msg_null); | |||||
708 | } else { | |||||
709 | KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_InitOSProcSetNotRespect , "KMP_AFFINITY", mask), __kmp_msg_null); | |||||
710 | } | |||||
711 | KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AvailableOSProc , "KMP_AFFINITY", __kmp_avail_proc), __kmp_msg_null); | |||||
712 | if (uniform) { | |||||
713 | KMP_INFORM(Uniform, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Uniform , "KMP_AFFINITY"), __kmp_msg_null); | |||||
714 | } else { | |||||
715 | KMP_INFORM(NonUniform, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_NonUniform , "KMP_AFFINITY"), __kmp_msg_null); | |||||
716 | } | |||||
717 | if (__kmp_numa_detected) { | |||||
718 | if (__kmp_tile_depth) { // NUMA + tiles | |||||
719 | KMP_INFORM(TopologyExtraNoTi, "KMP_AFFINITY", nPackages, nNodePerPkg,__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_TopologyExtraNoTi , "KMP_AFFINITY", nPackages, nNodePerPkg, nTilePerNode, nCorePerTile , __kmp_nThreadsPerCore, __kmp_ncores), __kmp_msg_null) | |||||
720 | nTilePerNode, nCorePerTile, __kmp_nThreadsPerCore,__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_TopologyExtraNoTi , "KMP_AFFINITY", nPackages, nNodePerPkg, nTilePerNode, nCorePerTile , __kmp_nThreadsPerCore, __kmp_ncores), __kmp_msg_null) | |||||
721 | __kmp_ncores)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_TopologyExtraNoTi , "KMP_AFFINITY", nPackages, nNodePerPkg, nTilePerNode, nCorePerTile , __kmp_nThreadsPerCore, __kmp_ncores), __kmp_msg_null); | |||||
722 | } else { // NUMA, no tiles | |||||
723 | KMP_INFORM(TopologyExtraNode, "KMP_AFFINITY", nPackages, nNodePerPkg,__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_TopologyExtraNode , "KMP_AFFINITY", nPackages, nNodePerPkg, nCorePerNode, __kmp_nThreadsPerCore , __kmp_ncores), __kmp_msg_null) | |||||
724 | nCorePerNode, __kmp_nThreadsPerCore, __kmp_ncores)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_TopologyExtraNode , "KMP_AFFINITY", nPackages, nNodePerPkg, nCorePerNode, __kmp_nThreadsPerCore , __kmp_ncores), __kmp_msg_null); | |||||
725 | nPUs *= (nNodePerPkg * nCorePerNode); | |||||
726 | } | |||||
727 | } else { | |||||
728 | if (__kmp_tile_depth) { // no NUMA, tiles | |||||
729 | KMP_INFORM(TopologyExtraTile, "KMP_AFFINITY", nPackages, nTilePerPkg,__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_TopologyExtraTile , "KMP_AFFINITY", nPackages, nTilePerPkg, nCorePerTile, __kmp_nThreadsPerCore , __kmp_ncores), __kmp_msg_null) | |||||
730 | nCorePerTile, __kmp_nThreadsPerCore, __kmp_ncores)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_TopologyExtraTile , "KMP_AFFINITY", nPackages, nTilePerPkg, nCorePerTile, __kmp_nThreadsPerCore , __kmp_ncores), __kmp_msg_null); | |||||
731 | } else { // no NUMA, no tiles | |||||
732 | kmp_str_buf_t buf; | |||||
733 | __kmp_str_buf_init(&buf){ (&buf)->str = (&buf)->bulk; (&buf)->size = sizeof((&buf)->bulk); (&buf)->used = 0; (& buf)->bulk[0] = 0; }; | |||||
734 | __kmp_str_buf_print(&buf, "%d", nPackages); | |||||
735 | KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_TopologyExtra , "KMP_AFFINITY", buf.str, nCoresPerPkg, __kmp_nThreadsPerCore , __kmp_ncores), __kmp_msg_null) | |||||
736 | __kmp_nThreadsPerCore, __kmp_ncores)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_TopologyExtra , "KMP_AFFINITY", buf.str, nCoresPerPkg, __kmp_nThreadsPerCore , __kmp_ncores), __kmp_msg_null); | |||||
737 | __kmp_str_buf_free(&buf); | |||||
738 | } | |||||
739 | } | |||||
740 | } | |||||
741 | ||||||
742 | if (__kmp_affinity_type == affinity_none) { | |||||
743 | __kmp_free(retval)___kmp_free((retval), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 743); | |||||
744 | KMP_CPU_FREE(oldMask)__kmp_affinity_dispatch->deallocate_mask(oldMask); | |||||
745 | return 0; | |||||
746 | } | |||||
747 | ||||||
748 | int depth_full = depth; // number of levels before compressing | |||||
749 | // Find any levels with radix 1, and remove them from the map | |||||
750 | // (except for the package level). | |||||
751 | depth = __kmp_affinity_remove_radix_one_levels(retval, nActiveThreads, depth, | |||||
752 | levels); | |||||
753 | KMP_DEBUG_ASSERT(__kmp_affinity_gran != affinity_gran_default)if (!(__kmp_affinity_gran != affinity_gran_default)) { __kmp_debug_assert ("__kmp_affinity_gran != affinity_gran_default", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 753); }; | |||||
754 | if (__kmp_affinity_gran_levels < 0) { | |||||
755 | // Set the granularity level based on what levels are modeled | |||||
756 | // in the machine topology map. | |||||
757 | __kmp_affinity_gran_levels = 0; // lowest level (e.g. fine) | |||||
758 | if (__kmp_affinity_gran > affinity_gran_thread) { | |||||
759 | for (int i = 1; i <= depth_full; ++i) { | |||||
760 | if (__kmp_affinity_gran <= i) // only count deeper levels | |||||
761 | break; | |||||
762 | if (levels[depth_full - i] > 0) | |||||
763 | __kmp_affinity_gran_levels++; | |||||
764 | } | |||||
765 | } | |||||
766 | if (__kmp_affinity_gran > affinity_gran_package) | |||||
767 | __kmp_affinity_gran_levels++; // e.g. granularity = group | |||||
768 | } | |||||
769 | ||||||
770 | if (__kmp_affinity_verbose) | |||||
771 | __kmp_affinity_print_hwloc_tp(retval, nActiveThreads, depth, levels); | |||||
772 | ||||||
773 | KMP_CPU_FREE(oldMask)__kmp_affinity_dispatch->deallocate_mask(oldMask); | |||||
774 | *address2os = retval; | |||||
775 | return depth; | |||||
776 | } | |||||
777 | #endif // KMP_USE_HWLOC | |||||
778 | ||||||
779 | // If we don't know how to retrieve the machine's processor topology, or | |||||
780 | // encounter an error in doing so, this routine is called to form a "flat" | |||||
781 | // mapping of os thread id's <-> processor id's. | |||||
782 | static int __kmp_affinity_create_flat_map(AddrUnsPair **address2os, | |||||
783 | kmp_i18n_id_t *const msg_id) { | |||||
784 | *address2os = NULL__null; | |||||
785 | *msg_id = kmp_i18n_null; | |||||
786 | ||||||
787 | // Even if __kmp_affinity_type == affinity_none, this routine might still | |||||
788 | // called to set __kmp_ncores, as well as | |||||
789 | // __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages. | |||||
790 | if (!KMP_AFFINITY_CAPABLE()(__kmp_affin_mask_size > 0)) { | |||||
791 | KMP_ASSERT(__kmp_affinity_type == affinity_none)if (!(__kmp_affinity_type == affinity_none)) { __kmp_debug_assert ("__kmp_affinity_type == affinity_none", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 791); }; | |||||
792 | __kmp_ncores = nPackages = __kmp_xproc; | |||||
793 | __kmp_nThreadsPerCore = nCoresPerPkg = 1; | |||||
794 | if (__kmp_affinity_verbose) { | |||||
795 | KMP_INFORM(AffFlatTopology, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffFlatTopology , "KMP_AFFINITY"), __kmp_msg_null); | |||||
796 | KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AvailableOSProc , "KMP_AFFINITY", __kmp_avail_proc), __kmp_msg_null); | |||||
797 | KMP_INFORM(Uniform, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Uniform , "KMP_AFFINITY"), __kmp_msg_null); | |||||
798 | KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Topology , "KMP_AFFINITY", nPackages, nCoresPerPkg, __kmp_nThreadsPerCore , __kmp_ncores), __kmp_msg_null) | |||||
799 | __kmp_nThreadsPerCore, __kmp_ncores)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Topology , "KMP_AFFINITY", nPackages, nCoresPerPkg, __kmp_nThreadsPerCore , __kmp_ncores), __kmp_msg_null); | |||||
800 | } | |||||
801 | return 0; | |||||
802 | } | |||||
803 | ||||||
804 | // When affinity is off, this routine will still be called to set | |||||
805 | // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages. | |||||
806 | // Make sure all these vars are set correctly, and return now if affinity is | |||||
807 | // not enabled. | |||||
808 | __kmp_ncores = nPackages = __kmp_avail_proc; | |||||
809 | __kmp_nThreadsPerCore = nCoresPerPkg = 1; | |||||
810 | if (__kmp_affinity_verbose) { | |||||
811 | char buf[KMP_AFFIN_MASK_PRINT_LEN1024]; | |||||
812 | __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN1024, | |||||
813 | __kmp_affin_fullMask); | |||||
814 | ||||||
815 | KMP_INFORM(AffCapableUseFlat, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffCapableUseFlat , "KMP_AFFINITY"), __kmp_msg_null); | |||||
816 | if (__kmp_affinity_respect_mask) { | |||||
817 | KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_InitOSProcSetRespect , "KMP_AFFINITY", buf), __kmp_msg_null); | |||||
818 | } else { | |||||
819 | KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_InitOSProcSetNotRespect , "KMP_AFFINITY", buf), __kmp_msg_null); | |||||
820 | } | |||||
821 | KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AvailableOSProc , "KMP_AFFINITY", __kmp_avail_proc), __kmp_msg_null); | |||||
822 | KMP_INFORM(Uniform, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Uniform , "KMP_AFFINITY"), __kmp_msg_null); | |||||
823 | KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Topology , "KMP_AFFINITY", nPackages, nCoresPerPkg, __kmp_nThreadsPerCore , __kmp_ncores), __kmp_msg_null) | |||||
824 | __kmp_nThreadsPerCore, __kmp_ncores)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Topology , "KMP_AFFINITY", nPackages, nCoresPerPkg, __kmp_nThreadsPerCore , __kmp_ncores), __kmp_msg_null); | |||||
825 | } | |||||
826 | KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL)if (!(__kmp_pu_os_idx == __null)) { __kmp_debug_assert("__kmp_pu_os_idx == __null" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 826); }; | |||||
827 | __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc)___kmp_allocate((sizeof(int) * __kmp_avail_proc), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 827); | |||||
828 | if (__kmp_affinity_type == affinity_none) { | |||||
829 | int avail_ct = 0; | |||||
830 | int i; | |||||
831 | KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask)for (i = (__kmp_affin_fullMask)->begin(); (int)i != (__kmp_affin_fullMask )->end(); i = (__kmp_affin_fullMask)->next(i)) { | |||||
832 | if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)(__kmp_affin_fullMask)->is_set(i)) | |||||
833 | continue; | |||||
834 | __kmp_pu_os_idx[avail_ct++] = i; // suppose indices are flat | |||||
835 | } | |||||
836 | return 0; | |||||
837 | } | |||||
838 | ||||||
839 | // Contruct the data structure to be returned. | |||||
840 | *address2os = | |||||
841 | (AddrUnsPair *)__kmp_allocate(sizeof(**address2os) * __kmp_avail_proc)___kmp_allocate((sizeof(**address2os) * __kmp_avail_proc), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 841); | |||||
842 | int avail_ct = 0; | |||||
843 | int i; | |||||
844 | KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask)for (i = (__kmp_affin_fullMask)->begin(); (int)i != (__kmp_affin_fullMask )->end(); i = (__kmp_affin_fullMask)->next(i)) { | |||||
845 | // Skip this proc if it is not included in the machine model. | |||||
846 | if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)(__kmp_affin_fullMask)->is_set(i)) { | |||||
847 | continue; | |||||
848 | } | |||||
849 | __kmp_pu_os_idx[avail_ct] = i; // suppose indices are flat | |||||
850 | Address addr(1); | |||||
851 | addr.labels[0] = i; | |||||
852 | (*address2os)[avail_ct++] = AddrUnsPair(addr, i); | |||||
853 | } | |||||
854 | if (__kmp_affinity_verbose) { | |||||
855 | KMP_INFORM(OSProcToPackage, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_OSProcToPackage , "KMP_AFFINITY"), __kmp_msg_null); | |||||
856 | } | |||||
857 | ||||||
858 | if (__kmp_affinity_gran_levels < 0) { | |||||
859 | // Only the package level is modeled in the machine topology map, | |||||
860 | // so the #levels of granularity is either 0 or 1. | |||||
861 | if (__kmp_affinity_gran > affinity_gran_package) { | |||||
862 | __kmp_affinity_gran_levels = 1; | |||||
863 | } else { | |||||
864 | __kmp_affinity_gran_levels = 0; | |||||
865 | } | |||||
866 | } | |||||
867 | return 1; | |||||
868 | } | |||||
869 | ||||||
870 | #if KMP_GROUP_AFFINITY0 | |||||
871 | ||||||
872 | // If multiple Windows* OS processor groups exist, we can create a 2-level | |||||
873 | // topology map with the groups at level 0 and the individual procs at level 1. | |||||
874 | // This facilitates letting the threads float among all procs in a group, | |||||
875 | // if granularity=group (the default when there are multiple groups). | |||||
876 | static int __kmp_affinity_create_proc_group_map(AddrUnsPair **address2os, | |||||
877 | kmp_i18n_id_t *const msg_id) { | |||||
878 | *address2os = NULL__null; | |||||
879 | *msg_id = kmp_i18n_null; | |||||
880 | ||||||
881 | // If we aren't affinity capable, then return now. | |||||
882 | // The flat mapping will be used. | |||||
883 | if (!KMP_AFFINITY_CAPABLE()(__kmp_affin_mask_size > 0)) { | |||||
884 | // FIXME set *msg_id | |||||
885 | return -1; | |||||
886 | } | |||||
887 | ||||||
888 | // Contruct the data structure to be returned. | |||||
889 | *address2os = | |||||
890 | (AddrUnsPair *)__kmp_allocate(sizeof(**address2os) * __kmp_avail_proc)___kmp_allocate((sizeof(**address2os) * __kmp_avail_proc), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 890); | |||||
891 | KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL)if (!(__kmp_pu_os_idx == __null)) { __kmp_debug_assert("__kmp_pu_os_idx == __null" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 891); }; | |||||
892 | __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc)___kmp_allocate((sizeof(int) * __kmp_avail_proc), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 892); | |||||
893 | int avail_ct = 0; | |||||
894 | int i; | |||||
895 | KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask)for (i = (__kmp_affin_fullMask)->begin(); (int)i != (__kmp_affin_fullMask )->end(); i = (__kmp_affin_fullMask)->next(i)) { | |||||
896 | // Skip this proc if it is not included in the machine model. | |||||
897 | if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)(__kmp_affin_fullMask)->is_set(i)) { | |||||
898 | continue; | |||||
899 | } | |||||
900 | __kmp_pu_os_idx[avail_ct] = i; // suppose indices are flat | |||||
901 | Address addr(2); | |||||
902 | addr.labels[0] = i / (CHAR_BIT8 * sizeof(DWORD_PTR)); | |||||
903 | addr.labels[1] = i % (CHAR_BIT8 * sizeof(DWORD_PTR)); | |||||
904 | (*address2os)[avail_ct++] = AddrUnsPair(addr, i); | |||||
905 | ||||||
906 | if (__kmp_affinity_verbose) { | |||||
907 | KMP_INFORM(AffOSProcToGroup, "KMP_AFFINITY", i, addr.labels[0],__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffOSProcToGroup , "KMP_AFFINITY", i, addr.labels[0], addr.labels[1]), __kmp_msg_null ) | |||||
908 | addr.labels[1])__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffOSProcToGroup , "KMP_AFFINITY", i, addr.labels[0], addr.labels[1]), __kmp_msg_null ); | |||||
909 | } | |||||
910 | } | |||||
911 | ||||||
912 | if (__kmp_affinity_gran_levels < 0) { | |||||
913 | if (__kmp_affinity_gran == affinity_gran_group) { | |||||
914 | __kmp_affinity_gran_levels = 1; | |||||
915 | } else if ((__kmp_affinity_gran == affinity_gran_fine) || | |||||
916 | (__kmp_affinity_gran == affinity_gran_thread)) { | |||||
917 | __kmp_affinity_gran_levels = 0; | |||||
918 | } else { | |||||
919 | const char *gran_str = NULL__null; | |||||
920 | if (__kmp_affinity_gran == affinity_gran_core) { | |||||
921 | gran_str = "core"; | |||||
922 | } else if (__kmp_affinity_gran == affinity_gran_package) { | |||||
923 | gran_str = "package"; | |||||
924 | } else if (__kmp_affinity_gran == affinity_gran_node) { | |||||
925 | gran_str = "node"; | |||||
926 | } else { | |||||
927 | KMP_ASSERT(0)if (!(0)) { __kmp_debug_assert("0", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 927); }; | |||||
928 | } | |||||
929 | ||||||
930 | // Warning: can't use affinity granularity \"gran\" with group topology | |||||
931 | // method, using "thread" | |||||
932 | __kmp_affinity_gran_levels = 0; | |||||
933 | } | |||||
934 | } | |||||
935 | return 2; | |||||
936 | } | |||||
937 | ||||||
938 | #endif /* KMP_GROUP_AFFINITY */ | |||||
939 | ||||||
940 | #if KMP_ARCH_X860 || KMP_ARCH_X86_641 | |||||
941 | ||||||
// Returns the smallest r such that (1 << r) >= count, i.e. the number of bits
// needed to hold `count` distinct ids. Returns 0 for count <= 1 (including
// zero and negative values).
//
// The comparison is done in unsigned arithmetic so that large counts
// (> 2^30) never evaluate a signed 1 << 31 or a shift by the full type width,
// both of which are undefined behavior and could keep the loop from ending.
static int __kmp_cpuid_mask_width(int count) {
  if (count <= 1)
    return 0;

  const unsigned target = static_cast<unsigned>(count);
  int r = 0;
  // target <= INT_MAX < 2^31, so the loop stops at r == 31 at the latest.
  while ((1u << r) < target)
    ++r;
  return r;
}
949 | ||||||
// Per-OS-proc record used while building the topology map from APIC ids.
// All members are public plain data.
struct apicThreadInfo {
  // Input: the OS proc id handed to __kmp_affinity_bind_thread.
  unsigned osId;

  // Values read from cpuid after binding to that proc.
  unsigned apicId;
  unsigned maxCoresPerPkg;
  unsigned maxThreadsPerPkg;

  // Ids inferred from the cpuid values above.
  unsigned pkgId;
  unsigned coreId;
  unsigned threadId;
};
960 | ||||||
961 | static int __kmp_affinity_cmp_apicThreadInfo_phys_id(const void *a, | |||||
962 | const void *b) { | |||||
963 | const apicThreadInfo *aa = (const apicThreadInfo *)a; | |||||
964 | const apicThreadInfo *bb = (const apicThreadInfo *)b; | |||||
965 | if (aa->pkgId < bb->pkgId) | |||||
966 | return -1; | |||||
967 | if (aa->pkgId > bb->pkgId) | |||||
968 | return 1; | |||||
969 | if (aa->coreId < bb->coreId) | |||||
970 | return -1; | |||||
971 | if (aa->coreId > bb->coreId) | |||||
972 | return 1; | |||||
973 | if (aa->threadId < bb->threadId) | |||||
974 | return -1; | |||||
975 | if (aa->threadId > bb->threadId) | |||||
976 | return 1; | |||||
977 | return 0; | |||||
978 | } | |||||
979 | ||||||
980 | // On IA-32 architecture and Intel(R) 64 architecture, we attempt to use | |||||
981 | // an algorithm which cycles through the available os threads, setting | |||||
982 | // the current thread's affinity mask to that thread, and then retrieves | |||||
983 | // the Apic Id for each thread context using the cpuid instruction. | |||||
984 | static int __kmp_affinity_create_apicid_map(AddrUnsPair **address2os, | |||||
985 | kmp_i18n_id_t *const msg_id) { | |||||
986 | kmp_cpuid buf; | |||||
987 | *address2os = NULL__null; | |||||
988 | *msg_id = kmp_i18n_null; | |||||
989 | ||||||
990 | // Check if cpuid leaf 4 is supported. | |||||
991 | __kmp_x86_cpuid(0, 0, &buf); | |||||
992 | if (buf.eax < 4) { | |||||
993 | *msg_id = kmp_i18n_str_NoLeaf4Support; | |||||
994 | return -1; | |||||
995 | } | |||||
996 | ||||||
997 | // The algorithm used starts by setting the affinity to each available thread | |||||
998 | // and retrieving info from the cpuid instruction, so if we are not capable of | |||||
999 | // calling __kmp_get_system_affinity() and __kmp_set_system_affinity(), then we | |||||
1000 | // need to do something else - use the defaults that we calculated from | |||||
1001 | // issuing cpuid without binding to each proc. | |||||
1002 | if (!KMP_AFFINITY_CAPABLE()(__kmp_affin_mask_size > 0)) { | |||||
1003 | // Hack to try and infer the machine topology using only the data | |||||
1004 | // available from cpuid on the current thread, and __kmp_xproc. | |||||
1005 | KMP_ASSERT(__kmp_affinity_type == affinity_none)if (!(__kmp_affinity_type == affinity_none)) { __kmp_debug_assert ("__kmp_affinity_type == affinity_none", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 1005); }; | |||||
1006 | ||||||
1007 | // Get an upper bound on the number of threads per package using cpuid(1). | |||||
1008 | // On some OS/chip combinations where HT is supported by the chip but is | |||||
1009 | // disabled, this value will be 2 on a single core chip. Usually, it will be | |||||
1010 | // 2 if HT is enabled and 1 if HT is disabled. | |||||
1011 | __kmp_x86_cpuid(1, 0, &buf); | |||||
1012 | int maxThreadsPerPkg = (buf.ebx >> 16) & 0xff; | |||||
1013 | if (maxThreadsPerPkg == 0) { | |||||
1014 | maxThreadsPerPkg = 1; | |||||
1015 | } | |||||
1016 | ||||||
1017 | // The num cores per pkg comes from cpuid(4). 1 must be added to the encoded | |||||
1018 | // value. | |||||
1019 | // | |||||
1020 | // The author of cpu_count.cpp treated this only an upper bound on the | |||||
1021 | // number of cores, but I haven't seen any cases where it was greater than | |||||
1022 | // the actual number of cores, so we will treat it as exact in this block of | |||||
1023 | // code. | |||||
1024 | // | |||||
1025 | // First, we need to check if cpuid(4) is supported on this chip. To see if | |||||
1026 | // cpuid(n) is supported, issue cpuid(0) and check if eax has the value n or | |||||
1027 | // greater. | |||||
1028 | __kmp_x86_cpuid(0, 0, &buf); | |||||
1029 | if (buf.eax >= 4) { | |||||
1030 | __kmp_x86_cpuid(4, 0, &buf); | |||||
1031 | nCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1; | |||||
1032 | } else { | |||||
1033 | nCoresPerPkg = 1; | |||||
1034 | } | |||||
1035 | ||||||
1036 | // There is no way to reliably tell if HT is enabled without issuing the | |||||
1037 | // cpuid instruction from every thread, and correlating the cpuid info, so | |||||
1038 | // if the machine is not affinity capable, we assume that HT is off. We have | |||||
1039 | // seen quite a few machines where maxThreadsPerPkg is 2, yet the machine | |||||
1040 | // does not support HT. | |||||
1041 | // | |||||
1042 | // - Older OSes are usually found on machines with older chips, which do not | |||||
1043 | // support HT. | |||||
1044 | // - The performance penalty for mistakenly identifying a machine as HT when | |||||
1045 | // it isn't (which results in blocktime being incorrectly set to 0) is | |||||
1046 | // greater than the penalty for mistakenly identifying a machine as | |||||
1047 | // being 1 thread/core when it is really HT enabled (which results in | |||||
1048 | // blocktime being incorrectly set to a positive value). | |||||
1049 | __kmp_ncores = __kmp_xproc; | |||||
1050 | nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg; | |||||
1051 | __kmp_nThreadsPerCore = 1; | |||||
1052 | if (__kmp_affinity_verbose) { | |||||
1053 | KMP_INFORM(AffNotCapableUseLocCpuid, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffNotCapableUseLocCpuid , "KMP_AFFINITY"), __kmp_msg_null); | |||||
1054 | KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AvailableOSProc , "KMP_AFFINITY", __kmp_avail_proc), __kmp_msg_null); | |||||
1055 | if (__kmp_affinity_uniform_topology()) { | |||||
1056 | KMP_INFORM(Uniform, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Uniform , "KMP_AFFINITY"), __kmp_msg_null); | |||||
1057 | } else { | |||||
1058 | KMP_INFORM(NonUniform, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_NonUniform , "KMP_AFFINITY"), __kmp_msg_null); | |||||
1059 | } | |||||
1060 | KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Topology , "KMP_AFFINITY", nPackages, nCoresPerPkg, __kmp_nThreadsPerCore , __kmp_ncores), __kmp_msg_null) | |||||
1061 | __kmp_nThreadsPerCore, __kmp_ncores)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Topology , "KMP_AFFINITY", nPackages, nCoresPerPkg, __kmp_nThreadsPerCore , __kmp_ncores), __kmp_msg_null); | |||||
1062 | } | |||||
1063 | return 0; | |||||
1064 | } | |||||
1065 | ||||||
1066 | // From here on, we can assume that it is safe to call | |||||
1067 | // __kmp_get_system_affinity() and __kmp_set_system_affinity(), even if | |||||
1068 | // __kmp_affinity_type = affinity_none. | |||||
1069 | ||||||
1070 | // Save the affinity mask for the current thread. | |||||
1071 | kmp_affin_mask_t *oldMask; | |||||
1072 | KMP_CPU_ALLOC(oldMask)(oldMask = __kmp_affinity_dispatch->allocate_mask()); | |||||
1073 | KMP_ASSERT(oldMask != NULL)if (!(oldMask != __null)) { __kmp_debug_assert("oldMask != NULL" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 1073); }; | |||||
1074 | __kmp_get_system_affinity(oldMask, TRUE)(oldMask)->get_system_affinity((!0)); | |||||
1075 | ||||||
1076 | // Run through each of the available contexts, binding the current thread | |||||
1077 | // to it, and obtaining the pertinent information using the cpuid instr. | |||||
1078 | // | |||||
1079 | // The relevant information is: | |||||
1080 | // - Apic Id: Bits 24:31 of ebx after issuing cpuid(1) - each thread context | |||||
1081 | // has a unique Apic Id, which is of the form pkg# : core# : thread#. | |||||
1082 | // - Max Threads Per Pkg: Bits 16:23 of ebx after issuing cpuid(1). The value | |||||
1083 | // of this field determines the width of the core# + thread# fields in the | |||||
1084 | // Apic Id. It is also an upper bound on the number of threads per | |||||
1085 | // package, but it has been verified that situations happen where it is not | |||||
1086 | // exact. In particular, on certain OS/chip combinations where Intel(R) | |||||
1087 | // Hyper-Threading Technology is supported by the chip but has been | |||||
1088 | // disabled, the value of this field will be 2 (for a single core chip). | |||||
1089 | // On other OS/chip combinations supporting Intel(R) Hyper-Threading | |||||
1090 | // Technology, the value of this field will be 1 when Intel(R) | |||||
1091 | // Hyper-Threading Technology is disabled and 2 when it is enabled. | |||||
1092 | // - Max Cores Per Pkg: Bits 26:31 of eax after issuing cpuid(4). The value | |||||
1093 | // of this field (+1) determines the width of the core# field in the Apic | |||||
1094 | // Id. The comments in "cpucount.cpp" say that this value is an upper | |||||
1095 | // bound, but the IA-32 architecture manual says that it is exactly the | |||||
1096 | // number of cores per package, and I haven't seen any case where it | |||||
1097 | // wasn't. | |||||
1098 | // | |||||
1099 | // From this information, deduce the package Id, core Id, and thread Id, | |||||
1100 | // and set the corresponding fields in the apicThreadInfo struct. | |||||
1101 | unsigned i; | |||||
1102 | apicThreadInfo *threadInfo = (apicThreadInfo *)__kmp_allocate(___kmp_allocate((__kmp_avail_proc * sizeof(apicThreadInfo)), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 1103) | |||||
1103 | __kmp_avail_proc * sizeof(apicThreadInfo))___kmp_allocate((__kmp_avail_proc * sizeof(apicThreadInfo)), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 1103); | |||||
1104 | unsigned nApics = 0; | |||||
1105 | KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask)for (i = (__kmp_affin_fullMask)->begin(); (int)i != (__kmp_affin_fullMask )->end(); i = (__kmp_affin_fullMask)->next(i)) { | |||||
1106 | // Skip this proc if it is not included in the machine model. | |||||
1107 | if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)(__kmp_affin_fullMask)->is_set(i)) { | |||||
1108 | continue; | |||||
1109 | } | |||||
1110 | KMP_DEBUG_ASSERT((int)nApics < __kmp_avail_proc)if (!((int)nApics < __kmp_avail_proc)) { __kmp_debug_assert ("(int)nApics < __kmp_avail_proc", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 1110); }; | |||||
1111 | ||||||
1112 | __kmp_affinity_dispatch->bind_thread(i); | |||||
1113 | threadInfo[nApics].osId = i; | |||||
1114 | ||||||
1115 | // The apic id and max threads per pkg come from cpuid(1). | |||||
1116 | __kmp_x86_cpuid(1, 0, &buf); | |||||
1117 | if (((buf.edx >> 9) & 1) == 0) { | |||||
1118 | __kmp_set_system_affinity(oldMask, TRUE)(oldMask)->set_system_affinity((!0)); | |||||
1119 | __kmp_free(threadInfo)___kmp_free((threadInfo), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 1119); | |||||
1120 | KMP_CPU_FREE(oldMask)__kmp_affinity_dispatch->deallocate_mask(oldMask); | |||||
1121 | *msg_id = kmp_i18n_str_ApicNotPresent; | |||||
1122 | return -1; | |||||
1123 | } | |||||
1124 | threadInfo[nApics].apicId = (buf.ebx >> 24) & 0xff; | |||||
1125 | threadInfo[nApics].maxThreadsPerPkg = (buf.ebx >> 16) & 0xff; | |||||
1126 | if (threadInfo[nApics].maxThreadsPerPkg == 0) { | |||||
1127 | threadInfo[nApics].maxThreadsPerPkg = 1; | |||||
1128 | } | |||||
1129 | ||||||
1130 | // Max cores per pkg comes from cpuid(4). 1 must be added to the encoded | |||||
1131 | // value. | |||||
1132 | // | |||||
1133 | // First, we need to check if cpuid(4) is supported on this chip. To see if | |||||
1134 | // cpuid(n) is supported, issue cpuid(0) and check if eax has the value n | |||||
1135 | // or greater. | |||||
1136 | __kmp_x86_cpuid(0, 0, &buf); | |||||
1137 | if (buf.eax >= 4) { | |||||
1138 | __kmp_x86_cpuid(4, 0, &buf); | |||||
1139 | threadInfo[nApics].maxCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1; | |||||
1140 | } else { | |||||
1141 | threadInfo[nApics].maxCoresPerPkg = 1; | |||||
1142 | } | |||||
1143 | ||||||
1144 | // Infer the pkgId / coreId / threadId using only the info obtained locally. | |||||
1145 | int widthCT = __kmp_cpuid_mask_width(threadInfo[nApics].maxThreadsPerPkg); | |||||
1146 | threadInfo[nApics].pkgId = threadInfo[nApics].apicId >> widthCT; | |||||
1147 | ||||||
1148 | int widthC = __kmp_cpuid_mask_width(threadInfo[nApics].maxCoresPerPkg); | |||||
1149 | int widthT = widthCT - widthC; | |||||
1150 | if (widthT < 0) { | |||||
1151 | // I've never seen this one happen, but I suppose it could, if the cpuid | |||||
1152 | // instruction on a chip was really screwed up. Make sure to restore the | |||||
1153 | // affinity mask before the tail call. | |||||
1154 | __kmp_set_system_affinity(oldMask, TRUE)(oldMask)->set_system_affinity((!0)); | |||||
1155 | __kmp_free(threadInfo)___kmp_free((threadInfo), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 1155); | |||||
1156 | KMP_CPU_FREE(oldMask)__kmp_affinity_dispatch->deallocate_mask(oldMask); | |||||
1157 | *msg_id = kmp_i18n_str_InvalidCpuidInfo; | |||||
1158 | return -1; | |||||
1159 | } | |||||
1160 | ||||||
1161 | int maskC = (1 << widthC) - 1; | |||||
1162 | threadInfo[nApics].coreId = (threadInfo[nApics].apicId >> widthT) & maskC; | |||||
1163 | ||||||
1164 | int maskT = (1 << widthT) - 1; | |||||
1165 | threadInfo[nApics].threadId = threadInfo[nApics].apicId & maskT; | |||||
1166 | ||||||
1167 | nApics++; | |||||
1168 | } | |||||
1169 | ||||||
1170 | // We've collected all the info we need. | |||||
1171 | // Restore the old affinity mask for this thread. | |||||
1172 | __kmp_set_system_affinity(oldMask, TRUE)(oldMask)->set_system_affinity((!0)); | |||||
1173 | ||||||
1174 | // If there's only one thread context to bind to, form an Address object | |||||
1175 | // with depth 1 and return immediately (or, if affinity is off, set | |||||
1176 | // address2os to NULL and return). | |||||
1177 | // | |||||
1178 | // If it is configured to omit the package level when there is only a single | |||||
1179 | // package, the logic at the end of this routine won't work if there is only | |||||
1180 | // a single thread - it would try to form an Address object with depth 0. | |||||
1181 | KMP_ASSERT(nApics > 0)if (!(nApics > 0)) { __kmp_debug_assert("nApics > 0", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 1181); }; | |||||
1182 | if (nApics == 1) { | |||||
1183 | __kmp_ncores = nPackages = 1; | |||||
1184 | __kmp_nThreadsPerCore = nCoresPerPkg = 1; | |||||
1185 | if (__kmp_affinity_verbose) { | |||||
1186 | char buf[KMP_AFFIN_MASK_PRINT_LEN1024]; | |||||
1187 | __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN1024, oldMask); | |||||
1188 | ||||||
1189 | KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffUseGlobCpuid , "KMP_AFFINITY"), __kmp_msg_null); | |||||
1190 | if (__kmp_affinity_respect_mask) { | |||||
1191 | KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_InitOSProcSetRespect , "KMP_AFFINITY", buf), __kmp_msg_null); | |||||
1192 | } else { | |||||
1193 | KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_InitOSProcSetNotRespect , "KMP_AFFINITY", buf), __kmp_msg_null); | |||||
1194 | } | |||||
1195 | KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AvailableOSProc , "KMP_AFFINITY", __kmp_avail_proc), __kmp_msg_null); | |||||
1196 | KMP_INFORM(Uniform, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Uniform , "KMP_AFFINITY"), __kmp_msg_null); | |||||
1197 | KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Topology , "KMP_AFFINITY", nPackages, nCoresPerPkg, __kmp_nThreadsPerCore , __kmp_ncores), __kmp_msg_null) | |||||
1198 | __kmp_nThreadsPerCore, __kmp_ncores)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Topology , "KMP_AFFINITY", nPackages, nCoresPerPkg, __kmp_nThreadsPerCore , __kmp_ncores), __kmp_msg_null); | |||||
1199 | } | |||||
1200 | ||||||
1201 | if (__kmp_affinity_type == affinity_none) { | |||||
1202 | __kmp_free(threadInfo)___kmp_free((threadInfo), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 1202); | |||||
1203 | KMP_CPU_FREE(oldMask)__kmp_affinity_dispatch->deallocate_mask(oldMask); | |||||
1204 | return 0; | |||||
1205 | } | |||||
1206 | ||||||
1207 | *address2os = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair))___kmp_allocate((sizeof(AddrUnsPair)), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 1207); | |||||
1208 | Address addr(1); | |||||
1209 | addr.labels[0] = threadInfo[0].pkgId; | |||||
1210 | (*address2os)[0] = AddrUnsPair(addr, threadInfo[0].osId); | |||||
1211 | ||||||
1212 | if (__kmp_affinity_gran_levels < 0) { | |||||
1213 | __kmp_affinity_gran_levels = 0; | |||||
1214 | } | |||||
1215 | ||||||
1216 | if (__kmp_affinity_verbose) { | |||||
1217 | __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1); | |||||
1218 | } | |||||
1219 | ||||||
1220 | __kmp_free(threadInfo)___kmp_free((threadInfo), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 1220); | |||||
1221 | KMP_CPU_FREE(oldMask)__kmp_affinity_dispatch->deallocate_mask(oldMask); | |||||
1222 | return 1; | |||||
1223 | } | |||||
1224 | ||||||
1225 | // Sort the threadInfo table by physical Id. | |||||
1226 | qsort(threadInfo, nApics, sizeof(*threadInfo), | |||||
1227 | __kmp_affinity_cmp_apicThreadInfo_phys_id); | |||||
1228 | ||||||
1229 | // The table is now sorted by pkgId / coreId / threadId, but we really don't | |||||
1230 | // know the radix of any of the fields. pkgId's may be sparsely assigned among | |||||
1231 | // the chips on a system. Although coreId's are usually assigned | |||||
1232 | // [0 .. coresPerPkg-1] and threadId's are usually assigned | |||||
1233 | // [0..threadsPerCore-1], we don't want to make any such assumptions. | |||||
1234 | // | |||||
1235 | // For that matter, we don't know what coresPerPkg and threadsPerCore (or the | |||||
1236 | // total # packages) are at this point - we want to determine that now. We | |||||
1237 | // only have an upper bound on the first two figures. | |||||
1238 | // | |||||
1239 | // We also perform a consistency check at this point: the values returned by | |||||
1240 | // the cpuid instruction for any thread bound to a given package had better | |||||
1241 | // return the same info for maxThreadsPerPkg and maxCoresPerPkg. | |||||
1242 | nPackages = 1; | |||||
1243 | nCoresPerPkg = 1; | |||||
1244 | __kmp_nThreadsPerCore = 1; | |||||
1245 | unsigned nCores = 1; | |||||
1246 | ||||||
1247 | unsigned pkgCt = 1; // to determine radii | |||||
1248 | unsigned lastPkgId = threadInfo[0].pkgId; | |||||
1249 | unsigned coreCt = 1; | |||||
1250 | unsigned lastCoreId = threadInfo[0].coreId; | |||||
1251 | unsigned threadCt = 1; | |||||
1252 | unsigned lastThreadId = threadInfo[0].threadId; | |||||
1253 | ||||||
1254 | // intra-pkg consist checks | |||||
1255 | unsigned prevMaxCoresPerPkg = threadInfo[0].maxCoresPerPkg; | |||||
1256 | unsigned prevMaxThreadsPerPkg = threadInfo[0].maxThreadsPerPkg; | |||||
1257 | ||||||
1258 | for (i = 1; i < nApics; i++) { | |||||
1259 | if (threadInfo[i].pkgId != lastPkgId) { | |||||
1260 | nCores++; | |||||
1261 | pkgCt++; | |||||
1262 | lastPkgId = threadInfo[i].pkgId; | |||||
1263 | if ((int)coreCt > nCoresPerPkg) | |||||
1264 | nCoresPerPkg = coreCt; | |||||
1265 | coreCt = 1; | |||||
1266 | lastCoreId = threadInfo[i].coreId; | |||||
1267 | if ((int)threadCt > __kmp_nThreadsPerCore) | |||||
1268 | __kmp_nThreadsPerCore = threadCt; | |||||
1269 | threadCt = 1; | |||||
1270 | lastThreadId = threadInfo[i].threadId; | |||||
1271 | ||||||
1272 | // This is a different package, so go on to the next iteration without | |||||
1273 | // doing any consistency checks. Reset the consistency check vars, though. | |||||
1274 | prevMaxCoresPerPkg = threadInfo[i].maxCoresPerPkg; | |||||
1275 | prevMaxThreadsPerPkg = threadInfo[i].maxThreadsPerPkg; | |||||
1276 | continue; | |||||
1277 | } | |||||
1278 | ||||||
1279 | if (threadInfo[i].coreId != lastCoreId) { | |||||
1280 | nCores++; | |||||
1281 | coreCt++; | |||||
1282 | lastCoreId = threadInfo[i].coreId; | |||||
1283 | if ((int)threadCt > __kmp_nThreadsPerCore) | |||||
1284 | __kmp_nThreadsPerCore = threadCt; | |||||
1285 | threadCt = 1; | |||||
1286 | lastThreadId = threadInfo[i].threadId; | |||||
1287 | } else if (threadInfo[i].threadId != lastThreadId) { | |||||
1288 | threadCt++; | |||||
1289 | lastThreadId = threadInfo[i].threadId; | |||||
1290 | } else { | |||||
1291 | __kmp_free(threadInfo)___kmp_free((threadInfo), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 1291); | |||||
1292 | KMP_CPU_FREE(oldMask)__kmp_affinity_dispatch->deallocate_mask(oldMask); | |||||
1293 | *msg_id = kmp_i18n_str_LegacyApicIDsNotUnique; | |||||
1294 | return -1; | |||||
1295 | } | |||||
1296 | ||||||
1297 | // Check to make certain that the maxCoresPerPkg and maxThreadsPerPkg | |||||
1298 | // fields agree between all the threads bound to a given package. | |||||
1299 | if ((prevMaxCoresPerPkg != threadInfo[i].maxCoresPerPkg) || | |||||
1300 | (prevMaxThreadsPerPkg != threadInfo[i].maxThreadsPerPkg)) { | |||||
1301 | __kmp_free(threadInfo)___kmp_free((threadInfo), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 1301); | |||||
1302 | KMP_CPU_FREE(oldMask)__kmp_affinity_dispatch->deallocate_mask(oldMask); | |||||
1303 | *msg_id = kmp_i18n_str_InconsistentCpuidInfo; | |||||
1304 | return -1; | |||||
1305 | } | |||||
1306 | } | |||||
1307 | nPackages = pkgCt; | |||||
1308 | if ((int)coreCt > nCoresPerPkg) | |||||
1309 | nCoresPerPkg = coreCt; | |||||
1310 | if ((int)threadCt > __kmp_nThreadsPerCore) | |||||
1311 | __kmp_nThreadsPerCore = threadCt; | |||||
1312 | ||||||
1313 | // When affinity is off, this routine will still be called to set | |||||
1314 | // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages. | |||||
1315 | // Make sure all these vars are set correctly, and return now if affinity is | |||||
1316 | // not enabled. | |||||
1317 | __kmp_ncores = nCores; | |||||
1318 | if (__kmp_affinity_verbose) { | |||||
1319 | char buf[KMP_AFFIN_MASK_PRINT_LEN1024]; | |||||
1320 | __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN1024, oldMask); | |||||
1321 | ||||||
1322 | KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffUseGlobCpuid , "KMP_AFFINITY"), __kmp_msg_null); | |||||
1323 | if (__kmp_affinity_respect_mask) { | |||||
1324 | KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_InitOSProcSetRespect , "KMP_AFFINITY", buf), __kmp_msg_null); | |||||
1325 | } else { | |||||
1326 | KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_InitOSProcSetNotRespect , "KMP_AFFINITY", buf), __kmp_msg_null); | |||||
1327 | } | |||||
1328 | KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AvailableOSProc , "KMP_AFFINITY", __kmp_avail_proc), __kmp_msg_null); | |||||
1329 | if (__kmp_affinity_uniform_topology()) { | |||||
1330 | KMP_INFORM(Uniform, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Uniform , "KMP_AFFINITY"), __kmp_msg_null); | |||||
1331 | } else { | |||||
1332 | KMP_INFORM(NonUniform, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_NonUniform , "KMP_AFFINITY"), __kmp_msg_null); | |||||
1333 | } | |||||
1334 | KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Topology , "KMP_AFFINITY", nPackages, nCoresPerPkg, __kmp_nThreadsPerCore , __kmp_ncores), __kmp_msg_null) | |||||
1335 | __kmp_nThreadsPerCore, __kmp_ncores)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Topology , "KMP_AFFINITY", nPackages, nCoresPerPkg, __kmp_nThreadsPerCore , __kmp_ncores), __kmp_msg_null); | |||||
1336 | } | |||||
1337 | KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL)if (!(__kmp_pu_os_idx == __null)) { __kmp_debug_assert("__kmp_pu_os_idx == __null" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 1337); }; | |||||
1338 | KMP_DEBUG_ASSERT(nApics == (unsigned)__kmp_avail_proc)if (!(nApics == (unsigned)__kmp_avail_proc)) { __kmp_debug_assert ("nApics == (unsigned)__kmp_avail_proc", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 1338); }; | |||||
1339 | __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc)___kmp_allocate((sizeof(int) * __kmp_avail_proc), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 1339); | |||||
1340 | for (i = 0; i < nApics; ++i) { | |||||
1341 | __kmp_pu_os_idx[i] = threadInfo[i].osId; | |||||
1342 | } | |||||
1343 | if (__kmp_affinity_type == affinity_none) { | |||||
1344 | __kmp_free(threadInfo)___kmp_free((threadInfo), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 1344); | |||||
1345 | KMP_CPU_FREE(oldMask)__kmp_affinity_dispatch->deallocate_mask(oldMask); | |||||
1346 | return 0; | |||||
1347 | } | |||||
1348 | ||||||
1349 | // Now that we've determined the number of packages, the number of cores per | |||||
1350 | // package, and the number of threads per core, we can construct the data | |||||
1351 | // structure that is to be returned. | |||||
1352 | int pkgLevel = 0; | |||||
1353 | int coreLevel = (nCoresPerPkg <= 1) ? -1 : 1; | |||||
1354 | int threadLevel = | |||||
1355 | (__kmp_nThreadsPerCore <= 1) ? -1 : ((coreLevel >= 0) ? 2 : 1); | |||||
1356 | unsigned depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0); | |||||
1357 | ||||||
1358 | KMP_ASSERT(depth > 0)if (!(depth > 0)) { __kmp_debug_assert("depth > 0", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 1358); }; | |||||
1359 | *address2os = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * nApics)___kmp_allocate((sizeof(AddrUnsPair) * nApics), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 1359); | |||||
1360 | ||||||
1361 | for (i = 0; i < nApics; ++i) { | |||||
1362 | Address addr(depth); | |||||
1363 | unsigned os = threadInfo[i].osId; | |||||
1364 | int d = 0; | |||||
1365 | ||||||
1366 | if (pkgLevel >= 0) { | |||||
1367 | addr.labels[d++] = threadInfo[i].pkgId; | |||||
1368 | } | |||||
1369 | if (coreLevel >= 0) { | |||||
1370 | addr.labels[d++] = threadInfo[i].coreId; | |||||
1371 | } | |||||
1372 | if (threadLevel >= 0) { | |||||
1373 | addr.labels[d++] = threadInfo[i].threadId; | |||||
1374 | } | |||||
1375 | (*address2os)[i] = AddrUnsPair(addr, os); | |||||
1376 | } | |||||
1377 | ||||||
1378 | if (__kmp_affinity_gran_levels < 0) { | |||||
1379 | // Set the granularity level based on what levels are modeled in the machine | |||||
1380 | // topology map. | |||||
1381 | __kmp_affinity_gran_levels = 0; | |||||
1382 | if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) { | |||||
1383 | __kmp_affinity_gran_levels++; | |||||
1384 | } | |||||
1385 | if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) { | |||||
1386 | __kmp_affinity_gran_levels++; | |||||
1387 | } | |||||
1388 | if ((pkgLevel >= 0) && (__kmp_affinity_gran > affinity_gran_package)) { | |||||
1389 | __kmp_affinity_gran_levels++; | |||||
1390 | } | |||||
1391 | } | |||||
1392 | ||||||
1393 | if (__kmp_affinity_verbose) { | |||||
1394 | __kmp_affinity_print_topology(*address2os, nApics, depth, pkgLevel, | |||||
1395 | coreLevel, threadLevel); | |||||
1396 | } | |||||
1397 | ||||||
1398 | __kmp_free(threadInfo)___kmp_free((threadInfo), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 1398); | |||||
1399 | KMP_CPU_FREE(oldMask)__kmp_affinity_dispatch->deallocate_mask(oldMask); | |||||
1400 | return depth; | |||||
1401 | } | |||||
1402 | ||||||
1403 | // Intel(R) microarchitecture code name Nehalem, Dunnington and later | |||||
1404 | // architectures support a newer interface for specifying the x2APIC Ids, | |||||
1405 | // based on cpuid leaf 11. | |||||
1406 | static int __kmp_affinity_create_x2apicid_map(AddrUnsPair **address2os, | |||||
1407 | kmp_i18n_id_t *const msg_id) { | |||||
1408 | kmp_cpuid buf; | |||||
1409 | *address2os = NULL__null; | |||||
1410 | *msg_id = kmp_i18n_null; | |||||
1411 | ||||||
1412 | // Check to see if cpuid leaf 11 is supported. | |||||
1413 | __kmp_x86_cpuid(0, 0, &buf); | |||||
1414 | if (buf.eax < 11) { | |||||
1415 | *msg_id = kmp_i18n_str_NoLeaf11Support; | |||||
1416 | return -1; | |||||
1417 | } | |||||
1418 | __kmp_x86_cpuid(11, 0, &buf); | |||||
1419 | if (buf.ebx == 0) { | |||||
1420 | *msg_id = kmp_i18n_str_NoLeaf11Support; | |||||
1421 | return -1; | |||||
1422 | } | |||||
1423 | ||||||
1424 | // Find the number of levels in the machine topology. While we're at it, get | |||||
1425 | // the default values for __kmp_nThreadsPerCore & nCoresPerPkg. We will try to | |||||
1426 | // get more accurate values later by explicitly counting them, but get | |||||
1427 | // reasonable defaults now, in case we return early. | |||||
1428 | int level; | |||||
1429 | int threadLevel = -1; | |||||
1430 | int coreLevel = -1; | |||||
1431 | int pkgLevel = -1; | |||||
1432 | __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1; | |||||
1433 | ||||||
1434 | for (level = 0;; level++) { | |||||
1435 | if (level > 31) { | |||||
1436 | // FIXME: Hack for DPD200163180 | |||||
1437 | // | |||||
1438 | // If level is big then something went wrong -> exiting | |||||
1439 | // | |||||
1440 | // There could actually be 32 valid levels in the machine topology, but so | |||||
1441 | // far, the only machine we have seen which does not exit this loop before | |||||
1442 | // iteration 32 has fubar x2APIC settings. | |||||
1443 | // | |||||
1444 | // For now, just reject this case based upon loop trip count. | |||||
1445 | *msg_id = kmp_i18n_str_InvalidCpuidInfo; | |||||
1446 | return -1; | |||||
1447 | } | |||||
1448 | __kmp_x86_cpuid(11, level, &buf); | |||||
1449 | if (buf.ebx == 0) { | |||||
1450 | if (pkgLevel < 0) { | |||||
1451 | // Will infer nPackages from __kmp_xproc | |||||
1452 | pkgLevel = level; | |||||
1453 | level++; | |||||
1454 | } | |||||
1455 | break; | |||||
1456 | } | |||||
1457 | int kind = (buf.ecx >> 8) & 0xff; | |||||
1458 | if (kind == 1) { | |||||
1459 | // SMT level | |||||
1460 | threadLevel = level; | |||||
1461 | coreLevel = -1; | |||||
1462 | pkgLevel = -1; | |||||
1463 | __kmp_nThreadsPerCore = buf.ebx & 0xffff; | |||||
1464 | if (__kmp_nThreadsPerCore == 0) { | |||||
1465 | *msg_id = kmp_i18n_str_InvalidCpuidInfo; | |||||
1466 | return -1; | |||||
1467 | } | |||||
1468 | } else if (kind == 2) { | |||||
1469 | // core level | |||||
1470 | coreLevel = level; | |||||
1471 | pkgLevel = -1; | |||||
1472 | nCoresPerPkg = buf.ebx & 0xffff; | |||||
1473 | if (nCoresPerPkg == 0) { | |||||
1474 | *msg_id = kmp_i18n_str_InvalidCpuidInfo; | |||||
1475 | return -1; | |||||
1476 | } | |||||
1477 | } else { | |||||
1478 | if (level <= 0) { | |||||
1479 | *msg_id = kmp_i18n_str_InvalidCpuidInfo; | |||||
1480 | return -1; | |||||
1481 | } | |||||
1482 | if (pkgLevel >= 0) { | |||||
1483 | continue; | |||||
1484 | } | |||||
1485 | pkgLevel = level; | |||||
1486 | nPackages = buf.ebx & 0xffff; | |||||
1487 | if (nPackages == 0) { | |||||
1488 | *msg_id = kmp_i18n_str_InvalidCpuidInfo; | |||||
1489 | return -1; | |||||
1490 | } | |||||
1491 | } | |||||
1492 | } | |||||
1493 | int depth = level; | |||||
1494 | ||||||
1495 | // In the above loop, "level" was counted from the finest level (usually | |||||
1496 | // thread) to the coarsest. The caller expects that we will place the labels | |||||
1497 | // in (*address2os)[].first.labels[] in the inverse order, so we need to | |||||
1498 | // invert the vars saying which level means what. | |||||
1499 | if (threadLevel >= 0) { | |||||
1500 | threadLevel = depth - threadLevel - 1; | |||||
1501 | } | |||||
1502 | if (coreLevel >= 0) { | |||||
1503 | coreLevel = depth - coreLevel - 1; | |||||
1504 | } | |||||
1505 | KMP_DEBUG_ASSERT(pkgLevel >= 0)if (!(pkgLevel >= 0)) { __kmp_debug_assert("pkgLevel >= 0" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 1505); }; | |||||
1506 | pkgLevel = depth - pkgLevel - 1; | |||||
1507 | ||||||
1508 | // The algorithm used starts by setting the affinity to each available thread | |||||
1509 | // and retrieving info from the cpuid instruction, so if we are not capable of | |||||
1510 | // calling __kmp_get_system_affinity() and __kmp_set_system_affinity(), then we | |||||
1511 | // need to do something else - use the defaults that we calculated from | |||||
1512 | // issuing cpuid without binding to each proc. | |||||
1513 | if (!KMP_AFFINITY_CAPABLE()(__kmp_affin_mask_size > 0)) { | |||||
1514 | // Hack to try and infer the machine topology using only the data | |||||
1515 | // available from cpuid on the current thread, and __kmp_xproc. | |||||
1516 | KMP_ASSERT(__kmp_affinity_type == affinity_none)if (!(__kmp_affinity_type == affinity_none)) { __kmp_debug_assert ("__kmp_affinity_type == affinity_none", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 1516); }; | |||||
1517 | ||||||
1518 | __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore; | |||||
1519 | nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg; | |||||
1520 | if (__kmp_affinity_verbose) { | |||||
1521 | KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffNotCapableUseLocCpuidL11 , "KMP_AFFINITY"), __kmp_msg_null); | |||||
1522 | KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AvailableOSProc , "KMP_AFFINITY", __kmp_avail_proc), __kmp_msg_null); | |||||
1523 | if (__kmp_affinity_uniform_topology()) { | |||||
1524 | KMP_INFORM(Uniform, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Uniform , "KMP_AFFINITY"), __kmp_msg_null); | |||||
1525 | } else { | |||||
1526 | KMP_INFORM(NonUniform, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_NonUniform , "KMP_AFFINITY"), __kmp_msg_null); | |||||
1527 | } | |||||
1528 | KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Topology , "KMP_AFFINITY", nPackages, nCoresPerPkg, __kmp_nThreadsPerCore , __kmp_ncores), __kmp_msg_null) | |||||
1529 | __kmp_nThreadsPerCore, __kmp_ncores)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Topology , "KMP_AFFINITY", nPackages, nCoresPerPkg, __kmp_nThreadsPerCore , __kmp_ncores), __kmp_msg_null); | |||||
1530 | } | |||||
1531 | return 0; | |||||
1532 | } | |||||
1533 | ||||||
1534 | // From here on, we can assume that it is safe to call | |||||
1535 | // __kmp_get_system_affinity() and __kmp_set_system_affinity(), even if | |||||
1536 | // __kmp_affinity_type = affinity_none. | |||||
1537 | ||||||
1538 | // Save the affinity mask for the current thread. | |||||
1539 | kmp_affin_mask_t *oldMask; | |||||
1540 | KMP_CPU_ALLOC(oldMask)(oldMask = __kmp_affinity_dispatch->allocate_mask()); | |||||
1541 | __kmp_get_system_affinity(oldMask, TRUE)(oldMask)->get_system_affinity((!0)); | |||||
1542 | ||||||
1543 | // Allocate the data structure to be returned. | |||||
1544 | AddrUnsPair *retval = | |||||
1545 | (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc)___kmp_allocate((sizeof(AddrUnsPair) * __kmp_avail_proc), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 1545); | |||||
1546 | ||||||
1547 | // Run through each of the available contexts, binding the current thread | |||||
1548 | // to it, and obtaining the pertinent information using the cpuid instr. | |||||
1549 | unsigned int proc; | |||||
1550 | int nApics = 0; | |||||
1551 | KMP_CPU_SET_ITERATE(proc, __kmp_affin_fullMask)for (proc = (__kmp_affin_fullMask)->begin(); (int)proc != ( __kmp_affin_fullMask)->end(); proc = (__kmp_affin_fullMask )->next(proc)) { | |||||
1552 | // Skip this proc if it is not included in the machine model. | |||||
1553 | if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)(__kmp_affin_fullMask)->is_set(proc)) { | |||||
1554 | continue; | |||||
1555 | } | |||||
1556 | KMP_DEBUG_ASSERT(nApics < __kmp_avail_proc)if (!(nApics < __kmp_avail_proc)) { __kmp_debug_assert("nApics < __kmp_avail_proc" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 1556); }; | |||||
1557 | ||||||
1558 | __kmp_affinity_dispatch->bind_thread(proc); | |||||
1559 | ||||||
1560 | // Extract labels for each level in the machine topology map from Apic ID. | |||||
1561 | Address addr(depth); | |||||
1562 | int prev_shift = 0; | |||||
1563 | ||||||
1564 | for (level = 0; level < depth; level++) { | |||||
1565 | __kmp_x86_cpuid(11, level, &buf); | |||||
1566 | unsigned apicId = buf.edx; | |||||
1567 | if (buf.ebx == 0) { | |||||
1568 | if (level != depth - 1) { | |||||
1569 | KMP_CPU_FREE(oldMask)__kmp_affinity_dispatch->deallocate_mask(oldMask); | |||||
1570 | *msg_id = kmp_i18n_str_InconsistentCpuidInfo; | |||||
1571 | return -1; | |||||
1572 | } | |||||
1573 | addr.labels[depth - level - 1] = apicId >> prev_shift; | |||||
1574 | level++; | |||||
1575 | break; | |||||
1576 | } | |||||
1577 | int shift = buf.eax & 0x1f; | |||||
1578 | int mask = (1 << shift) - 1; | |||||
1579 | addr.labels[depth - level - 1] = (apicId & mask) >> prev_shift; | |||||
1580 | prev_shift = shift; | |||||
1581 | } | |||||
1582 | if (level != depth) { | |||||
1583 | KMP_CPU_FREE(oldMask)__kmp_affinity_dispatch->deallocate_mask(oldMask); | |||||
1584 | *msg_id = kmp_i18n_str_InconsistentCpuidInfo; | |||||
1585 | return -1; | |||||
1586 | } | |||||
1587 | ||||||
1588 | retval[nApics] = AddrUnsPair(addr, proc); | |||||
1589 | nApics++; | |||||
1590 | } | |||||
1591 | ||||||
1592 | // We've collected all the info we need. | |||||
1593 | // Restore the old affinity mask for this thread. | |||||
1594 | __kmp_set_system_affinity(oldMask, TRUE)(oldMask)->set_system_affinity((!0)); | |||||
1595 | ||||||
1596 | // If there's only one thread context to bind to, return now. | |||||
1597 | KMP_ASSERT(nApics > 0)if (!(nApics > 0)) { __kmp_debug_assert("nApics > 0", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 1597); }; | |||||
1598 | if (nApics == 1) { | |||||
1599 | __kmp_ncores = nPackages = 1; | |||||
1600 | __kmp_nThreadsPerCore = nCoresPerPkg = 1; | |||||
1601 | if (__kmp_affinity_verbose) { | |||||
1602 | char buf[KMP_AFFIN_MASK_PRINT_LEN1024]; | |||||
1603 | __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN1024, oldMask); | |||||
1604 | ||||||
1605 | KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffUseGlobCpuidL11 , "KMP_AFFINITY"), __kmp_msg_null); | |||||
1606 | if (__kmp_affinity_respect_mask) { | |||||
1607 | KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_InitOSProcSetRespect , "KMP_AFFINITY", buf), __kmp_msg_null); | |||||
1608 | } else { | |||||
1609 | KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_InitOSProcSetNotRespect , "KMP_AFFINITY", buf), __kmp_msg_null); | |||||
1610 | } | |||||
1611 | KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AvailableOSProc , "KMP_AFFINITY", __kmp_avail_proc), __kmp_msg_null); | |||||
1612 | KMP_INFORM(Uniform, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Uniform , "KMP_AFFINITY"), __kmp_msg_null); | |||||
1613 | KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Topology , "KMP_AFFINITY", nPackages, nCoresPerPkg, __kmp_nThreadsPerCore , __kmp_ncores), __kmp_msg_null) | |||||
1614 | __kmp_nThreadsPerCore, __kmp_ncores)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Topology , "KMP_AFFINITY", nPackages, nCoresPerPkg, __kmp_nThreadsPerCore , __kmp_ncores), __kmp_msg_null); | |||||
1615 | } | |||||
1616 | ||||||
1617 | if (__kmp_affinity_type == affinity_none) { | |||||
1618 | __kmp_free(retval)___kmp_free((retval), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 1618); | |||||
1619 | KMP_CPU_FREE(oldMask)__kmp_affinity_dispatch->deallocate_mask(oldMask); | |||||
1620 | return 0; | |||||
1621 | } | |||||
1622 | ||||||
1623 | // Form an Address object which only includes the package level. | |||||
1624 | Address addr(1); | |||||
1625 | addr.labels[0] = retval[0].first.labels[pkgLevel]; | |||||
1626 | retval[0].first = addr; | |||||
1627 | ||||||
1628 | if (__kmp_affinity_gran_levels < 0) { | |||||
1629 | __kmp_affinity_gran_levels = 0; | |||||
1630 | } | |||||
1631 | ||||||
1632 | if (__kmp_affinity_verbose) { | |||||
1633 | __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1); | |||||
1634 | } | |||||
1635 | ||||||
1636 | *address2os = retval; | |||||
1637 | KMP_CPU_FREE(oldMask)__kmp_affinity_dispatch->deallocate_mask(oldMask); | |||||
1638 | return 1; | |||||
1639 | } | |||||
1640 | ||||||
1641 | // Sort the table by physical Id. | |||||
1642 | qsort(retval, nApics, sizeof(*retval), __kmp_affinity_cmp_Address_labels); | |||||
1643 | ||||||
1644 | // Find the radix at each of the levels. | |||||
1645 | unsigned *totals = (unsigned *)__kmp_allocate(depth * sizeof(unsigned))___kmp_allocate((depth * sizeof(unsigned)), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 1645); | |||||
1646 | unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned))___kmp_allocate((depth * sizeof(unsigned)), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 1646); | |||||
1647 | unsigned *maxCt = (unsigned *)__kmp_allocate(depth * sizeof(unsigned))___kmp_allocate((depth * sizeof(unsigned)), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 1647); | |||||
1648 | unsigned *last = (unsigned *)__kmp_allocate(depth * sizeof(unsigned))___kmp_allocate((depth * sizeof(unsigned)), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 1648); | |||||
1649 | for (level = 0; level < depth; level++) { | |||||
1650 | totals[level] = 1; | |||||
1651 | maxCt[level] = 1; | |||||
1652 | counts[level] = 1; | |||||
1653 | last[level] = retval[0].first.labels[level]; | |||||
1654 | } | |||||
1655 | ||||||
1656 | // From here on, the iteration variable "level" runs from the finest level to | |||||
1657 | // the coarsest, i.e. we iterate forward through | |||||
1658 | // (*address2os)[].first.labels[] - in the previous loops, we iterated | |||||
1659 | // backwards. | |||||
1660 | for (proc = 1; (int)proc < nApics; proc++) { | |||||
1661 | int level; | |||||
1662 | for (level = 0; level < depth; level++) { | |||||
1663 | if (retval[proc].first.labels[level] != last[level]) { | |||||
1664 | int j; | |||||
1665 | for (j = level + 1; j < depth; j++) { | |||||
1666 | totals[j]++; | |||||
1667 | counts[j] = 1; | |||||
1668 | // The line below causes printing incorrect topology information in | |||||
1669 | // case the max value for some level (maxCt[level]) is encountered | |||||
1670 | // earlier than some less value while going through the array. For | |||||
1671 | // example, let pkg0 has 4 cores and pkg1 has 2 cores. Then | |||||
1672 | // maxCt[1] == 2 | |||||
1673 | // whereas it must be 4. | |||||
1674 | // TODO!!! Check if it can be commented safely | |||||
1675 | // maxCt[j] = 1; | |||||
1676 | last[j] = retval[proc].first.labels[j]; | |||||
1677 | } | |||||
1678 | totals[level]++; | |||||
1679 | counts[level]++; | |||||
1680 | if (counts[level] > maxCt[level]) { | |||||
1681 | maxCt[level] = counts[level]; | |||||
1682 | } | |||||
1683 | last[level] = retval[proc].first.labels[level]; | |||||
1684 | break; | |||||
1685 | } else if (level == depth - 1) { | |||||
1686 | __kmp_free(last)___kmp_free((last), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 1686); | |||||
1687 | __kmp_free(maxCt)___kmp_free((maxCt), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 1687); | |||||
1688 | __kmp_free(counts)___kmp_free((counts), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 1688); | |||||
1689 | __kmp_free(totals)___kmp_free((totals), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 1689); | |||||
1690 | __kmp_free(retval)___kmp_free((retval), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 1690); | |||||
1691 | KMP_CPU_FREE(oldMask)__kmp_affinity_dispatch->deallocate_mask(oldMask); | |||||
1692 | *msg_id = kmp_i18n_str_x2ApicIDsNotUnique; | |||||
1693 | return -1; | |||||
1694 | } | |||||
1695 | } | |||||
1696 | } | |||||
1697 | ||||||
1698 | // When affinity is off, this routine will still be called to set | |||||
1699 | // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages. | |||||
1700 | // Make sure all these vars are set correctly, and return if affinity is not | |||||
1701 | // enabled. | |||||
1702 | if (threadLevel >= 0) { | |||||
1703 | __kmp_nThreadsPerCore = maxCt[threadLevel]; | |||||
1704 | } else { | |||||
1705 | __kmp_nThreadsPerCore = 1; | |||||
1706 | } | |||||
1707 | nPackages = totals[pkgLevel]; | |||||
1708 | ||||||
1709 | if (coreLevel >= 0) { | |||||
1710 | __kmp_ncores = totals[coreLevel]; | |||||
1711 | nCoresPerPkg = maxCt[coreLevel]; | |||||
1712 | } else { | |||||
1713 | __kmp_ncores = nPackages; | |||||
1714 | nCoresPerPkg = 1; | |||||
1715 | } | |||||
1716 | ||||||
1717 | // Check to see if the machine topology is uniform | |||||
1718 | unsigned prod = maxCt[0]; | |||||
1719 | for (level = 1; level < depth; level++) { | |||||
1720 | prod *= maxCt[level]; | |||||
1721 | } | |||||
1722 | bool uniform = (prod == totals[level - 1]); | |||||
1723 | ||||||
1724 | // Print the machine topology summary. | |||||
1725 | if (__kmp_affinity_verbose) { | |||||
1726 | char mask[KMP_AFFIN_MASK_PRINT_LEN1024]; | |||||
1727 | __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN1024, oldMask); | |||||
1728 | ||||||
1729 | KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffUseGlobCpuidL11 , "KMP_AFFINITY"), __kmp_msg_null); | |||||
1730 | if (__kmp_affinity_respect_mask) { | |||||
1731 | KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_InitOSProcSetRespect , "KMP_AFFINITY", mask), __kmp_msg_null); | |||||
1732 | } else { | |||||
1733 | KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_InitOSProcSetNotRespect , "KMP_AFFINITY", mask), __kmp_msg_null); | |||||
1734 | } | |||||
1735 | KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AvailableOSProc , "KMP_AFFINITY", __kmp_avail_proc), __kmp_msg_null); | |||||
1736 | if (uniform) { | |||||
1737 | KMP_INFORM(Uniform, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Uniform , "KMP_AFFINITY"), __kmp_msg_null); | |||||
1738 | } else { | |||||
1739 | KMP_INFORM(NonUniform, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_NonUniform , "KMP_AFFINITY"), __kmp_msg_null); | |||||
1740 | } | |||||
1741 | ||||||
1742 | kmp_str_buf_t buf; | |||||
1743 | __kmp_str_buf_init(&buf){ (&buf)->str = (&buf)->bulk; (&buf)->size = sizeof((&buf)->bulk); (&buf)->used = 0; (& buf)->bulk[0] = 0; }; | |||||
1744 | ||||||
1745 | __kmp_str_buf_print(&buf, "%d", totals[0]); | |||||
1746 | for (level = 1; level <= pkgLevel; level++) { | |||||
1747 | __kmp_str_buf_print(&buf, " x %d", maxCt[level]); | |||||
1748 | } | |||||
1749 | KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_TopologyExtra , "KMP_AFFINITY", buf.str, nCoresPerPkg, __kmp_nThreadsPerCore , __kmp_ncores), __kmp_msg_null) | |||||
1750 | __kmp_nThreadsPerCore, __kmp_ncores)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_TopologyExtra , "KMP_AFFINITY", buf.str, nCoresPerPkg, __kmp_nThreadsPerCore , __kmp_ncores), __kmp_msg_null); | |||||
1751 | ||||||
1752 | __kmp_str_buf_free(&buf); | |||||
1753 | } | |||||
1754 | KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL)if (!(__kmp_pu_os_idx == __null)) { __kmp_debug_assert("__kmp_pu_os_idx == __null" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 1754); }; | |||||
1755 | KMP_DEBUG_ASSERT(nApics == __kmp_avail_proc)if (!(nApics == __kmp_avail_proc)) { __kmp_debug_assert("nApics == __kmp_avail_proc" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 1755); }; | |||||
1756 | __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc)___kmp_allocate((sizeof(int) * __kmp_avail_proc), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 1756); | |||||
1757 | for (proc = 0; (int)proc < nApics; ++proc) { | |||||
1758 | __kmp_pu_os_idx[proc] = retval[proc].second; | |||||
1759 | } | |||||
1760 | if (__kmp_affinity_type == affinity_none) { | |||||
1761 | __kmp_free(last)___kmp_free((last), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 1761); | |||||
1762 | __kmp_free(maxCt)___kmp_free((maxCt), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 1762); | |||||
1763 | __kmp_free(counts)___kmp_free((counts), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 1763); | |||||
1764 | __kmp_free(totals)___kmp_free((totals), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 1764); | |||||
1765 | __kmp_free(retval)___kmp_free((retval), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 1765); | |||||
1766 | KMP_CPU_FREE(oldMask)__kmp_affinity_dispatch->deallocate_mask(oldMask); | |||||
1767 | return 0; | |||||
1768 | } | |||||
1769 | ||||||
1770 | // Find any levels with radix 1, and remove them from the map | |||||
1771 | // (except for the package level). | |||||
1772 | int new_depth = 0; | |||||
1773 | for (level = 0; level < depth; level++) { | |||||
1774 | if ((maxCt[level] == 1) && (level != pkgLevel)) { | |||||
1775 | continue; | |||||
1776 | } | |||||
1777 | new_depth++; | |||||
1778 | } | |||||
1779 | ||||||
1780 | // If we are removing any levels, allocate a new vector to return, | |||||
1781 | // and copy the relevant information to it. | |||||
1782 | if (new_depth != depth) { | |||||
1783 | AddrUnsPair *new_retval = | |||||
1784 | (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * nApics)___kmp_allocate((sizeof(AddrUnsPair) * nApics), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 1784); | |||||
1785 | for (proc = 0; (int)proc < nApics; proc++) { | |||||
1786 | Address addr(new_depth); | |||||
1787 | new_retval[proc] = AddrUnsPair(addr, retval[proc].second); | |||||
1788 | } | |||||
1789 | int new_level = 0; | |||||
1790 | int newPkgLevel = -1; | |||||
1791 | int newCoreLevel = -1; | |||||
1792 | int newThreadLevel = -1; | |||||
1793 | for (level = 0; level < depth; level++) { | |||||
1794 | if ((maxCt[level] == 1) && (level != pkgLevel)) { | |||||
1795 | // Remove this level. Never remove the package level | |||||
1796 | continue; | |||||
1797 | } | |||||
1798 | if (level == pkgLevel) { | |||||
1799 | newPkgLevel = new_level; | |||||
1800 | } | |||||
1801 | if (level == coreLevel) { | |||||
1802 | newCoreLevel = new_level; | |||||
1803 | } | |||||
1804 | if (level == threadLevel) { | |||||
1805 | newThreadLevel = new_level; | |||||
1806 | } | |||||
1807 | for (proc = 0; (int)proc < nApics; proc++) { | |||||
1808 | new_retval[proc].first.labels[new_level] = | |||||
1809 | retval[proc].first.labels[level]; | |||||
1810 | } | |||||
1811 | new_level++; | |||||
1812 | } | |||||
1813 | ||||||
1814 | __kmp_free(retval)___kmp_free((retval), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 1814); | |||||
1815 | retval = new_retval; | |||||
1816 | depth = new_depth; | |||||
1817 | pkgLevel = newPkgLevel; | |||||
1818 | coreLevel = newCoreLevel; | |||||
1819 | threadLevel = newThreadLevel; | |||||
1820 | } | |||||
1821 | ||||||
1822 | if (__kmp_affinity_gran_levels < 0) { | |||||
1823 | // Set the granularity level based on what levels are modeled | |||||
1824 | // in the machine topology map. | |||||
1825 | __kmp_affinity_gran_levels = 0; | |||||
1826 | if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) { | |||||
1827 | __kmp_affinity_gran_levels++; | |||||
1828 | } | |||||
1829 | if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) { | |||||
1830 | __kmp_affinity_gran_levels++; | |||||
1831 | } | |||||
1832 | if (__kmp_affinity_gran > affinity_gran_package) { | |||||
1833 | __kmp_affinity_gran_levels++; | |||||
1834 | } | |||||
1835 | } | |||||
1836 | ||||||
1837 | if (__kmp_affinity_verbose) { | |||||
1838 | __kmp_affinity_print_topology(retval, nApics, depth, pkgLevel, coreLevel, | |||||
1839 | threadLevel); | |||||
1840 | } | |||||
1841 | ||||||
1842 | __kmp_free(last)___kmp_free((last), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 1842); | |||||
1843 | __kmp_free(maxCt)___kmp_free((maxCt), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 1843); | |||||
1844 | __kmp_free(counts)___kmp_free((counts), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 1844); | |||||
1845 | __kmp_free(totals)___kmp_free((totals), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 1845); | |||||
1846 | KMP_CPU_FREE(oldMask)__kmp_affinity_dispatch->deallocate_mask(oldMask); | |||||
1847 | *address2os = retval; | |||||
1848 | return depth; | |||||
1849 | } | |||||
1850 | ||||||
1851 | #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ | |||||
1852 | ||||||
// Field indices into a per-processor record, which is an array of unsigned
// values. The cpuinfo parser later in this file stores the value of a
// "processor" line in the OS-id slot and the value of a "physical id" line
// (or the sysfs physical_package_id file) in the package-id slot; the sysfs
// core_id file fills the core-id slot.
// NOTE(review): in this listing each macro name appears fused with its
// expansion (e.g. "osIdIndex0"); presumably the real identifiers are
// osIdIndex, threadIdIndex, coreIdIndex, pkgIdIndex and nodeIdIndex --
// confirm against the original source file.
1853 | #define osIdIndex0 0 | |||||
1854 | #define threadIdIndex1 1 | |||||
1855 | #define coreIdIndex2 2 | |||||
1856 | #define pkgIdIndex3 3 | |||||
1857 | #define nodeIdIndex4 4 | |||||
1858 | ||||||
// A record is addressed through a plain pointer to its first unsigned field.
1859 | typedef unsigned *ProcCpuInfo; | |||||
// Highest field index in use. It starts at the package-id slot and is raised
// by the cpuinfo scan when it meets a "node_<n> id" line (to node index + n).
// Records are allocated with maxIndex + 1 fields.
1860 | static unsigned maxIndex = pkgIdIndex3; | |||||
1861 | ||||||
// Three-way comparison of two per-processor records.
// a and b each point to a pointer to the first unsigned field of a record
// (they are dereferenced as "unsigned *const *"). Fields are compared from
// index maxIndex down to the OS-id index (0), so the highest-numbered field is
// the most significant key. Returns -1 or 1 at the first differing field and
// 0 when every compared field is equal.
// NOTE(review): the (const void *, const void *) -> int signature looks
// intended for qsort(); the call site is not visible here -- confirm.
1862 | static int __kmp_affinity_cmp_ProcCpuInfo_phys_id(const void *a, | |||||
1863 | const void *b) { | |||||
1864 | unsigned i; | |||||
1865 | const unsigned *aa = *(unsigned *const *)a; | |||||
1866 | const unsigned *bb = *(unsigned *const *)b; | |||||
// The loop has no condition: i is unsigned, so the walk down to index 0 is
// ended by the explicit break below rather than by a ">= 0" test.
1867 | for (i = maxIndex;; i--) { | |||||
1868 | if (aa[i] < bb[i]) | |||||
1869 | return -1; | |||||
1870 | if (aa[i] > bb[i]) | |||||
1871 | return 1; | |||||
1872 | if (i == osIdIndex0) | |||||
1873 | break; | |||||
1874 | } | |||||
1875 | return 0; | |||||
1876 | } | |||||
1877 | ||||||
1878 | #if KMP_USE_HIER_SCHED0 | |||||
1879 | // Set the array sizes for the hierarchy layers | |||||
// Fills two tables, both indexed by (layer + 1):
//   __kmp_hier_max_units[]   - the maximum number of units of that layer
//   __kmp_hier_threads_per[] - the number of hardware threads in one unit
// The values are derived from nPackages, nCoresPerPkg, __kmp_nThreadsPerCore
// and __kmp_ncores as they stand when this function is called.
1880 | static void __kmp_dispatch_set_hierarchy_values() { | |||||
1881 | // Set the maximum number of L1's to number of cores | |||||
1882 | // Set the maximum number of L2's to either number of cores / 2 for | |||||
1883 | // Intel(R) Xeon Phi(TM) coprocessor formerly codenamed Knights Landing | |||||
1884 | // Or the number of cores for Intel(R) Xeon(R) processors | |||||
1885 | // Set the maximum number of NUMA nodes and L3's to number of packages | |||||
1886 | __kmp_hier_max_units[kmp_hier_layer_e::LAYER_THREAD + 1] = | |||||
1887 | nPackages * nCoresPerPkg * __kmp_nThreadsPerCore; | |||||
1888 | __kmp_hier_max_units[kmp_hier_layer_e::LAYER_L1 + 1] = __kmp_ncores; | |||||
// The "else" inside the #if block binds to the assignment that follows the
// #endif. When the #if condition is false, that assignment is unconditional.
1889 | #if KMP_ARCH_X86_641 && (KMP_OS_LINUX1 || KMP_OS_WINDOWS0) | |||||
1890 | if (__kmp_mic_type >= mic3) | |||||
1891 | __kmp_hier_max_units[kmp_hier_layer_e::LAYER_L2 + 1] = __kmp_ncores / 2; | |||||
1892 | else | |||||
1893 | #endif // KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) | |||||
1894 | __kmp_hier_max_units[kmp_hier_layer_e::LAYER_L2 + 1] = __kmp_ncores; | |||||
1895 | __kmp_hier_max_units[kmp_hier_layer_e::LAYER_L3 + 1] = nPackages; | |||||
1896 | __kmp_hier_max_units[kmp_hier_layer_e::LAYER_NUMA + 1] = nPackages; | |||||
1897 | __kmp_hier_max_units[kmp_hier_layer_e::LAYER_LOOP + 1] = 1; | |||||
1898 | // Set the number of threads per unit | |||||
1899 | // Number of hardware threads per L1/L2/L3/NUMA/LOOP | |||||
1900 | __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_THREAD + 1] = 1; | |||||
1901 | __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L1 + 1] = | |||||
1902 | __kmp_nThreadsPerCore; | |||||
// Same #if / else / #endif shape as above: with __kmp_mic_type >= mic3 an L2
// unit holds the threads of two cores, otherwise the threads of one core.
1903 | #if KMP_ARCH_X86_641 && (KMP_OS_LINUX1 || KMP_OS_WINDOWS0) | |||||
1904 | if (__kmp_mic_type >= mic3) | |||||
1905 | __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L2 + 1] = | |||||
1906 | 2 * __kmp_nThreadsPerCore; | |||||
1907 | else | |||||
1908 | #endif // KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) | |||||
1909 | __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L2 + 1] = | |||||
1910 | __kmp_nThreadsPerCore; | |||||
1911 | __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L3 + 1] = | |||||
1912 | nCoresPerPkg * __kmp_nThreadsPerCore; | |||||
1913 | __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_NUMA + 1] = | |||||
1914 | nCoresPerPkg * __kmp_nThreadsPerCore; | |||||
1915 | __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_LOOP + 1] = | |||||
1916 | nPackages * nCoresPerPkg * __kmp_nThreadsPerCore; | |||||
1917 | } | |||||
1918 | ||||||
1919 | // Return the index into the hierarchy for this tid and layer type (L1, L2, etc) | |||||
1920 | // i.e., this thread's L1 or this thread's L2, etc. | |||||
// tid  - thread id to map.
// type - hierarchy layer; must not be LAYER_LAST (asserted below).
// LAYER_THREAD returns tid unchanged and LAYER_LOOP returns 0. For any other
// layer, tid is first wrapped into [0, num_hw_threads) if it is too large, and
// the result is (tid / threads-per-unit) modulo the layer's maximum unit count.
1921 | int __kmp_dispatch_get_index(int tid, kmp_hier_layer_e type) { | |||||
// The lookup tables are indexed by (layer + 1).
1922 | int index = type + 1; | |||||
1923 | int num_hw_threads = __kmp_hier_max_units[kmp_hier_layer_e::LAYER_THREAD + 1]; | |||||
1924 | KMP_DEBUG_ASSERT(type != kmp_hier_layer_e::LAYER_LAST)if (!(type != kmp_hier_layer_e::LAYER_LAST)) { __kmp_debug_assert ("type != kmp_hier_layer_e::LAYER_LAST", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 1924); }; | |||||
1925 | if (type == kmp_hier_layer_e::LAYER_THREAD) | |||||
1926 | return tid; | |||||
1927 | else if (type == kmp_hier_layer_e::LAYER_LOOP) | |||||
1928 | return 0; | |||||
// NOTE(review): only __kmp_hier_max_units[index] is asserted non-zero here.
// num_hw_threads and __kmp_hier_threads_per[index] are also used as divisors
// below without a check in this function -- presumably both tables were
// already filled by __kmp_dispatch_set_hierarchy_values(); confirm.
1929 | KMP_DEBUG_ASSERT(__kmp_hier_max_units[index] != 0)if (!(__kmp_hier_max_units[index] != 0)) { __kmp_debug_assert ("__kmp_hier_max_units[index] != 0", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 1929); }; | |||||
1930 | if (tid >= num_hw_threads) | |||||
1931 | tid = tid % num_hw_threads; | |||||
1932 | return (tid / __kmp_hier_threads_per[index]) % __kmp_hier_max_units[index]; | |||||
1933 | } | |||||
1934 | ||||||
1935 | // Return the number of t1's per t2 | |||||
// t1, t2 - hierarchy layers with t1 + 1 <= t2 + 1; neither may be LAYER_LAST
//          (both conditions are asserted below).
// Returns the integer quotient of the two layers' threads-per-unit table
// entries, __kmp_hier_threads_per[t2 + 1] / __kmp_hier_threads_per[t1 + 1].
1936 | int __kmp_dispatch_get_t1_per_t2(kmp_hier_layer_e t1, kmp_hier_layer_e t2) { | |||||
// The lookup table is indexed by (layer + 1).
1937 | int i1 = t1 + 1; | |||||
1938 | int i2 = t2 + 1; | |||||
1939 | KMP_DEBUG_ASSERT(i1 <= i2)if (!(i1 <= i2)) { __kmp_debug_assert("i1 <= i2", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 1939); }; | |||||
1940 | KMP_DEBUG_ASSERT(t1 != kmp_hier_layer_e::LAYER_LAST)if (!(t1 != kmp_hier_layer_e::LAYER_LAST)) { __kmp_debug_assert ("t1 != kmp_hier_layer_e::LAYER_LAST", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 1940); }; | |||||
1941 | KMP_DEBUG_ASSERT(t2 != kmp_hier_layer_e::LAYER_LAST)if (!(t2 != kmp_hier_layer_e::LAYER_LAST)) { __kmp_debug_assert ("t2 != kmp_hier_layer_e::LAYER_LAST", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 1941); }; | |||||
// The divisor used in the return statement is asserted non-zero here.
1942 | KMP_DEBUG_ASSERT(__kmp_hier_threads_per[i1] != 0)if (!(__kmp_hier_threads_per[i1] != 0)) { __kmp_debug_assert( "__kmp_hier_threads_per[i1] != 0", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 1942); }; | |||||
1943 | // (nthreads/t2) / (nthreads/t1) = t1 / t2 | |||||
1944 | return __kmp_hier_threads_per[i2] / __kmp_hier_threads_per[i1]; | |||||
1945 | } | |||||
1946 | #endif // KMP_USE_HIER_SCHED | |||||
1947 | ||||||
1948 | // Parse /proc/cpuinfo (or an alternate file in the same format) to obtain the | |||||
1949 | // affinity map. | |||||
1950 | static int __kmp_affinity_create_cpuinfo_map(AddrUnsPair **address2os, | |||||
1951 | int *line, | |||||
1952 | kmp_i18n_id_t *const msg_id, | |||||
1953 | FILE *f) { | |||||
1954 | *address2os = NULL__null; | |||||
1955 | *msg_id = kmp_i18n_null; | |||||
1956 | ||||||
1957 | // Scan of the file, and count the number of "processor" (osId) fields, | |||||
1958 | // and find the highest value of <n> for a node_<n> field. | |||||
1959 | char buf[256]; | |||||
1960 | unsigned num_records = 0; | |||||
1961 | while (!feof(f)) { | |||||
1962 | buf[sizeof(buf) - 1] = 1; | |||||
1963 | if (!fgets(buf, sizeof(buf), f)) { | |||||
1964 | // Read errors presumably because of EOF | |||||
1965 | break; | |||||
1966 | } | |||||
1967 | ||||||
1968 | char s1[] = "processor"; | |||||
1969 | if (strncmp(buf, s1, sizeof(s1) - 1) == 0) { | |||||
1970 | num_records++; | |||||
1971 | continue; | |||||
1972 | } | |||||
1973 | ||||||
1974 | // FIXME - this will match "node_<n> <garbage>" | |||||
1975 | unsigned level; | |||||
1976 | if (KMP_SSCANFsscanf(buf, "node_%u id", &level) == 1) { | |||||
1977 | if (nodeIdIndex4 + level >= maxIndex) { | |||||
1978 | maxIndex = nodeIdIndex4 + level; | |||||
1979 | } | |||||
1980 | continue; | |||||
1981 | } | |||||
1982 | } | |||||
1983 | ||||||
1984 | // Check for empty file / no valid processor records, or too many. The number | |||||
1985 | // of records can't exceed the number of valid bits in the affinity mask. | |||||
1986 | if (num_records == 0) { | |||||
1987 | *line = 0; | |||||
1988 | *msg_id = kmp_i18n_str_NoProcRecords; | |||||
1989 | return -1; | |||||
1990 | } | |||||
1991 | if (num_records > (unsigned)__kmp_xproc) { | |||||
1992 | *line = 0; | |||||
1993 | *msg_id = kmp_i18n_str_TooManyProcRecords; | |||||
1994 | return -1; | |||||
1995 | } | |||||
1996 | ||||||
1997 | // Set the file pointer back to the beginning, so that we can scan the file | |||||
1998 | // again, this time performing a full parse of the data. Allocate a vector of | |||||
1999 | // ProcCpuInfo object, where we will place the data. Adding an extra element | |||||
2000 | // at the end allows us to remove a lot of extra checks for termination | |||||
2001 | // conditions. | |||||
2002 | if (fseek(f, 0, SEEK_SET0) != 0) { | |||||
2003 | *line = 0; | |||||
2004 | *msg_id = kmp_i18n_str_CantRewindCpuinfo; | |||||
2005 | return -1; | |||||
2006 | } | |||||
2007 | ||||||
2008 | // Allocate the array of records to store the proc info in. The dummy | |||||
2009 | // element at the end makes the logic in filling them out easier to code. | |||||
2010 | unsigned **threadInfo = | |||||
2011 | (unsigned **)__kmp_allocate((num_records + 1) * sizeof(unsigned *))___kmp_allocate(((num_records + 1) * sizeof(unsigned *)), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2011); | |||||
2012 | unsigned i; | |||||
2013 | for (i = 0; i <= num_records; i++) { | |||||
2014 | threadInfo[i] = | |||||
2015 | (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned))___kmp_allocate(((maxIndex + 1) * sizeof(unsigned)), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2015); | |||||
2016 | } | |||||
2017 | ||||||
2018 | #define CLEANUP_THREAD_INFOfor (i = 0; i <= num_records; i++) { ___kmp_free((threadInfo [i]), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2018); } ___kmp_free((threadInfo), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2018); \ | |||||
2019 | for (i = 0; i <= num_records; i++) { \ | |||||
2020 | __kmp_free(threadInfo[i])___kmp_free((threadInfo[i]), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2020); \ | |||||
2021 | } \ | |||||
2022 | __kmp_free(threadInfo)___kmp_free((threadInfo), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2022); | |||||
2023 | ||||||
2024 | // A value of UINT_MAX means that we didn't find the field | |||||
2025 | unsigned __index; | |||||
2026 | ||||||
2027 | #define INIT_PROC_INFO(p)for (__index = 0; __index <= maxIndex; __index++) { (p)[__index ] = (2147483647 *2U +1U); } \ | |||||
2028 | for (__index = 0; __index <= maxIndex; __index++) { \ | |||||
2029 | (p)[__index] = UINT_MAX(2147483647 *2U +1U); \ | |||||
2030 | } | |||||
2031 | ||||||
2032 | for (i = 0; i <= num_records; i++) { | |||||
2033 | INIT_PROC_INFO(threadInfo[i])for (__index = 0; __index <= maxIndex; __index++) { (threadInfo [i])[__index] = (2147483647 *2U +1U); }; | |||||
2034 | } | |||||
2035 | ||||||
2036 | unsigned num_avail = 0; | |||||
2037 | *line = 0; | |||||
2038 | while (!feof(f)) { | |||||
2039 | // Create an inner scoping level, so that all the goto targets at the end of | |||||
2040 | // the loop appear in an outer scoping level. This avoids warnings about | |||||
2041 | // jumping past an initialization to a target in the same block. | |||||
2042 | { | |||||
2043 | buf[sizeof(buf) - 1] = 1; | |||||
2044 | bool long_line = false; | |||||
2045 | if (!fgets(buf, sizeof(buf), f)) { | |||||
2046 | // Read errors presumably because of EOF | |||||
2047 | // If there is valid data in threadInfo[num_avail], then fake | |||||
2048 | // a blank line to ensure that the last address gets parsed. | |||||
2049 | bool valid = false; | |||||
2050 | for (i = 0; i <= maxIndex; i++) { | |||||
2051 | if (threadInfo[num_avail][i] != UINT_MAX(2147483647 *2U +1U)) { | |||||
2052 | valid = true; | |||||
2053 | } | |||||
2054 | } | |||||
2055 | if (!valid) { | |||||
2056 | break; | |||||
2057 | } | |||||
2058 | buf[0] = 0; | |||||
2059 | } else if (!buf[sizeof(buf) - 1]) { | |||||
2060 | // The line is longer than the buffer. Set a flag and don't | |||||
2061 | // emit an error if we were going to ignore the line, anyway. | |||||
2062 | long_line = true; | |||||
2063 | ||||||
2064 | #define CHECK_LINEif (long_line) { for (i = 0; i <= num_records; i++) { ___kmp_free ((threadInfo[i]), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2064); } ___kmp_free((threadInfo), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2064);; *msg_id = kmp_i18n_str_LongLineCpuinfo; return -1; } \ | |||||
2065 | if (long_line) { \ | |||||
2066 | CLEANUP_THREAD_INFOfor (i = 0; i <= num_records; i++) { ___kmp_free((threadInfo [i]), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2066); } ___kmp_free((threadInfo), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2066);; \ | |||||
2067 | *msg_id = kmp_i18n_str_LongLineCpuinfo; \ | |||||
2068 | return -1; \ | |||||
2069 | } | |||||
2070 | } | |||||
2071 | (*line)++; | |||||
2072 | ||||||
2073 | char s1[] = "processor"; | |||||
2074 | if (strncmp(buf, s1, sizeof(s1) - 1) == 0) { | |||||
2075 | CHECK_LINEif (long_line) { for (i = 0; i <= num_records; i++) { ___kmp_free ((threadInfo[i]), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2075); } ___kmp_free((threadInfo), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2075);; *msg_id = kmp_i18n_str_LongLineCpuinfo; return -1; }; | |||||
2076 | char *p = strchr(buf + sizeof(s1) - 1, ':'); | |||||
2077 | unsigned val; | |||||
2078 | if ((p == NULL__null) || (KMP_SSCANFsscanf(p + 1, "%u\n", &val) != 1)) | |||||
2079 | goto no_val; | |||||
2080 | if (threadInfo[num_avail][osIdIndex0] != UINT_MAX(2147483647 *2U +1U)) | |||||
2081 | #if KMP_ARCH_AARCH640 | |||||
2082 | // Handle the old AArch64 /proc/cpuinfo layout differently, | |||||
2083 | // it contains all of the 'processor' entries listed in a | |||||
2084 | // single 'Processor' section, therefore the normal looking | |||||
2085 | // for duplicates in that section will always fail. | |||||
2086 | num_avail++; | |||||
2087 | #else | |||||
2088 | goto dup_field; | |||||
2089 | #endif | |||||
2090 | threadInfo[num_avail][osIdIndex0] = val; | |||||
2091 | #if KMP_OS_LINUX1 && !(KMP_ARCH_X860 || KMP_ARCH_X86_641) | |||||
2092 | char path[256]; | |||||
2093 | KMP_SNPRINTFsnprintf( | |||||
2094 | path, sizeof(path), | |||||
2095 | "/sys/devices/system/cpu/cpu%u/topology/physical_package_id", | |||||
2096 | threadInfo[num_avail][osIdIndex0]); | |||||
2097 | __kmp_read_from_file(path, "%u", &threadInfo[num_avail][pkgIdIndex3]); | |||||
2098 | ||||||
2099 | KMP_SNPRINTFsnprintf(path, sizeof(path), | |||||
2100 | "/sys/devices/system/cpu/cpu%u/topology/core_id", | |||||
2101 | threadInfo[num_avail][osIdIndex0]); | |||||
2102 | __kmp_read_from_file(path, "%u", &threadInfo[num_avail][coreIdIndex2]); | |||||
2103 | continue; | |||||
2104 | #else | |||||
2105 | } | |||||
2106 | char s2[] = "physical id"; | |||||
2107 | if (strncmp(buf, s2, sizeof(s2) - 1) == 0) { | |||||
2108 | CHECK_LINEif (long_line) { for (i = 0; i <= num_records; i++) { ___kmp_free ((threadInfo[i]), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2108); } ___kmp_free((threadInfo), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2108);; *msg_id = kmp_i18n_str_LongLineCpuinfo; return -1; }; | |||||
2109 | char *p = strchr(buf + sizeof(s2) - 1, ':'); | |||||
2110 | unsigned val; | |||||
2111 | if ((p == NULL__null) || (KMP_SSCANFsscanf(p + 1, "%u\n", &val) != 1)) | |||||
2112 | goto no_val; | |||||
2113 | if (threadInfo[num_avail][pkgIdIndex3] != UINT_MAX(2147483647 *2U +1U)) | |||||
2114 | goto dup_field; | |||||
2115 | threadInfo[num_avail][pkgIdIndex3] = val; | |||||
2116 | continue; | |||||
2117 | } | |||||
2118 | char s3[] = "core id"; | |||||
2119 | if (strncmp(buf, s3, sizeof(s3) - 1) == 0) { | |||||
2120 | CHECK_LINEif (long_line) { for (i = 0; i <= num_records; i++) { ___kmp_free ((threadInfo[i]), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2120); } ___kmp_free((threadInfo), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2120);; *msg_id = kmp_i18n_str_LongLineCpuinfo; return -1; }; | |||||
2121 | char *p = strchr(buf + sizeof(s3) - 1, ':'); | |||||
2122 | unsigned val; | |||||
2123 | if ((p == NULL__null) || (KMP_SSCANFsscanf(p + 1, "%u\n", &val) != 1)) | |||||
2124 | goto no_val; | |||||
2125 | if (threadInfo[num_avail][coreIdIndex2] != UINT_MAX(2147483647 *2U +1U)) | |||||
2126 | goto dup_field; | |||||
2127 | threadInfo[num_avail][coreIdIndex2] = val; | |||||
2128 | continue; | |||||
2129 | #endif // KMP_OS_LINUX && USE_SYSFS_INFO | |||||
2130 | } | |||||
2131 | char s4[] = "thread id"; | |||||
2132 | if (strncmp(buf, s4, sizeof(s4) - 1) == 0) { | |||||
2133 | CHECK_LINEif (long_line) { for (i = 0; i <= num_records; i++) { ___kmp_free ((threadInfo[i]), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2133); } ___kmp_free((threadInfo), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2133);; *msg_id = kmp_i18n_str_LongLineCpuinfo; return -1; }; | |||||
2134 | char *p = strchr(buf + sizeof(s4) - 1, ':'); | |||||
2135 | unsigned val; | |||||
2136 | if ((p == NULL__null) || (KMP_SSCANFsscanf(p + 1, "%u\n", &val) != 1)) | |||||
2137 | goto no_val; | |||||
2138 | if (threadInfo[num_avail][threadIdIndex1] != UINT_MAX(2147483647 *2U +1U)) | |||||
2139 | goto dup_field; | |||||
2140 | threadInfo[num_avail][threadIdIndex1] = val; | |||||
2141 | continue; | |||||
2142 | } | |||||
2143 | unsigned level; | |||||
2144 | if (KMP_SSCANFsscanf(buf, "node_%u id", &level) == 1) { | |||||
2145 | CHECK_LINEif (long_line) { for (i = 0; i <= num_records; i++) { ___kmp_free ((threadInfo[i]), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2145); } ___kmp_free((threadInfo), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2145);; *msg_id = kmp_i18n_str_LongLineCpuinfo; return -1; }; | |||||
2146 | char *p = strchr(buf + sizeof(s4) - 1, ':'); | |||||
2147 | unsigned val; | |||||
2148 | if ((p == NULL__null) || (KMP_SSCANFsscanf(p + 1, "%u\n", &val) != 1)) | |||||
2149 | goto no_val; | |||||
2150 | KMP_ASSERT(nodeIdIndex + level <= maxIndex)if (!(4 + level <= maxIndex)) { __kmp_debug_assert("nodeIdIndex + level <= maxIndex" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2150); }; | |||||
2151 | if (threadInfo[num_avail][nodeIdIndex4 + level] != UINT_MAX(2147483647 *2U +1U)) | |||||
2152 | goto dup_field; | |||||
2153 | threadInfo[num_avail][nodeIdIndex4 + level] = val; | |||||
2154 | continue; | |||||
2155 | } | |||||
2156 | ||||||
2157 | // We didn't recognize the leading token on the line. There are lots of | |||||
2158 | // leading tokens that we don't recognize - if the line isn't empty, go on | |||||
2159 | // to the next line. | |||||
2160 | if ((*buf != 0) && (*buf != '\n')) { | |||||
2161 | // If the line is longer than the buffer, read characters | |||||
2162 | // until we find a newline. | |||||
2163 | if (long_line) { | |||||
2164 | int ch; | |||||
2165 | while (((ch = fgetc(f)) != EOF(-1)) && (ch != '\n')) | |||||
2166 | ; | |||||
2167 | } | |||||
2168 | continue; | |||||
2169 | } | |||||
2170 | ||||||
2171 | // A newline has signalled the end of the processor record. | |||||
2172 | // Check that there aren't too many procs specified. | |||||
2173 | if ((int)num_avail == __kmp_xproc) { | |||||
2174 | CLEANUP_THREAD_INFOfor (i = 0; i <= num_records; i++) { ___kmp_free((threadInfo [i]), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2174); } ___kmp_free((threadInfo), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2174);; | |||||
2175 | *msg_id = kmp_i18n_str_TooManyEntries; | |||||
2176 | return -1; | |||||
2177 | } | |||||
2178 | ||||||
2179 | // Check for missing fields. The osId field must be there, and we | |||||
2180 | // currently require that the physical id field is specified, also. | |||||
2181 | if (threadInfo[num_avail][osIdIndex0] == UINT_MAX(2147483647 *2U +1U)) { | |||||
2182 | CLEANUP_THREAD_INFOfor (i = 0; i <= num_records; i++) { ___kmp_free((threadInfo [i]), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2182); } ___kmp_free((threadInfo), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2182);; | |||||
2183 | *msg_id = kmp_i18n_str_MissingProcField; | |||||
2184 | return -1; | |||||
2185 | } | |||||
2186 | if (threadInfo[0][pkgIdIndex3] == UINT_MAX(2147483647 *2U +1U)) { | |||||
2187 | CLEANUP_THREAD_INFOfor (i = 0; i <= num_records; i++) { ___kmp_free((threadInfo [i]), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2187); } ___kmp_free((threadInfo), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2187);; | |||||
2188 | *msg_id = kmp_i18n_str_MissingPhysicalIDField; | |||||
2189 | return -1; | |||||
2190 | } | |||||
2191 | ||||||
2192 | // Skip this proc if it is not included in the machine model. | |||||
2193 | if (!KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex],(__kmp_affin_fullMask)->is_set(threadInfo[num_avail][0]) | |||||
2194 | __kmp_affin_fullMask)(__kmp_affin_fullMask)->is_set(threadInfo[num_avail][0])) { | |||||
2195 | INIT_PROC_INFO(threadInfo[num_avail])for (__index = 0; __index <= maxIndex; __index++) { (threadInfo [num_avail])[__index] = (2147483647 *2U +1U); }; | |||||
2196 | continue; | |||||
2197 | } | |||||
2198 | ||||||
2199 | // We have a successful parse of this proc's info. | |||||
2200 | // Increment the counter, and prepare for the next proc. | |||||
2201 | num_avail++; | |||||
2202 | KMP_ASSERT(num_avail <= num_records)if (!(num_avail <= num_records)) { __kmp_debug_assert("num_avail <= num_records" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2202); }; | |||||
2203 | INIT_PROC_INFO(threadInfo[num_avail])for (__index = 0; __index <= maxIndex; __index++) { (threadInfo [num_avail])[__index] = (2147483647 *2U +1U); }; | |||||
2204 | } | |||||
2205 | continue; | |||||
2206 | ||||||
2207 | no_val: | |||||
2208 | CLEANUP_THREAD_INFOfor (i = 0; i <= num_records; i++) { ___kmp_free((threadInfo [i]), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2208); } ___kmp_free((threadInfo), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2208);; | |||||
2209 | *msg_id = kmp_i18n_str_MissingValCpuinfo; | |||||
2210 | return -1; | |||||
2211 | ||||||
2212 | dup_field: | |||||
2213 | CLEANUP_THREAD_INFOfor (i = 0; i <= num_records; i++) { ___kmp_free((threadInfo [i]), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2213); } ___kmp_free((threadInfo), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2213);; | |||||
2214 | *msg_id = kmp_i18n_str_DuplicateFieldCpuinfo; | |||||
2215 | return -1; | |||||
2216 | } | |||||
2217 | *line = 0; | |||||
2218 | ||||||
2219 | #if KMP_MIC0 && REDUCE_TEAM_SIZE | |||||
2220 | unsigned teamSize = 0; | |||||
2221 | #endif // KMP_MIC && REDUCE_TEAM_SIZE | |||||
2222 | ||||||
2223 | // check for num_records == __kmp_xproc ??? | |||||
2224 | ||||||
2225 | // If there's only one thread context to bind to, form an Address object with | |||||
2226 | // depth 1 and return immediately (or, if affinity is off, set address2os to | |||||
2227 | // NULL and return). | |||||
2228 | // | |||||
2229 | // If it is configured to omit the package level when there is only a single | |||||
2230 | // package, the logic at the end of this routine won't work if there is only a | |||||
2231 | // single thread - it would try to form an Address object with depth 0. | |||||
2232 | KMP_ASSERT(num_avail > 0)if (!(num_avail > 0)) { __kmp_debug_assert("num_avail > 0" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2232); }; | |||||
2233 | KMP_ASSERT(num_avail <= num_records)if (!(num_avail <= num_records)) { __kmp_debug_assert("num_avail <= num_records" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2233); }; | |||||
2234 | if (num_avail == 1) { | |||||
2235 | __kmp_ncores = 1; | |||||
2236 | __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1; | |||||
2237 | if (__kmp_affinity_verbose) { | |||||
2238 | if (!KMP_AFFINITY_CAPABLE()(__kmp_affin_mask_size > 0)) { | |||||
2239 | KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffNotCapableUseCpuinfo , "KMP_AFFINITY"), __kmp_msg_null); | |||||
2240 | KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AvailableOSProc , "KMP_AFFINITY", __kmp_avail_proc), __kmp_msg_null); | |||||
2241 | KMP_INFORM(Uniform, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Uniform , "KMP_AFFINITY"), __kmp_msg_null); | |||||
2242 | } else { | |||||
2243 | char buf[KMP_AFFIN_MASK_PRINT_LEN1024]; | |||||
2244 | __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN1024, | |||||
2245 | __kmp_affin_fullMask); | |||||
2246 | KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffCapableUseCpuinfo , "KMP_AFFINITY"), __kmp_msg_null); | |||||
2247 | if (__kmp_affinity_respect_mask) { | |||||
2248 | KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_InitOSProcSetRespect , "KMP_AFFINITY", buf), __kmp_msg_null); | |||||
2249 | } else { | |||||
2250 | KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_InitOSProcSetNotRespect , "KMP_AFFINITY", buf), __kmp_msg_null); | |||||
2251 | } | |||||
2252 | KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AvailableOSProc , "KMP_AFFINITY", __kmp_avail_proc), __kmp_msg_null); | |||||
2253 | KMP_INFORM(Uniform, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Uniform , "KMP_AFFINITY"), __kmp_msg_null); | |||||
2254 | } | |||||
2255 | int index; | |||||
2256 | kmp_str_buf_t buf; | |||||
2257 | __kmp_str_buf_init(&buf){ (&buf)->str = (&buf)->bulk; (&buf)->size = sizeof((&buf)->bulk); (&buf)->used = 0; (& buf)->bulk[0] = 0; }; | |||||
2258 | __kmp_str_buf_print(&buf, "1"); | |||||
2259 | for (index = maxIndex - 1; index > pkgIdIndex3; index--) { | |||||
2260 | __kmp_str_buf_print(&buf, " x 1"); | |||||
2261 | } | |||||
2262 | KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, 1, 1, 1)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_TopologyExtra , "KMP_AFFINITY", buf.str, 1, 1, 1), __kmp_msg_null); | |||||
2263 | __kmp_str_buf_free(&buf); | |||||
2264 | } | |||||
2265 | ||||||
2266 | if (__kmp_affinity_type == affinity_none) { | |||||
2267 | CLEANUP_THREAD_INFOfor (i = 0; i <= num_records; i++) { ___kmp_free((threadInfo [i]), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2267); } ___kmp_free((threadInfo), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2267);; | |||||
2268 | return 0; | |||||
2269 | } | |||||
2270 | ||||||
2271 | *address2os = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair))___kmp_allocate((sizeof(AddrUnsPair)), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2271); | |||||
2272 | Address addr(1); | |||||
2273 | addr.labels[0] = threadInfo[0][pkgIdIndex3]; | |||||
2274 | (*address2os)[0] = AddrUnsPair(addr, threadInfo[0][osIdIndex0]); | |||||
2275 | ||||||
2276 | if (__kmp_affinity_gran_levels < 0) { | |||||
2277 | __kmp_affinity_gran_levels = 0; | |||||
2278 | } | |||||
2279 | ||||||
2280 | if (__kmp_affinity_verbose) { | |||||
2281 | __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1); | |||||
2282 | } | |||||
2283 | ||||||
2284 | CLEANUP_THREAD_INFOfor (i = 0; i <= num_records; i++) { ___kmp_free((threadInfo [i]), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2284); } ___kmp_free((threadInfo), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2284);; | |||||
2285 | return 1; | |||||
2286 | } | |||||
2287 | ||||||
2288 | // Sort the threadInfo table by physical Id. | |||||
2289 | qsort(threadInfo, num_avail, sizeof(*threadInfo), | |||||
2290 | __kmp_affinity_cmp_ProcCpuInfo_phys_id); | |||||
2291 | ||||||
2292 | // The table is now sorted by pkgId / coreId / threadId, but we really don't | |||||
2293 | // know the radix of any of the fields. pkgId's may be sparsely assigned among | |||||
2294 | // the chips on a system. Although coreId's are usually assigned | |||||
2295 | // [0 .. coresPerPkg-1] and threadId's are usually assigned | |||||
2296 | // [0..threadsPerCore-1], we don't want to make any such assumptions. | |||||
2297 | // | |||||
2298 | // For that matter, we don't know what coresPerPkg and threadsPerCore (or the | |||||
2299 | // total # packages) are at this point - we want to determine that now. We | |||||
2300 | // only have an upper bound on the first two figures. | |||||
2301 | unsigned *counts = | |||||
2302 | (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned))___kmp_allocate(((maxIndex + 1) * sizeof(unsigned)), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2302); | |||||
2303 | unsigned *maxCt = | |||||
2304 | (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned))___kmp_allocate(((maxIndex + 1) * sizeof(unsigned)), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2304); | |||||
2305 | unsigned *totals = | |||||
2306 | (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned))___kmp_allocate(((maxIndex + 1) * sizeof(unsigned)), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2306); | |||||
2307 | unsigned *lastId = | |||||
2308 | (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned))___kmp_allocate(((maxIndex + 1) * sizeof(unsigned)), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2308); | |||||
2309 | ||||||
2310 | bool assign_thread_ids = false; | |||||
2311 | unsigned threadIdCt; | |||||
2312 | unsigned index; | |||||
2313 | ||||||
2314 | restart_radix_check: | |||||
2315 | threadIdCt = 0; | |||||
2316 | ||||||
2317 | // Initialize the counter arrays with data from threadInfo[0]. | |||||
2318 | if (assign_thread_ids) { | |||||
2319 | if (threadInfo[0][threadIdIndex1] == UINT_MAX(2147483647 *2U +1U)) { | |||||
2320 | threadInfo[0][threadIdIndex1] = threadIdCt++; | |||||
2321 | } else if (threadIdCt <= threadInfo[0][threadIdIndex1]) { | |||||
2322 | threadIdCt = threadInfo[0][threadIdIndex1] + 1; | |||||
2323 | } | |||||
2324 | } | |||||
2325 | for (index = 0; index <= maxIndex; index++) { | |||||
2326 | counts[index] = 1; | |||||
2327 | maxCt[index] = 1; | |||||
2328 | totals[index] = 1; | |||||
2329 | lastId[index] = threadInfo[0][index]; | |||||
2330 | ; | |||||
2331 | } | |||||
2332 | ||||||
2333 | // Run through the rest of the OS procs. | |||||
2334 | for (i = 1; i < num_avail; i++) { | |||||
2335 | // Find the most significant index whose id differs from the id for the | |||||
2336 | // previous OS proc. | |||||
2337 | for (index = maxIndex; index >= threadIdIndex1; index--) { | |||||
2338 | if (assign_thread_ids && (index == threadIdIndex1)) { | |||||
2339 | // Auto-assign the thread id field if it wasn't specified. | |||||
2340 | if (threadInfo[i][threadIdIndex1] == UINT_MAX(2147483647 *2U +1U)) { | |||||
2341 | threadInfo[i][threadIdIndex1] = threadIdCt++; | |||||
2342 | } | |||||
2343 | // Apparently the thread id field was specified for some entries and not | |||||
2344 | // others. Start the thread id counter off at the next higher thread id. | |||||
2345 | else if (threadIdCt <= threadInfo[i][threadIdIndex1]) { | |||||
2346 | threadIdCt = threadInfo[i][threadIdIndex1] + 1; | |||||
2347 | } | |||||
2348 | } | |||||
2349 | if (threadInfo[i][index] != lastId[index]) { | |||||
2350 | // Run through all indices which are less significant, and reset the | |||||
2351 | // counts to 1. At all levels up to and including index, we need to | |||||
2352 | // increment the totals and record the last id. | |||||
2353 | unsigned index2; | |||||
2354 | for (index2 = threadIdIndex1; index2 < index; index2++) { | |||||
2355 | totals[index2]++; | |||||
2356 | if (counts[index2] > maxCt[index2]) { | |||||
2357 | maxCt[index2] = counts[index2]; | |||||
2358 | } | |||||
2359 | counts[index2] = 1; | |||||
2360 | lastId[index2] = threadInfo[i][index2]; | |||||
2361 | } | |||||
2362 | counts[index]++; | |||||
2363 | totals[index]++; | |||||
2364 | lastId[index] = threadInfo[i][index]; | |||||
2365 | ||||||
2366 | if (assign_thread_ids && (index > threadIdIndex1)) { | |||||
2367 | ||||||
2368 | #if KMP_MIC0 && REDUCE_TEAM_SIZE | |||||
2369 | // The default team size is the total #threads in the machine | |||||
2370 | // minus 1 thread for every core that has 3 or more threads. | |||||
2371 | teamSize += (threadIdCt <= 2) ? (threadIdCt) : (threadIdCt - 1); | |||||
2372 | #endif // KMP_MIC && REDUCE_TEAM_SIZE | |||||
2373 | ||||||
2374 | // Restart the thread counter, as we are on a new core. | |||||
2375 | threadIdCt = 0; | |||||
2376 | ||||||
2377 | // Auto-assign the thread id field if it wasn't specified. | |||||
2378 | if (threadInfo[i][threadIdIndex1] == UINT_MAX(2147483647 *2U +1U)) { | |||||
2379 | threadInfo[i][threadIdIndex1] = threadIdCt++; | |||||
2380 | } | |||||
2381 | ||||||
2382 | // Apparently the thread id field was specified for some entries and | |||||
2383 | // not others. Start the thread id counter off at the next higher | |||||
2384 | // thread id. | |||||
2385 | else if (threadIdCt <= threadInfo[i][threadIdIndex1]) { | |||||
2386 | threadIdCt = threadInfo[i][threadIdIndex1] + 1; | |||||
2387 | } | |||||
2388 | } | |||||
2389 | break; | |||||
2390 | } | |||||
2391 | } | |||||
2392 | if (index < threadIdIndex1) { | |||||
2393 | // If thread ids were specified, it is an error if they are not unique. | |||||
2394 | // Also, check that we haven't already restarted the loop (to be safe - | |||||
2395 | // shouldn't need to). | |||||
2396 | if ((threadInfo[i][threadIdIndex1] != UINT_MAX(2147483647 *2U +1U)) || assign_thread_ids) { | |||||
2397 | __kmp_free(lastId)___kmp_free((lastId), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2397); | |||||
2398 | __kmp_free(totals)___kmp_free((totals), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2398); | |||||
2399 | __kmp_free(maxCt)___kmp_free((maxCt), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2399); | |||||
2400 | __kmp_free(counts)___kmp_free((counts), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2400); | |||||
2401 | CLEANUP_THREAD_INFOfor (i = 0; i <= num_records; i++) { ___kmp_free((threadInfo [i]), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2401); } ___kmp_free((threadInfo), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2401);; | |||||
2402 | *msg_id = kmp_i18n_str_PhysicalIDsNotUnique; | |||||
2403 | return -1; | |||||
2404 | } | |||||
2405 | ||||||
2406 | // If the thread ids were not specified and we see entries that | |||||
2407 | // are duplicates, start the loop over and assign the thread ids manually. | |||||
2408 | assign_thread_ids = true; | |||||
2409 | goto restart_radix_check; | |||||
2410 | } | |||||
2411 | } | |||||
2412 | ||||||
2413 | #if KMP_MIC0 && REDUCE_TEAM_SIZE | |||||
2414 | // The default team size is the total #threads in the machine | |||||
2415 | // minus 1 thread for every core that has 3 or more threads. | |||||
2416 | teamSize += (threadIdCt <= 2) ? (threadIdCt) : (threadIdCt - 1); | |||||
2417 | #endif // KMP_MIC && REDUCE_TEAM_SIZE | |||||
2418 | ||||||
2419 | for (index = threadIdIndex1; index <= maxIndex; index++) { | |||||
2420 | if (counts[index] > maxCt[index]) { | |||||
2421 | maxCt[index] = counts[index]; | |||||
2422 | } | |||||
2423 | } | |||||
2424 | ||||||
2425 | __kmp_nThreadsPerCore = maxCt[threadIdIndex1]; | |||||
2426 | nCoresPerPkg = maxCt[coreIdIndex2]; | |||||
2427 | nPackages = totals[pkgIdIndex3]; | |||||
2428 | ||||||
2429 | // Check to see if the machine topology is uniform | |||||
2430 | unsigned prod = totals[maxIndex]; | |||||
2431 | for (index = threadIdIndex1; index < maxIndex; index++) { | |||||
2432 | prod *= maxCt[index]; | |||||
2433 | } | |||||
2434 | bool uniform = (prod == totals[threadIdIndex1]); | |||||
2435 | ||||||
2436 | // When affinity is off, this routine will still be called to set | |||||
2437 | // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages. | |||||
2438 | // Make sure all these vars are set correctly, and return now if affinity is | |||||
2439 | // not enabled. | |||||
2440 | __kmp_ncores = totals[coreIdIndex2]; | |||||
2441 | ||||||
2442 | if (__kmp_affinity_verbose) { | |||||
2443 | if (!KMP_AFFINITY_CAPABLE()(__kmp_affin_mask_size > 0)) { | |||||
2444 | KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffNotCapableUseCpuinfo , "KMP_AFFINITY"), __kmp_msg_null); | |||||
2445 | KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AvailableOSProc , "KMP_AFFINITY", __kmp_avail_proc), __kmp_msg_null); | |||||
2446 | if (uniform) { | |||||
2447 | KMP_INFORM(Uniform, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Uniform , "KMP_AFFINITY"), __kmp_msg_null); | |||||
2448 | } else { | |||||
2449 | KMP_INFORM(NonUniform, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_NonUniform , "KMP_AFFINITY"), __kmp_msg_null); | |||||
2450 | } | |||||
2451 | } else { | |||||
2452 | char buf[KMP_AFFIN_MASK_PRINT_LEN1024]; | |||||
2453 | __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN1024, | |||||
2454 | __kmp_affin_fullMask); | |||||
2455 | KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffCapableUseCpuinfo , "KMP_AFFINITY"), __kmp_msg_null); | |||||
2456 | if (__kmp_affinity_respect_mask) { | |||||
2457 | KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_InitOSProcSetRespect , "KMP_AFFINITY", buf), __kmp_msg_null); | |||||
2458 | } else { | |||||
2459 | KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_InitOSProcSetNotRespect , "KMP_AFFINITY", buf), __kmp_msg_null); | |||||
2460 | } | |||||
2461 | KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AvailableOSProc , "KMP_AFFINITY", __kmp_avail_proc), __kmp_msg_null); | |||||
2462 | if (uniform) { | |||||
2463 | KMP_INFORM(Uniform, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Uniform , "KMP_AFFINITY"), __kmp_msg_null); | |||||
2464 | } else { | |||||
2465 | KMP_INFORM(NonUniform, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_NonUniform , "KMP_AFFINITY"), __kmp_msg_null); | |||||
2466 | } | |||||
2467 | } | |||||
2468 | kmp_str_buf_t buf; | |||||
2469 | __kmp_str_buf_init(&buf){ (&buf)->str = (&buf)->bulk; (&buf)->size = sizeof((&buf)->bulk); (&buf)->used = 0; (& buf)->bulk[0] = 0; }; | |||||
2470 | ||||||
2471 | __kmp_str_buf_print(&buf, "%d", totals[maxIndex]); | |||||
2472 | for (index = maxIndex - 1; index >= pkgIdIndex3; index--) { | |||||
2473 | __kmp_str_buf_print(&buf, " x %d", maxCt[index]); | |||||
2474 | } | |||||
2475 | KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, maxCt[coreIdIndex],__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_TopologyExtra , "KMP_AFFINITY", buf.str, maxCt[2], maxCt[1], __kmp_ncores), __kmp_msg_null) | |||||
2476 | maxCt[threadIdIndex], __kmp_ncores)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_TopologyExtra , "KMP_AFFINITY", buf.str, maxCt[2], maxCt[1], __kmp_ncores), __kmp_msg_null); | |||||
2477 | ||||||
2478 | __kmp_str_buf_free(&buf); | |||||
2479 | } | |||||
2480 | ||||||
2481 | #if KMP_MIC0 && REDUCE_TEAM_SIZE | |||||
2482 | // Set the default team size. | |||||
2483 | if ((__kmp_dflt_team_nth == 0) && (teamSize > 0)) { | |||||
2484 | __kmp_dflt_team_nth = teamSize; | |||||
2485 | KA_TRACE(20, ("__kmp_affinity_create_cpuinfo_map: setting "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_affinity_create_cpuinfo_map: setting " "__kmp_dflt_team_nth = %d\n", __kmp_dflt_team_nth); } | |||||
2486 | "__kmp_dflt_team_nth = %d\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_affinity_create_cpuinfo_map: setting " "__kmp_dflt_team_nth = %d\n", __kmp_dflt_team_nth); } | |||||
2487 | __kmp_dflt_team_nth))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_affinity_create_cpuinfo_map: setting " "__kmp_dflt_team_nth = %d\n", __kmp_dflt_team_nth); }; | |||||
2488 | } | |||||
2489 | #endif // KMP_MIC && REDUCE_TEAM_SIZE | |||||
2490 | ||||||
2491 | KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL)if (!(__kmp_pu_os_idx == __null)) { __kmp_debug_assert("__kmp_pu_os_idx == __null" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2491); }; | |||||
2492 | KMP_DEBUG_ASSERT(num_avail == (unsigned)__kmp_avail_proc)if (!(num_avail == (unsigned)__kmp_avail_proc)) { __kmp_debug_assert ("num_avail == (unsigned)__kmp_avail_proc", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2492); }; | |||||
2493 | __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc)___kmp_allocate((sizeof(int) * __kmp_avail_proc), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2493); | |||||
2494 | for (i = 0; i < num_avail; ++i) { // fill the os indices | |||||
2495 | __kmp_pu_os_idx[i] = threadInfo[i][osIdIndex0]; | |||||
2496 | } | |||||
2497 | ||||||
2498 | if (__kmp_affinity_type == affinity_none) { | |||||
2499 | __kmp_free(lastId)___kmp_free((lastId), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2499); | |||||
2500 | __kmp_free(totals)___kmp_free((totals), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2500); | |||||
2501 | __kmp_free(maxCt)___kmp_free((maxCt), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2501); | |||||
2502 | __kmp_free(counts)___kmp_free((counts), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2502); | |||||
2503 | CLEANUP_THREAD_INFOfor (i = 0; i <= num_records; i++) { ___kmp_free((threadInfo [i]), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2503); } ___kmp_free((threadInfo), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2503);; | |||||
2504 | return 0; | |||||
2505 | } | |||||
2506 | ||||||
2507 | // Count the number of levels which have more nodes at that level than at the | |||||
2508 | // parent's level (with there being an implicit root node of the top level). | |||||
2509 | // This is equivalent to saying that there is at least one node at this level | |||||
2510 | // which has a sibling. These levels are in the map, and the package level is | |||||
2511 | // always in the map. | |||||
2512 | bool *inMap = (bool *)__kmp_allocate((maxIndex + 1) * sizeof(bool))___kmp_allocate(((maxIndex + 1) * sizeof(bool)), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2512); | |||||
2513 | for (index = threadIdIndex1; index < maxIndex; index++) { | |||||
2514 | KMP_ASSERT(totals[index] >= totals[index + 1])if (!(totals[index] >= totals[index + 1])) { __kmp_debug_assert ("totals[index] >= totals[index + 1]", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2514); }; | |||||
2515 | inMap[index] = (totals[index] > totals[index + 1]); | |||||
2516 | } | |||||
2517 | inMap[maxIndex] = (totals[maxIndex] > 1); | |||||
2518 | inMap[pkgIdIndex3] = true; | |||||
2519 | ||||||
2520 | int depth = 0; | |||||
2521 | for (index = threadIdIndex1; index <= maxIndex; index++) { | |||||
2522 | if (inMap[index]) { | |||||
2523 | depth++; | |||||
2524 | } | |||||
2525 | } | |||||
2526 | KMP_ASSERT(depth > 0)if (!(depth > 0)) { __kmp_debug_assert("depth > 0", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2526); }; | |||||
2527 | ||||||
2528 | // Construct the data structure that is to be returned. | |||||
2529 | *address2os = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * num_avail)___kmp_allocate((sizeof(AddrUnsPair) * num_avail), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2529); | |||||
2530 | int pkgLevel = -1; | |||||
2531 | int coreLevel = -1; | |||||
2532 | int threadLevel = -1; | |||||
2533 | ||||||
2534 | for (i = 0; i < num_avail; ++i) { | |||||
2535 | Address addr(depth); | |||||
2536 | unsigned os = threadInfo[i][osIdIndex0]; | |||||
2537 | int src_index; | |||||
2538 | int dst_index = 0; | |||||
2539 | ||||||
2540 | for (src_index = maxIndex; src_index >= threadIdIndex1; src_index--) { | |||||
2541 | if (!inMap[src_index]) { | |||||
2542 | continue; | |||||
2543 | } | |||||
2544 | addr.labels[dst_index] = threadInfo[i][src_index]; | |||||
2545 | if (src_index == pkgIdIndex3) { | |||||
2546 | pkgLevel = dst_index; | |||||
2547 | } else if (src_index == coreIdIndex2) { | |||||
2548 | coreLevel = dst_index; | |||||
2549 | } else if (src_index == threadIdIndex1) { | |||||
2550 | threadLevel = dst_index; | |||||
2551 | } | |||||
2552 | dst_index++; | |||||
2553 | } | |||||
2554 | (*address2os)[i] = AddrUnsPair(addr, os); | |||||
2555 | } | |||||
2556 | ||||||
2557 | if (__kmp_affinity_gran_levels < 0) { | |||||
2558 | // Set the granularity level based on what levels are modeled | |||||
2559 | // in the machine topology map. | |||||
2560 | unsigned src_index; | |||||
2561 | __kmp_affinity_gran_levels = 0; | |||||
2562 | for (src_index = threadIdIndex1; src_index <= maxIndex; src_index++) { | |||||
2563 | if (!inMap[src_index]) { | |||||
2564 | continue; | |||||
2565 | } | |||||
2566 | switch (src_index) { | |||||
2567 | case threadIdIndex1: | |||||
2568 | if (__kmp_affinity_gran > affinity_gran_thread) { | |||||
2569 | __kmp_affinity_gran_levels++; | |||||
2570 | } | |||||
2571 | ||||||
2572 | break; | |||||
2573 | case coreIdIndex2: | |||||
2574 | if (__kmp_affinity_gran > affinity_gran_core) { | |||||
2575 | __kmp_affinity_gran_levels++; | |||||
2576 | } | |||||
2577 | break; | |||||
2578 | ||||||
2579 | case pkgIdIndex3: | |||||
2580 | if (__kmp_affinity_gran > affinity_gran_package) { | |||||
2581 | __kmp_affinity_gran_levels++; | |||||
2582 | } | |||||
2583 | break; | |||||
2584 | } | |||||
2585 | } | |||||
2586 | } | |||||
2587 | ||||||
2588 | if (__kmp_affinity_verbose) { | |||||
2589 | __kmp_affinity_print_topology(*address2os, num_avail, depth, pkgLevel, | |||||
2590 | coreLevel, threadLevel); | |||||
2591 | } | |||||
2592 | ||||||
2593 | __kmp_free(inMap)___kmp_free((inMap), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2593); | |||||
2594 | __kmp_free(lastId)___kmp_free((lastId), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2594); | |||||
2595 | __kmp_free(totals)___kmp_free((totals), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2595); | |||||
2596 | __kmp_free(maxCt)___kmp_free((maxCt), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2596); | |||||
2597 | __kmp_free(counts)___kmp_free((counts), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2597); | |||||
2598 | CLEANUP_THREAD_INFOfor (i = 0; i <= num_records; i++) { ___kmp_free((threadInfo [i]), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2598); } ___kmp_free((threadInfo), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2598);; | |||||
2599 | return depth; | |||||
2600 | } | |||||
2601 | ||||||
2602 | // Create and return a table of affinity masks, indexed by OS thread ID. | |||||
2603 | // This routine handles OR'ing together all the affinity masks of threads | |||||
2604 | // that are sufficiently close, if granularity > fine. | |||||
2605 | static kmp_affin_mask_t *__kmp_create_masks(unsigned *maxIndex, | |||||
2606 | unsigned *numUnique, | |||||
2607 | AddrUnsPair *address2os, | |||||
2608 | unsigned numAddrs) { | |||||
2609 | // First form a table of affinity masks in order of OS thread id. | |||||
2610 | unsigned depth; | |||||
2611 | unsigned maxOsId; | |||||
2612 | unsigned i; | |||||
2613 | ||||||
2614 | KMP_ASSERT(numAddrs > 0)if (!(numAddrs > 0)) { __kmp_debug_assert("numAddrs > 0" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2614); }; | |||||
2615 | depth = address2os[0].first.depth; | |||||
2616 | ||||||
2617 | maxOsId = 0; | |||||
2618 | for (i = numAddrs - 1;; --i) { | |||||
2619 | unsigned osId = address2os[i].second; | |||||
2620 | if (osId > maxOsId) { | |||||
2621 | maxOsId = osId; | |||||
2622 | } | |||||
2623 | if (i == 0) | |||||
2624 | break; | |||||
2625 | } | |||||
2626 | kmp_affin_mask_t *osId2Mask; | |||||
2627 | KMP_CPU_ALLOC_ARRAY(osId2Mask, (maxOsId + 1))(osId2Mask = __kmp_affinity_dispatch->allocate_mask_array( (maxOsId + 1))); | |||||
2628 | ||||||
2629 | // Sort the address2os table according to physical order. Doing so will put | |||||
2630 | // all threads on the same core/package/node in consecutive locations. | |||||
2631 | qsort(address2os, numAddrs, sizeof(*address2os), | |||||
2632 | __kmp_affinity_cmp_Address_labels); | |||||
2633 | ||||||
2634 | KMP_ASSERT(__kmp_affinity_gran_levels >= 0)if (!(__kmp_affinity_gran_levels >= 0)) { __kmp_debug_assert ("__kmp_affinity_gran_levels >= 0", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2634); }; | |||||
2635 | if (__kmp_affinity_verbose && (__kmp_affinity_gran_levels > 0)) { | |||||
2636 | KMP_INFORM(ThreadsMigrate, "KMP_AFFINITY", __kmp_affinity_gran_levels)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_ThreadsMigrate , "KMP_AFFINITY", __kmp_affinity_gran_levels), __kmp_msg_null ); | |||||
2637 | } | |||||
2638 | if (__kmp_affinity_gran_levels >= (int)depth) { | |||||
2639 | if (__kmp_affinity_verbose || | |||||
2640 | (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) { | |||||
2641 | KMP_WARNING(AffThreadsMayMigrate)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffThreadsMayMigrate ), __kmp_msg_null); | |||||
2642 | } | |||||
2643 | } | |||||
2644 | ||||||
2645 | // Run through the table, forming the masks for all threads on each core. | |||||
2646 | // Threads on the same core will have identical "Address" objects, not | |||||
2647 | // considering the last level, which must be the thread id. All threads on a | |||||
2648 | // core will appear consecutively. | |||||
2649 | unsigned unique = 0; | |||||
2650 | unsigned j = 0; // index of 1st thread on core | |||||
2651 | unsigned leader = 0; | |||||
2652 | Address *leaderAddr = &(address2os[0].first); | |||||
2653 | kmp_affin_mask_t *sum; | |||||
2654 | KMP_CPU_ALLOC_ON_STACK(sum)(sum = __kmp_affinity_dispatch->allocate_mask()); | |||||
2655 | KMP_CPU_ZERO(sum)(sum)->zero(); | |||||
2656 | KMP_CPU_SET(address2os[0].second, sum)(sum)->set(address2os[0].second); | |||||
2657 | for (i = 1; i < numAddrs; i++) { | |||||
2658 | // If this thread is sufficiently close to the leader (within the | |||||
2659 | // granularity setting), then set the bit for this os thread in the | |||||
2660 | // affinity mask for this group, and go on to the next thread. | |||||
2661 | if (leaderAddr->isClose(address2os[i].first, __kmp_affinity_gran_levels)) { | |||||
2662 | KMP_CPU_SET(address2os[i].second, sum)(sum)->set(address2os[i].second); | |||||
2663 | continue; | |||||
2664 | } | |||||
2665 | ||||||
2666 | // For every thread in this group, copy the mask to the thread's entry in | |||||
2667 | // the osId2Mask table. Mark the first address as a leader. | |||||
2668 | for (; j < i; j++) { | |||||
2669 | unsigned osId = address2os[j].second; | |||||
2670 | KMP_DEBUG_ASSERT(osId <= maxOsId)if (!(osId <= maxOsId)) { __kmp_debug_assert("osId <= maxOsId" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2670); }; | |||||
2671 | kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId)__kmp_affinity_dispatch->index_mask_array(osId2Mask, osId); | |||||
2672 | KMP_CPU_COPY(mask, sum)(mask)->copy(sum); | |||||
2673 | address2os[j].first.leader = (j == leader); | |||||
2674 | } | |||||
2675 | unique++; | |||||
2676 | ||||||
2677 | // Start a new mask. | |||||
2678 | leader = i; | |||||
2679 | leaderAddr = &(address2os[i].first); | |||||
2680 | KMP_CPU_ZERO(sum)(sum)->zero(); | |||||
2681 | KMP_CPU_SET(address2os[i].second, sum)(sum)->set(address2os[i].second); | |||||
2682 | } | |||||
2683 | ||||||
2684 | // For every thread in last group, copy the mask to the thread's | |||||
2685 | // entry in the osId2Mask table. | |||||
2686 | for (; j < i; j++) { | |||||
2687 | unsigned osId = address2os[j].second; | |||||
2688 | KMP_DEBUG_ASSERT(osId <= maxOsId)if (!(osId <= maxOsId)) { __kmp_debug_assert("osId <= maxOsId" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2688); }; | |||||
2689 | kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId)__kmp_affinity_dispatch->index_mask_array(osId2Mask, osId); | |||||
2690 | KMP_CPU_COPY(mask, sum)(mask)->copy(sum); | |||||
2691 | address2os[j].first.leader = (j == leader); | |||||
2692 | } | |||||
2693 | unique++; | |||||
2694 | KMP_CPU_FREE_FROM_STACK(sum)__kmp_affinity_dispatch->deallocate_mask(sum); | |||||
2695 | ||||||
2696 | *maxIndex = maxOsId; | |||||
2697 | *numUnique = unique; | |||||
2698 | return osId2Mask; | |||||
2699 | } | |||||
2700 | ||||||
2701 | // Stuff for the affinity proclist parsers. It's easier to declare these vars | |||||
2702 | // as file-static than to try and pass them through the calling sequence of | |||||
2703 | // the recursive-descent OMP_PLACES parser. | |||||
// newMasks: growable array of masks that ADD_MASK appends to.
2704 | static kmp_affin_mask_t *newMasks; | |||||
// numNewMasks: current capacity of newMasks; ADD_MASK doubles it when full.
2705 | static int numNewMasks; | |||||
// nextNewMask: index of the next free slot in newMasks, i.e. the number of
// masks appended so far.
2706 | static int nextNewMask; | |||||
2707 | ||||||
// Append a copy of _mask to the file-static newMasks array. When the array is
// full (nextNewMask >= numNewMasks) its capacity is doubled first: a larger
// array is allocated, the existing masks are copied over, and the old array
// is released. Expands to a brace-enclosed block.
#define ADD_MASK(_mask)                                                        \
  {                                                                            \
    if (nextNewMask >= numNewMasks) {                                          \
      const int oldCapacity = numNewMasks;                                     \
      kmp_affin_mask_t *grown;                                                 \
      numNewMasks *= 2;                                                        \
      KMP_CPU_INTERNAL_ALLOC_ARRAY(grown, numNewMasks);                        \
      for (int idx = 0; idx < oldCapacity; idx++) {                            \
        kmp_affin_mask_t *from = KMP_CPU_INDEX(newMasks, idx);                 \
        kmp_affin_mask_t *to = KMP_CPU_INDEX(grown, idx);                      \
        KMP_CPU_COPY(to, from);                                                \
      }                                                                        \
      KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, oldCapacity);                      \
      newMasks = grown;                                                        \
    }                                                                          \
    KMP_CPU_COPY(KMP_CPU_INDEX(newMasks, nextNewMask), (_mask));               \
    nextNewMask++;                                                             \
  }
2726 | ||||||
// Append the mask stored for OS proc id _osId in _osId2Mask to newMasks (via
// ADD_MASK). If _osId is greater than _maxOsId, or the mask stored for _osId
// does not have bit _osId set, the id is ignored and, when verbose mode or
// affinity warnings are enabled, an AffIgnoreInvalidProcID warning is issued.
//
// Every macro parameter is parenthesized at each use so that expression
// arguments keep their intended precedence. Note that _osId is evaluated more
// than once, so it must not have side effects.
#define ADD_MASK_OSID(_osId, _osId2Mask, _maxOsId)                             \
  {                                                                            \
    if (((_osId) > (_maxOsId)) ||                                              \
        (!KMP_CPU_ISSET((_osId), KMP_CPU_INDEX((_osId2Mask), (_osId))))) {     \
      if (__kmp_affinity_verbose ||                                            \
          (__kmp_affinity_warnings &&                                          \
           (__kmp_affinity_type != affinity_none))) {                          \
        KMP_WARNING(AffIgnoreInvalidProcID, (_osId));                          \
      }                                                                        \
    } else {                                                                   \
      ADD_MASK(KMP_CPU_INDEX((_osId2Mask), (_osId)));                          \
    }                                                                          \
  }
2740 | ||||||
2741 | // Re-parse the proclist (for the explicit affinity type), and form the list | |||||
2742 | // of affinity newMasks indexed by gtid. | |||||
2743 | static void __kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks, | |||||
2744 | unsigned int *out_numMasks, | |||||
2745 | const char *proclist, | |||||
2746 | kmp_affin_mask_t *osId2Mask, | |||||
2747 | int maxOsId) { | |||||
2748 | int i; | |||||
2749 | const char *scan = proclist; | |||||
2750 | const char *next = proclist; | |||||
2751 | ||||||
2752 | // We use malloc() for the temporary mask vector, so that we can use | |||||
2753 | // realloc() to extend it. | |||||
2754 | numNewMasks = 2; | |||||
2755 | KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks)(newMasks = __kmp_affinity_dispatch->allocate_mask_array(numNewMasks )); | |||||
2756 | nextNewMask = 0; | |||||
2757 | kmp_affin_mask_t *sumMask; | |||||
2758 | KMP_CPU_ALLOC(sumMask)(sumMask = __kmp_affinity_dispatch->allocate_mask()); | |||||
2759 | int setSize = 0; | |||||
2760 | ||||||
2761 | for (;;) { | |||||
| ||||||
2762 | int start, end, stride; | |||||
2763 | ||||||
2764 | SKIP_WS(scan){ while (*(scan) == ' ' || *(scan) == '\t') (scan)++; }; | |||||
2765 | next = scan; | |||||
2766 | if (*next == '\0') { | |||||
2767 | break; | |||||
2768 | } | |||||
2769 | ||||||
2770 | if (*next == '{') { | |||||
2771 | int num; | |||||
2772 | setSize = 0; | |||||
2773 | next++; // skip '{' | |||||
2774 | SKIP_WS(next){ while (*(next) == ' ' || *(next) == '\t') (next)++; }; | |||||
2775 | scan = next; | |||||
2776 | ||||||
2777 | // Read the first integer in the set. | |||||
2778 | KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad proclist")if (!((*next >= '0') && (*next <= '9'))) { __kmp_debug_assert (("bad proclist"), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2778); }; | |||||
2779 | SKIP_DIGITS(next){ while (*(next) >= '0' && *(next) <= '9') (next )++; }; | |||||
2780 | num = __kmp_str_to_int(scan, *next); | |||||
2781 | KMP_ASSERT2(num >= 0, "bad explicit proc list")if (!(num >= 0)) { __kmp_debug_assert(("bad explicit proc list" ), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2781); }; | |||||
2782 | ||||||
2783 | // Copy the mask for that osId to the sum (union) mask. | |||||
2784 | if ((num > maxOsId) || | |||||
2785 | (!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num))(__kmp_affinity_dispatch->index_mask_array(osId2Mask, num) )->is_set(num))) { | |||||
2786 | if (__kmp_affinity_verbose || | |||||
2787 | (__kmp_affinity_warnings && | |||||
2788 | (__kmp_affinity_type != affinity_none))) { | |||||
2789 | KMP_WARNING(AffIgnoreInvalidProcID, num)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffIgnoreInvalidProcID , num), __kmp_msg_null); | |||||
2790 | } | |||||
2791 | KMP_CPU_ZERO(sumMask)(sumMask)->zero(); | |||||
2792 | } else { | |||||
2793 | KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num))(sumMask)->copy(__kmp_affinity_dispatch->index_mask_array (osId2Mask, num)); | |||||
2794 | setSize = 1; | |||||
2795 | } | |||||
2796 | ||||||
2797 | for (;;) { | |||||
2798 | // Check for end of set. | |||||
2799 | SKIP_WS(next){ while (*(next) == ' ' || *(next) == '\t') (next)++; }; | |||||
2800 | if (*next == '}') { | |||||
2801 | next++; // skip '}' | |||||
2802 | break; | |||||
2803 | } | |||||
2804 | ||||||
2805 | // Skip optional comma. | |||||
2806 | if (*next == ',') { | |||||
2807 | next++; | |||||
2808 | } | |||||
2809 | SKIP_WS(next){ while (*(next) == ' ' || *(next) == '\t') (next)++; }; | |||||
2810 | ||||||
2811 | // Read the next integer in the set. | |||||
2812 | scan = next; | |||||
2813 | KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list")if (!((*next >= '0') && (*next <= '9'))) { __kmp_debug_assert (("bad explicit proc list"), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2813); }; | |||||
2814 | ||||||
2815 | SKIP_DIGITS(next){ while (*(next) >= '0' && *(next) <= '9') (next )++; }; | |||||
2816 | num = __kmp_str_to_int(scan, *next); | |||||
2817 | KMP_ASSERT2(num >= 0, "bad explicit proc list")if (!(num >= 0)) { __kmp_debug_assert(("bad explicit proc list" ), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2817); }; | |||||
2818 | ||||||
2819 | // Add the mask for that osId to the sum mask. | |||||
2820 | if ((num > maxOsId) || | |||||
2821 | (!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num))(__kmp_affinity_dispatch->index_mask_array(osId2Mask, num) )->is_set(num))) { | |||||
2822 | if (__kmp_affinity_verbose || | |||||
2823 | (__kmp_affinity_warnings && | |||||
2824 | (__kmp_affinity_type != affinity_none))) { | |||||
2825 | KMP_WARNING(AffIgnoreInvalidProcID, num)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffIgnoreInvalidProcID , num), __kmp_msg_null); | |||||
2826 | } | |||||
2827 | } else { | |||||
2828 | KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num))(sumMask)->bitwise_or(__kmp_affinity_dispatch->index_mask_array (osId2Mask, num)); | |||||
2829 | setSize++; | |||||
2830 | } | |||||
2831 | } | |||||
2832 | if (setSize > 0) { | |||||
2833 | ADD_MASK(sumMask); | |||||
2834 | } | |||||
2835 | ||||||
2836 | SKIP_WS(next){ while (*(next) == ' ' || *(next) == '\t') (next)++; }; | |||||
2837 | if (*next == ',') { | |||||
2838 | next++; | |||||
2839 | } | |||||
2840 | scan = next; | |||||
2841 | continue; | |||||
2842 | } | |||||
2843 | ||||||
2844 | // Read the first integer. | |||||
2845 | KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list")if (!((*next >= '0') && (*next <= '9'))) { __kmp_debug_assert (("bad explicit proc list"), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2845); }; | |||||
2846 | SKIP_DIGITS(next){ while (*(next) >= '0' && *(next) <= '9') (next )++; }; | |||||
2847 | start = __kmp_str_to_int(scan, *next); | |||||
2848 | KMP_ASSERT2(start >= 0, "bad explicit proc list")if (!(start >= 0)) { __kmp_debug_assert(("bad explicit proc list" ), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2848); }; | |||||
2849 | SKIP_WS(next){ while (*(next) == ' ' || *(next) == '\t') (next)++; }; | |||||
2850 | ||||||
2851 | // If this isn't a range, then add a mask to the list and go on. | |||||
2852 | if (*next != '-') { | |||||
2853 | ADD_MASK_OSID(start, osId2Mask, maxOsId); | |||||
2854 | ||||||
2855 | // Skip optional comma. | |||||
2856 | if (*next == ',') { | |||||
2857 | next++; | |||||
2858 | } | |||||
2859 | scan = next; | |||||
2860 | continue; | |||||
2861 | } | |||||
2862 | ||||||
2863 | // This is a range. Skip over the '-' and read in the 2nd int. | |||||
2864 | next++; // skip '-' | |||||
2865 | SKIP_WS(next){ while (*(next) == ' ' || *(next) == '\t') (next)++; }; | |||||
2866 | scan = next; | |||||
2867 | KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list")if (!((*next >= '0') && (*next <= '9'))) { __kmp_debug_assert (("bad explicit proc list"), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2867); }; | |||||
2868 | SKIP_DIGITS(next){ while (*(next) >= '0' && *(next) <= '9') (next )++; }; | |||||
2869 | end = __kmp_str_to_int(scan, *next); | |||||
2870 | KMP_ASSERT2(end >= 0, "bad explicit proc list")if (!(end >= 0)) { __kmp_debug_assert(("bad explicit proc list" ), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2870); }; | |||||
2871 | ||||||
2872 | // Check for a stride parameter | |||||
2873 | stride = 1; | |||||
2874 | SKIP_WS(next){ while (*(next) == ' ' || *(next) == '\t') (next)++; }; | |||||
2875 | if (*next == ':') { | |||||
2876 | // A stride is specified. Skip over the ':" and read the 3rd int. | |||||
2877 | int sign = +1; | |||||
2878 | next++; // skip ':' | |||||
2879 | SKIP_WS(next){ while (*(next) == ' ' || *(next) == '\t') (next)++; }; | |||||
2880 | scan = next; | |||||
2881 | if (*next == '-') { | |||||
2882 | sign = -1; | |||||
2883 | next++; | |||||
2884 | SKIP_WS(next){ while (*(next) == ' ' || *(next) == '\t') (next)++; }; | |||||
2885 | scan = next; | |||||
2886 | } | |||||
2887 | KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list")if (!((*next >= '0') && (*next <= '9'))) { __kmp_debug_assert (("bad explicit proc list"), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2887); }; | |||||
2888 | SKIP_DIGITS(next){ while (*(next) >= '0' && *(next) <= '9') (next )++; }; | |||||
2889 | stride = __kmp_str_to_int(scan, *next); | |||||
2890 | KMP_ASSERT2(stride >= 0, "bad explicit proc list")if (!(stride >= 0)) { __kmp_debug_assert(("bad explicit proc list" ), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2890); }; | |||||
2891 | stride *= sign; | |||||
2892 | } | |||||
2893 | ||||||
2894 | // Do some range checks. | |||||
2895 | KMP_ASSERT2(stride != 0, "bad explicit proc list")if (!(stride != 0)) { __kmp_debug_assert(("bad explicit proc list" ), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2895); }; | |||||
2896 | if (stride > 0) { | |||||
2897 | KMP_ASSERT2(start <= end, "bad explicit proc list")if (!(start <= end)) { __kmp_debug_assert(("bad explicit proc list" ), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2897); }; | |||||
2898 | } else { | |||||
2899 | KMP_ASSERT2(start >= end, "bad explicit proc list")if (!(start >= end)) { __kmp_debug_assert(("bad explicit proc list" ), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2899); }; | |||||
2900 | } | |||||
2901 | KMP_ASSERT2((end - start) / stride <= 65536, "bad explicit proc list")if (!((end - start) / stride <= 65536)) { __kmp_debug_assert (("bad explicit proc list"), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2901); }; | |||||
| ||||||
2902 | ||||||
2903 | // Add the mask for each OS proc # to the list. | |||||
2904 | if (stride > 0) { | |||||
2905 | do { | |||||
2906 | ADD_MASK_OSID(start, osId2Mask, maxOsId); | |||||
2907 | start += stride; | |||||
2908 | } while (start <= end); | |||||
2909 | } else { | |||||
2910 | do { | |||||
2911 | ADD_MASK_OSID(start, osId2Mask, maxOsId); | |||||
2912 | start += stride; | |||||
2913 | } while (start >= end); | |||||
2914 | } | |||||
2915 | ||||||
2916 | // Skip optional comma. | |||||
2917 | SKIP_WS(next){ while (*(next) == ' ' || *(next) == '\t') (next)++; }; | |||||
2918 | if (*next == ',') { | |||||
2919 | next++; | |||||
2920 | } | |||||
2921 | scan = next; | |||||
2922 | } | |||||
2923 | ||||||
2924 | *out_numMasks = nextNewMask; | |||||
2925 | if (nextNewMask == 0) { | |||||
2926 | *out_masks = NULL__null; | |||||
2927 | KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks)__kmp_affinity_dispatch->deallocate_mask_array(newMasks); | |||||
2928 | return; | |||||
2929 | } | |||||
2930 | KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask)((*out_masks) = __kmp_affinity_dispatch->allocate_mask_array (nextNewMask)); | |||||
2931 | for (i = 0; i < nextNewMask; i++) { | |||||
2932 | kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i)__kmp_affinity_dispatch->index_mask_array(newMasks, i); | |||||
2933 | kmp_affin_mask_t *dest = KMP_CPU_INDEX((*out_masks), i)__kmp_affinity_dispatch->index_mask_array((*out_masks), i); | |||||
2934 | KMP_CPU_COPY(dest, src)(dest)->copy(src); | |||||
2935 | } | |||||
2936 | KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks)__kmp_affinity_dispatch->deallocate_mask_array(newMasks); | |||||
2937 | KMP_CPU_FREE(sumMask)__kmp_affinity_dispatch->deallocate_mask(sumMask); | |||||
2938 | } | |||||
2939 | ||||||
2940 | #if OMP_40_ENABLED(50 >= 40) | |||||
2941 | ||||||
2942 | /*----------------------------------------------------------------------------- | |||||
2943 | Re-parse the OMP_PLACES proc id list, forming the newMasks for the different | |||||
2944 | places. Again, here is the grammar: | |||||
2945 | ||||||
2946 | place_list := place | |||||
2947 | place_list := place , place_list | |||||
2948 | place := num | |||||
2949 | place := place : num | |||||
2950 | place := place : num : signed | |||||
2951 | place := { subplacelist } | |||||
2952 | place := ! place // (lowest priority) | |||||
2953 | subplace_list := subplace | |||||
2954 | subplace_list := subplace , subplace_list | |||||
2955 | subplace := num | |||||
2956 | subplace := num : num | |||||
2957 | subplace := num : num : signed | |||||
2958 | signed := num | |||||
2959 | signed := + signed | |||||
2960 | signed := - signed | |||||
2961 | -----------------------------------------------------------------------------*/ | |||||
2962 | ||||||
2963 | static void __kmp_process_subplace_list(const char **scan, | |||||
2964 | kmp_affin_mask_t *osId2Mask, | |||||
2965 | int maxOsId, kmp_affin_mask_t *tempMask, | |||||
2966 | int *setSize) { | |||||
2967 | const char *next; | |||||
2968 | ||||||
2969 | for (;;) { | |||||
2970 | int start, count, stride, i; | |||||
2971 | ||||||
2972 | // Read in the starting proc id | |||||
2973 | SKIP_WS(*scan){ while (*(*scan) == ' ' || *(*scan) == '\t') (*scan)++; }; | |||||
2974 | KMP_ASSERT2((**scan >= '0') && (**scan <= '9'), "bad explicit places list")if (!((**scan >= '0') && (**scan <= '9'))) { __kmp_debug_assert (("bad explicit places list"), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2974); }; | |||||
2975 | next = *scan; | |||||
2976 | SKIP_DIGITS(next){ while (*(next) >= '0' && *(next) <= '9') (next )++; }; | |||||
2977 | start = __kmp_str_to_int(*scan, *next); | |||||
2978 | KMP_ASSERT(start >= 0)if (!(start >= 0)) { __kmp_debug_assert("start >= 0", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 2978); }; | |||||
2979 | *scan = next; | |||||
2980 | ||||||
2981 | // valid follow sets are ',' ':' and '}' | |||||
2982 | SKIP_WS(*scan){ while (*(*scan) == ' ' || *(*scan) == '\t') (*scan)++; }; | |||||
2983 | if (**scan == '}' || **scan == ',') { | |||||
2984 | if ((start > maxOsId) || | |||||
2985 | (!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start))(__kmp_affinity_dispatch->index_mask_array(osId2Mask, start ))->is_set(start))) { | |||||
2986 | if (__kmp_affinity_verbose || | |||||
2987 | (__kmp_affinity_warnings && | |||||
2988 | (__kmp_affinity_type != affinity_none))) { | |||||
2989 | KMP_WARNING(AffIgnoreInvalidProcID, start)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffIgnoreInvalidProcID , start), __kmp_msg_null); | |||||
2990 | } | |||||
2991 | } else { | |||||
2992 | KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start))(tempMask)->bitwise_or(__kmp_affinity_dispatch->index_mask_array (osId2Mask, start)); | |||||
2993 | (*setSize)++; | |||||
2994 | } | |||||
2995 | if (**scan == '}') { | |||||
2996 | break; | |||||
2997 | } | |||||
2998 | (*scan)++; // skip ',' | |||||
2999 | continue; | |||||
3000 | } | |||||
3001 | KMP_ASSERT2(**scan == ':', "bad explicit places list")if (!(**scan == ':')) { __kmp_debug_assert(("bad explicit places list" ), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 3001); }; | |||||
3002 | (*scan)++; // skip ':' | |||||
3003 | ||||||
3004 | // Read count parameter | |||||
3005 | SKIP_WS(*scan){ while (*(*scan) == ' ' || *(*scan) == '\t') (*scan)++; }; | |||||
3006 | KMP_ASSERT2((**scan >= '0') && (**scan <= '9'), "bad explicit places list")if (!((**scan >= '0') && (**scan <= '9'))) { __kmp_debug_assert (("bad explicit places list"), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 3006); }; | |||||
3007 | next = *scan; | |||||
3008 | SKIP_DIGITS(next){ while (*(next) >= '0' && *(next) <= '9') (next )++; }; | |||||
3009 | count = __kmp_str_to_int(*scan, *next); | |||||
3010 | KMP_ASSERT(count >= 0)if (!(count >= 0)) { __kmp_debug_assert("count >= 0", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 3010); }; | |||||
3011 | *scan = next; | |||||
3012 | ||||||
3013 | // valid follow sets are ',' ':' and '}' | |||||
3014 | SKIP_WS(*scan){ while (*(*scan) == ' ' || *(*scan) == '\t') (*scan)++; }; | |||||
3015 | if (**scan == '}' || **scan == ',') { | |||||
3016 | for (i = 0; i < count; i++) { | |||||
3017 | if ((start > maxOsId) || | |||||
3018 | (!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start))(__kmp_affinity_dispatch->index_mask_array(osId2Mask, start ))->is_set(start))) { | |||||
3019 | if (__kmp_affinity_verbose || | |||||
3020 | (__kmp_affinity_warnings && | |||||
3021 | (__kmp_affinity_type != affinity_none))) { | |||||
3022 | KMP_WARNING(AffIgnoreInvalidProcID, start)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffIgnoreInvalidProcID , start), __kmp_msg_null); | |||||
3023 | } | |||||
3024 | break; // don't proliferate warnings for large count | |||||
3025 | } else { | |||||
3026 | KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start))(tempMask)->bitwise_or(__kmp_affinity_dispatch->index_mask_array (osId2Mask, start)); | |||||
3027 | start++; | |||||
3028 | (*setSize)++; | |||||
3029 | } | |||||
3030 | } | |||||
3031 | if (**scan == '}') { | |||||
3032 | break; | |||||
3033 | } | |||||
3034 | (*scan)++; // skip ',' | |||||
3035 | continue; | |||||
3036 | } | |||||
3037 | KMP_ASSERT2(**scan == ':', "bad explicit places list")if (!(**scan == ':')) { __kmp_debug_assert(("bad explicit places list" ), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 3037); }; | |||||
3038 | (*scan)++; // skip ':' | |||||
3039 | ||||||
3040 | // Read stride parameter | |||||
3041 | int sign = +1; | |||||
3042 | for (;;) { | |||||
3043 | SKIP_WS(*scan){ while (*(*scan) == ' ' || *(*scan) == '\t') (*scan)++; }; | |||||
3044 | if (**scan == '+') { | |||||
3045 | (*scan)++; // skip '+' | |||||
3046 | continue; | |||||
3047 | } | |||||
3048 | if (**scan == '-') { | |||||
3049 | sign *= -1; | |||||
3050 | (*scan)++; // skip '-' | |||||
3051 | continue; | |||||
3052 | } | |||||
3053 | break; | |||||
3054 | } | |||||
3055 | SKIP_WS(*scan){ while (*(*scan) == ' ' || *(*scan) == '\t') (*scan)++; }; | |||||
3056 | KMP_ASSERT2((**scan >= '0') && (**scan <= '9'), "bad explicit places list")if (!((**scan >= '0') && (**scan <= '9'))) { __kmp_debug_assert (("bad explicit places list"), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 3056); }; | |||||
3057 | next = *scan; | |||||
3058 | SKIP_DIGITS(next){ while (*(next) >= '0' && *(next) <= '9') (next )++; }; | |||||
3059 | stride = __kmp_str_to_int(*scan, *next); | |||||
3060 | KMP_ASSERT(stride >= 0)if (!(stride >= 0)) { __kmp_debug_assert("stride >= 0", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 3060); }; | |||||
3061 | *scan = next; | |||||
3062 | stride *= sign; | |||||
3063 | ||||||
3064 | // valid follow sets are ',' and '}' | |||||
3065 | SKIP_WS(*scan){ while (*(*scan) == ' ' || *(*scan) == '\t') (*scan)++; }; | |||||
3066 | if (**scan == '}' || **scan == ',') { | |||||
3067 | for (i = 0; i < count; i++) { | |||||
3068 | if ((start > maxOsId) || | |||||
3069 | (!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start))(__kmp_affinity_dispatch->index_mask_array(osId2Mask, start ))->is_set(start))) { | |||||
3070 | if (__kmp_affinity_verbose || | |||||
3071 | (__kmp_affinity_warnings && | |||||
3072 | (__kmp_affinity_type != affinity_none))) { | |||||
3073 | KMP_WARNING(AffIgnoreInvalidProcID, start)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffIgnoreInvalidProcID , start), __kmp_msg_null); | |||||
3074 | } | |||||
3075 | break; // don't proliferate warnings for large count | |||||
3076 | } else { | |||||
3077 | KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start))(tempMask)->bitwise_or(__kmp_affinity_dispatch->index_mask_array (osId2Mask, start)); | |||||
3078 | start += stride; | |||||
3079 | (*setSize)++; | |||||
3080 | } | |||||
3081 | } | |||||
3082 | if (**scan == '}') { | |||||
3083 | break; | |||||
3084 | } | |||||
3085 | (*scan)++; // skip ',' | |||||
3086 | continue; | |||||
3087 | } | |||||
3088 | ||||||
3089 | KMP_ASSERT2(0, "bad explicit places list")if (!(0)) { __kmp_debug_assert(("bad explicit places list"), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 3089); }; | |||||
3090 | } | |||||
3091 | } | |||||
3092 | ||||||
3093 | static void __kmp_process_place(const char **scan, kmp_affin_mask_t *osId2Mask, | |||||
3094 | int maxOsId, kmp_affin_mask_t *tempMask, | |||||
3095 | int *setSize) { | |||||
3096 | const char *next; | |||||
3097 | ||||||
3098 | // valid follow sets are '{' '!' and num | |||||
3099 | SKIP_WS(*scan){ while (*(*scan) == ' ' || *(*scan) == '\t') (*scan)++; }; | |||||
3100 | if (**scan == '{') { | |||||
3101 | (*scan)++; // skip '{' | |||||
3102 | __kmp_process_subplace_list(scan, osId2Mask, maxOsId, tempMask, setSize); | |||||
3103 | KMP_ASSERT2(**scan == '}', "bad explicit places list")if (!(**scan == '}')) { __kmp_debug_assert(("bad explicit places list" ), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 3103); }; | |||||
3104 | (*scan)++; // skip '}' | |||||
3105 | } else if (**scan == '!') { | |||||
3106 | (*scan)++; // skip '!' | |||||
3107 | __kmp_process_place(scan, osId2Mask, maxOsId, tempMask, setSize); | |||||
3108 | KMP_CPU_COMPLEMENT(maxOsId, tempMask)(tempMask)->bitwise_not(); | |||||
3109 | } else if ((**scan >= '0') && (**scan <= '9')) { | |||||
3110 | next = *scan; | |||||
3111 | SKIP_DIGITS(next){ while (*(next) >= '0' && *(next) <= '9') (next )++; }; | |||||
3112 | int num = __kmp_str_to_int(*scan, *next); | |||||
3113 | KMP_ASSERT(num >= 0)if (!(num >= 0)) { __kmp_debug_assert("num >= 0", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 3113); }; | |||||
3114 | if ((num > maxOsId) || | |||||
3115 | (!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num))(__kmp_affinity_dispatch->index_mask_array(osId2Mask, num) )->is_set(num))) { | |||||
3116 | if (__kmp_affinity_verbose || | |||||
3117 | (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) { | |||||
3118 | KMP_WARNING(AffIgnoreInvalidProcID, num)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffIgnoreInvalidProcID , num), __kmp_msg_null); | |||||
3119 | } | |||||
3120 | } else { | |||||
3121 | KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num))(tempMask)->bitwise_or(__kmp_affinity_dispatch->index_mask_array (osId2Mask, num)); | |||||
3122 | (*setSize)++; | |||||
3123 | } | |||||
3124 | *scan = next; // skip num | |||||
3125 | } else { | |||||
3126 | KMP_ASSERT2(0, "bad explicit places list")if (!(0)) { __kmp_debug_assert(("bad explicit places list"), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 3126); }; | |||||
3127 | } | |||||
3128 | } | |||||
3129 | ||||||
3130 | // static void | |||||
3131 | void __kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks, | |||||
3132 | unsigned int *out_numMasks, | |||||
3133 | const char *placelist, | |||||
3134 | kmp_affin_mask_t *osId2Mask, | |||||
3135 | int maxOsId) { | |||||
3136 | int i, j, count, stride, sign; | |||||
3137 | const char *scan = placelist; | |||||
3138 | const char *next = placelist; | |||||
3139 | ||||||
3140 | numNewMasks = 2; | |||||
3141 | KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks)(newMasks = __kmp_affinity_dispatch->allocate_mask_array(numNewMasks )); | |||||
3142 | nextNewMask = 0; | |||||
3143 | ||||||
3144 | // tempMask is modified based on the previous or initial | |||||
3145 | // place to form the current place | |||||
3146 | // previousMask contains the previous place | |||||
3147 | kmp_affin_mask_t *tempMask; | |||||
3148 | kmp_affin_mask_t *previousMask; | |||||
3149 | KMP_CPU_ALLOC(tempMask)(tempMask = __kmp_affinity_dispatch->allocate_mask()); | |||||
3150 | KMP_CPU_ZERO(tempMask)(tempMask)->zero(); | |||||
3151 | KMP_CPU_ALLOC(previousMask)(previousMask = __kmp_affinity_dispatch->allocate_mask()); | |||||
3152 | KMP_CPU_ZERO(previousMask)(previousMask)->zero(); | |||||
3153 | int setSize = 0; | |||||
3154 | ||||||
3155 | for (;;) { | |||||
3156 | __kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize); | |||||
3157 | ||||||
3158 | // valid follow sets are ',' ':' and EOL | |||||
3159 | SKIP_WS(scan){ while (*(scan) == ' ' || *(scan) == '\t') (scan)++; }; | |||||
3160 | if (*scan == '\0' || *scan == ',') { | |||||
3161 | if (setSize > 0) { | |||||
3162 | ADD_MASK(tempMask); | |||||
3163 | } | |||||
3164 | KMP_CPU_ZERO(tempMask)(tempMask)->zero(); | |||||
3165 | setSize = 0; | |||||
3166 | if (*scan == '\0') { | |||||
3167 | break; | |||||
3168 | } | |||||
3169 | scan++; // skip ',' | |||||
3170 | continue; | |||||
3171 | } | |||||
3172 | ||||||
3173 | KMP_ASSERT2(*scan == ':', "bad explicit places list")if (!(*scan == ':')) { __kmp_debug_assert(("bad explicit places list" ), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 3173); }; | |||||
3174 | scan++; // skip ':' | |||||
3175 | ||||||
3176 | // Read count parameter | |||||
3177 | SKIP_WS(scan){ while (*(scan) == ' ' || *(scan) == '\t') (scan)++; }; | |||||
3178 | KMP_ASSERT2((*scan >= '0') && (*scan <= '9'), "bad explicit places list")if (!((*scan >= '0') && (*scan <= '9'))) { __kmp_debug_assert (("bad explicit places list"), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 3178); }; | |||||
3179 | next = scan; | |||||
3180 | SKIP_DIGITS(next){ while (*(next) >= '0' && *(next) <= '9') (next )++; }; | |||||
3181 | count = __kmp_str_to_int(scan, *next); | |||||
3182 | KMP_ASSERT(count >= 0)if (!(count >= 0)) { __kmp_debug_assert("count >= 0", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 3182); }; | |||||
3183 | scan = next; | |||||
3184 | ||||||
3185 | // valid follow sets are ',' ':' and EOL | |||||
3186 | SKIP_WS(scan){ while (*(scan) == ' ' || *(scan) == '\t') (scan)++; }; | |||||
3187 | if (*scan == '\0' || *scan == ',') { | |||||
3188 | stride = +1; | |||||
3189 | } else { | |||||
3190 | KMP_ASSERT2(*scan == ':', "bad explicit places list")if (!(*scan == ':')) { __kmp_debug_assert(("bad explicit places list" ), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 3190); }; | |||||
3191 | scan++; // skip ':' | |||||
3192 | ||||||
3193 | // Read stride parameter | |||||
3194 | sign = +1; | |||||
3195 | for (;;) { | |||||
3196 | SKIP_WS(scan){ while (*(scan) == ' ' || *(scan) == '\t') (scan)++; }; | |||||
3197 | if (*scan == '+') { | |||||
3198 | scan++; // skip '+' | |||||
3199 | continue; | |||||
3200 | } | |||||
3201 | if (*scan == '-') { | |||||
3202 | sign *= -1; | |||||
3203 | scan++; // skip '-' | |||||
3204 | continue; | |||||
3205 | } | |||||
3206 | break; | |||||
3207 | } | |||||
3208 | SKIP_WS(scan){ while (*(scan) == ' ' || *(scan) == '\t') (scan)++; }; | |||||
3209 | KMP_ASSERT2((*scan >= '0') && (*scan <= '9'), "bad explicit places list")if (!((*scan >= '0') && (*scan <= '9'))) { __kmp_debug_assert (("bad explicit places list"), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 3209); }; | |||||
3210 | next = scan; | |||||
3211 | SKIP_DIGITS(next){ while (*(next) >= '0' && *(next) <= '9') (next )++; }; | |||||
3212 | stride = __kmp_str_to_int(scan, *next); | |||||
3213 | KMP_DEBUG_ASSERT(stride >= 0)if (!(stride >= 0)) { __kmp_debug_assert("stride >= 0", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 3213); }; | |||||
3214 | scan = next; | |||||
3215 | stride *= sign; | |||||
3216 | } | |||||
3217 | ||||||
3218 | // Add places determined by initial_place : count : stride | |||||
3219 | for (i = 0; i < count; i++) { | |||||
3220 | if (setSize == 0) { | |||||
3221 | break; | |||||
3222 | } | |||||
3223 | // Add the current place, then build the next place (tempMask) from that | |||||
3224 | KMP_CPU_COPY(previousMask, tempMask)(previousMask)->copy(tempMask); | |||||
3225 | ADD_MASK(previousMask); | |||||
3226 | KMP_CPU_ZERO(tempMask)(tempMask)->zero(); | |||||
3227 | setSize = 0; | |||||
3228 | KMP_CPU_SET_ITERATE(j, previousMask)for (j = (previousMask)->begin(); (int)j != (previousMask) ->end(); j = (previousMask)->next(j)) { | |||||
3229 | if (!KMP_CPU_ISSET(j, previousMask)(previousMask)->is_set(j)) { | |||||
3230 | continue; | |||||
3231 | } | |||||
3232 | if ((j + stride > maxOsId) || (j + stride < 0) || | |||||
3233 | (!KMP_CPU_ISSET(j, __kmp_affin_fullMask)(__kmp_affin_fullMask)->is_set(j)) || | |||||
3234 | (!KMP_CPU_ISSET(j + stride,(__kmp_affinity_dispatch->index_mask_array(osId2Mask, j + stride ))->is_set(j + stride) | |||||
3235 | KMP_CPU_INDEX(osId2Mask, j + stride))(__kmp_affinity_dispatch->index_mask_array(osId2Mask, j + stride ))->is_set(j + stride))) { | |||||
3236 | if ((__kmp_affinity_verbose || | |||||
3237 | (__kmp_affinity_warnings && | |||||
3238 | (__kmp_affinity_type != affinity_none))) && | |||||
3239 | i < count - 1) { | |||||
3240 | KMP_WARNING(AffIgnoreInvalidProcID, j + stride)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffIgnoreInvalidProcID , j + stride), __kmp_msg_null); | |||||
3241 | } | |||||
3242 | continue; | |||||
3243 | } | |||||
3244 | KMP_CPU_SET(j + stride, tempMask)(tempMask)->set(j + stride); | |||||
3245 | setSize++; | |||||
3246 | } | |||||
3247 | } | |||||
3248 | KMP_CPU_ZERO(tempMask)(tempMask)->zero(); | |||||
3249 | setSize = 0; | |||||
3250 | ||||||
3251 | // valid follow sets are ',' and EOL | |||||
3252 | SKIP_WS(scan){ while (*(scan) == ' ' || *(scan) == '\t') (scan)++; }; | |||||
3253 | if (*scan == '\0') { | |||||
3254 | break; | |||||
3255 | } | |||||
3256 | if (*scan == ',') { | |||||
3257 | scan++; // skip ',' | |||||
3258 | continue; | |||||
3259 | } | |||||
3260 | ||||||
3261 | KMP_ASSERT2(0, "bad explicit places list")if (!(0)) { __kmp_debug_assert(("bad explicit places list"), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 3261); }; | |||||
3262 | } | |||||
3263 | ||||||
3264 | *out_numMasks = nextNewMask; | |||||
3265 | if (nextNewMask == 0) { | |||||
3266 | *out_masks = NULL__null; | |||||
3267 | KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks)__kmp_affinity_dispatch->deallocate_mask_array(newMasks); | |||||
3268 | return; | |||||
3269 | } | |||||
3270 | KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask)((*out_masks) = __kmp_affinity_dispatch->allocate_mask_array (nextNewMask)); | |||||
3271 | KMP_CPU_FREE(tempMask)__kmp_affinity_dispatch->deallocate_mask(tempMask); | |||||
3272 | KMP_CPU_FREE(previousMask)__kmp_affinity_dispatch->deallocate_mask(previousMask); | |||||
3273 | for (i = 0; i < nextNewMask; i++) { | |||||
3274 | kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i)__kmp_affinity_dispatch->index_mask_array(newMasks, i); | |||||
3275 | kmp_affin_mask_t *dest = KMP_CPU_INDEX((*out_masks), i)__kmp_affinity_dispatch->index_mask_array((*out_masks), i); | |||||
3276 | KMP_CPU_COPY(dest, src)(dest)->copy(src); | |||||
3277 | } | |||||
3278 | KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks)__kmp_affinity_dispatch->deallocate_mask_array(newMasks); | |||||
3279 | } | |||||
3280 | ||||||
3281 | #endif /* OMP_40_ENABLED */ | |||||
3282 | ||||||
3283 | #undef ADD_MASK | |||||
3284 | #undef ADD_MASK_OSID | |||||
3285 | ||||||
3286 | #if KMP_USE_HWLOC0 | |||||
3287 | static int __kmp_hwloc_skip_PUs_obj(hwloc_topology_t t, hwloc_obj_t o) { | |||||
3288 | // skip PUs descendants of the object o | |||||
3289 | int skipped = 0; | |||||
3290 | hwloc_obj_t hT = NULL__null; | |||||
3291 | int N = __kmp_hwloc_count_children_by_type(t, o, HWLOC_OBJ_PU, &hT); | |||||
3292 | for (int i = 0; i < N; ++i) { | |||||
3293 | KMP_DEBUG_ASSERT(hT)if (!(hT)) { __kmp_debug_assert("hT", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 3293); }; | |||||
3294 | unsigned idx = hT->os_index; | |||||
3295 | if (KMP_CPU_ISSET(idx, __kmp_affin_fullMask)(__kmp_affin_fullMask)->is_set(idx)) { | |||||
3296 | KMP_CPU_CLR(idx, __kmp_affin_fullMask)(__kmp_affin_fullMask)->clear(idx); | |||||
3297 | KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx))if (kmp_c_debug >= 200) { __kmp_debug_printf ("KMP_HW_SUBSET: skipped proc %d\n" , idx); }; | |||||
3298 | ++skipped; | |||||
3299 | } | |||||
3300 | hT = hwloc_get_next_obj_by_type(t, HWLOC_OBJ_PU, hT); | |||||
3301 | } | |||||
3302 | return skipped; // count number of skipped units | |||||
3303 | } | |||||
3304 | ||||||
3305 | static int __kmp_hwloc_obj_has_PUs(hwloc_topology_t t, hwloc_obj_t o) { | |||||
3306 | // check if obj has PUs present in fullMask | |||||
3307 | hwloc_obj_t hT = NULL__null; | |||||
3308 | int N = __kmp_hwloc_count_children_by_type(t, o, HWLOC_OBJ_PU, &hT); | |||||
3309 | for (int i = 0; i < N; ++i) { | |||||
3310 | KMP_DEBUG_ASSERT(hT)if (!(hT)) { __kmp_debug_assert("hT", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 3310); }; | |||||
3311 | unsigned idx = hT->os_index; | |||||
3312 | if (KMP_CPU_ISSET(idx, __kmp_affin_fullMask)(__kmp_affin_fullMask)->is_set(idx)) | |||||
3313 | return 1; // found PU | |||||
3314 | hT = hwloc_get_next_obj_by_type(t, HWLOC_OBJ_PU, hT); | |||||
3315 | } | |||||
3316 | return 0; // no PUs found | |||||
3317 | } | |||||
3318 | #endif // KMP_USE_HWLOC | |||||
3319 | ||||||
3320 | static void __kmp_apply_thread_places(AddrUnsPair **pAddr, int depth) { | |||||
3321 | AddrUnsPair *newAddr; | |||||
3322 | if (__kmp_hws_requested == 0) | |||||
3323 | goto _exit; // no topology limiting actions requested, exit | |||||
3324 | #if KMP_USE_HWLOC0 | |||||
3325 | if (__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) { | |||||
3326 | // Number of subobjects calculated dynamically, this works fine for | |||||
3327 | // any non-uniform topology. | |||||
3328 | // L2 cache objects are determined by depth, other objects - by type. | |||||
3329 | hwloc_topology_t tp = __kmp_hwloc_topology; | |||||
3330 | int nS = 0, nN = 0, nL = 0, nC = 0, | |||||
3331 | nT = 0; // logical index including skipped | |||||
3332 | int nCr = 0, nTr = 0; // number of requested units | |||||
3333 | int nPkg = 0, nCo = 0, n_new = 0, n_old = 0, nCpP = 0, nTpC = 0; // counters | |||||
3334 | hwloc_obj_t hT, hC, hL, hN, hS; // hwloc objects (pointers to) | |||||
3335 | int L2depth, idx; | |||||
3336 | ||||||
3337 | // check support of extensions ---------------------------------- | |||||
3338 | int numa_support = 0, tile_support = 0; | |||||
3339 | if (__kmp_pu_os_idx) | |||||
3340 | hT = hwloc_get_pu_obj_by_os_index(tp, | |||||
3341 | __kmp_pu_os_idx[__kmp_avail_proc - 1]); | |||||
3342 | else | |||||
3343 | hT = hwloc_get_obj_by_type(tp, HWLOC_OBJ_PU, __kmp_avail_proc - 1); | |||||
3344 | if (hT == NULL__null) { // something's gone wrong | |||||
3345 | KMP_WARNING(AffHWSubsetUnsupported)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffHWSubsetUnsupported ), __kmp_msg_null); | |||||
3346 | goto _exit; | |||||
3347 | } | |||||
3348 | // check NUMA node | |||||
3349 | hN = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hT); | |||||
3350 | hS = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hT); | |||||
3351 | if (hN != NULL__null && hN->depth > hS->depth) { | |||||
3352 | numa_support = 1; // 1 in case socket includes node(s) | |||||
3353 | } else if (__kmp_hws_node.num > 0) { | |||||
3354 | // don't support sockets inside NUMA node (no such HW found for testing) | |||||
3355 | KMP_WARNING(AffHWSubsetUnsupported)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffHWSubsetUnsupported ), __kmp_msg_null); | |||||
3356 | goto _exit; | |||||
3357 | } | |||||
3358 | // check L2 cache, get object by depth because of multiple caches | |||||
3359 | L2depth = hwloc_get_cache_type_depth(tp, 2, HWLOC_OBJ_CACHE_UNIFIED); | |||||
3360 | hL = hwloc_get_ancestor_obj_by_depth(tp, L2depth, hT); | |||||
3361 | if (hL != NULL__null && | |||||
3362 | __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE, &hC) > 1) { | |||||
3363 | tile_support = 1; // no sense to count L2 if it includes single core | |||||
3364 | } else if (__kmp_hws_tile.num > 0) { | |||||
3365 | if (__kmp_hws_core.num == 0) { | |||||
3366 | __kmp_hws_core = __kmp_hws_tile; // replace L2 with core | |||||
3367 | __kmp_hws_tile.num = 0; | |||||
3368 | } else { | |||||
3369 | // L2 and core are both requested, but represent same object | |||||
3370 | KMP_WARNING(AffHWSubsetInvalid)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffHWSubsetInvalid ), __kmp_msg_null); | |||||
3371 | goto _exit; | |||||
3372 | } | |||||
3373 | } | |||||
3374 | // end of check of extensions ----------------------------------- | |||||
3375 | ||||||
3376 | // fill in unset items, validate settings ----------------------- | |||||
3377 | if (__kmp_hws_socket.num == 0) | |||||
3378 | __kmp_hws_socket.num = nPackages; // use all available sockets | |||||
3379 | if (__kmp_hws_socket.offset >= nPackages) { | |||||
3380 | KMP_WARNING(AffHWSubsetManySockets)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffHWSubsetManySockets ), __kmp_msg_null); | |||||
3381 | goto _exit; | |||||
3382 | } | |||||
3383 | if (numa_support) { | |||||
3384 | hN = NULL__null; | |||||
3385 | int NN = __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_NUMANODE, | |||||
3386 | &hN); // num nodes in socket | |||||
3387 | if (__kmp_hws_node.num == 0) | |||||
3388 | __kmp_hws_node.num = NN; // use all available nodes | |||||
3389 | if (__kmp_hws_node.offset >= NN) { | |||||
3390 | KMP_WARNING(AffHWSubsetManyNodes)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffHWSubsetManyNodes ), __kmp_msg_null); | |||||
3391 | goto _exit; | |||||
3392 | } | |||||
3393 | if (tile_support) { | |||||
3394 | // get num tiles in node | |||||
3395 | int NL = __kmp_hwloc_count_children_by_depth(tp, hN, L2depth, &hL); | |||||
3396 | if (__kmp_hws_tile.num == 0) { | |||||
3397 | __kmp_hws_tile.num = NL + 1; | |||||
3398 | } // use all available tiles, some node may have more tiles, thus +1 | |||||
3399 | if (__kmp_hws_tile.offset >= NL) { | |||||
3400 | KMP_WARNING(AffHWSubsetManyTiles)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffHWSubsetManyTiles ), __kmp_msg_null); | |||||
3401 | goto _exit; | |||||
3402 | } | |||||
3403 | int NC = __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE, | |||||
3404 | &hC); // num cores in tile | |||||
3405 | if (__kmp_hws_core.num == 0) | |||||
3406 | __kmp_hws_core.num = NC; // use all available cores | |||||
3407 | if (__kmp_hws_core.offset >= NC) { | |||||
3408 | KMP_WARNING(AffHWSubsetManyCores)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffHWSubsetManyCores ), __kmp_msg_null); | |||||
3409 | goto _exit; | |||||
3410 | } | |||||
3411 | } else { // tile_support | |||||
3412 | int NC = __kmp_hwloc_count_children_by_type(tp, hN, HWLOC_OBJ_CORE, | |||||
3413 | &hC); // num cores in node | |||||
3414 | if (__kmp_hws_core.num == 0) | |||||
3415 | __kmp_hws_core.num = NC; // use all available cores | |||||
3416 | if (__kmp_hws_core.offset >= NC) { | |||||
3417 | KMP_WARNING(AffHWSubsetManyCores)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffHWSubsetManyCores ), __kmp_msg_null); | |||||
3418 | goto _exit; | |||||
3419 | } | |||||
3420 | } // tile_support | |||||
3421 | } else { // numa_support | |||||
3422 | if (tile_support) { | |||||
3423 | // get num tiles in socket | |||||
3424 | int NL = __kmp_hwloc_count_children_by_depth(tp, hS, L2depth, &hL); | |||||
3425 | if (__kmp_hws_tile.num == 0) | |||||
3426 | __kmp_hws_tile.num = NL; // use all available tiles | |||||
3427 | if (__kmp_hws_tile.offset >= NL) { | |||||
3428 | KMP_WARNING(AffHWSubsetManyTiles)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffHWSubsetManyTiles ), __kmp_msg_null); | |||||
3429 | goto _exit; | |||||
3430 | } | |||||
3431 | int NC = __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE, | |||||
3432 | &hC); // num cores in tile | |||||
3433 | if (__kmp_hws_core.num == 0) | |||||
3434 | __kmp_hws_core.num = NC; // use all available cores | |||||
3435 | if (__kmp_hws_core.offset >= NC) { | |||||
3436 | KMP_WARNING(AffHWSubsetManyCores)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffHWSubsetManyCores ), __kmp_msg_null); | |||||
3437 | goto _exit; | |||||
3438 | } | |||||
3439 | } else { // tile_support | |||||
3440 | int NC = __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_CORE, | |||||
3441 | &hC); // num cores in socket | |||||
3442 | if (__kmp_hws_core.num == 0) | |||||
3443 | __kmp_hws_core.num = NC; // use all available cores | |||||
3444 | if (__kmp_hws_core.offset >= NC) { | |||||
3445 | KMP_WARNING(AffHWSubsetManyCores)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffHWSubsetManyCores ), __kmp_msg_null); | |||||
3446 | goto _exit; | |||||
3447 | } | |||||
3448 | } // tile_support | |||||
3449 | } | |||||
3450 | if (__kmp_hws_proc.num == 0) | |||||
3451 | __kmp_hws_proc.num = __kmp_nThreadsPerCore; // use all available procs | |||||
3452 | if (__kmp_hws_proc.offset >= __kmp_nThreadsPerCore) { | |||||
3453 | KMP_WARNING(AffHWSubsetManyProcs)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffHWSubsetManyProcs ), __kmp_msg_null); | |||||
3454 | goto _exit; | |||||
3455 | } | |||||
3456 | // end of validation -------------------------------------------- | |||||
3457 | ||||||
3458 | if (pAddr) // pAddr is NULL in case of affinity_none | |||||
3459 | newAddr = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) *___kmp_allocate((sizeof(AddrUnsPair) * __kmp_avail_proc), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 3460) | |||||
3460 | __kmp_avail_proc)___kmp_allocate((sizeof(AddrUnsPair) * __kmp_avail_proc), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 3460); // max size | |||||
3461 | // main loop to form HW subset ---------------------------------- | |||||
3462 | hS = NULL__null; | |||||
3463 | int NP = hwloc_get_nbobjs_by_type(tp, HWLOC_OBJ_PACKAGE); | |||||
3464 | for (int s = 0; s < NP; ++s) { | |||||
3465 | // Check Socket ----------------------------------------------- | |||||
3466 | hS = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hS); | |||||
3467 | if (!__kmp_hwloc_obj_has_PUs(tp, hS)) | |||||
3468 | continue; // skip socket if all PUs are out of fullMask | |||||
3469 | ++nS; // only count objects those have PUs in affinity mask | |||||
3470 | if (nS <= __kmp_hws_socket.offset || | |||||
3471 | nS > __kmp_hws_socket.num + __kmp_hws_socket.offset) { | |||||
3472 | n_old += __kmp_hwloc_skip_PUs_obj(tp, hS); // skip socket | |||||
3473 | continue; // move to next socket | |||||
3474 | } | |||||
3475 | nCr = 0; // count number of cores per socket | |||||
3476 | // socket requested, go down the topology tree | |||||
3477 | // check 4 cases: (+NUMA+Tile), (+NUMA-Tile), (-NUMA+Tile), (-NUMA-Tile) | |||||
3478 | if (numa_support) { | |||||
3479 | nN = 0; | |||||
3480 | hN = NULL__null; | |||||
3481 | // num nodes in current socket | |||||
3482 | int NN = | |||||
3483 | __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_NUMANODE, &hN); | |||||
3484 | for (int n = 0; n < NN; ++n) { | |||||
3485 | // Check NUMA Node ---------------------------------------- | |||||
3486 | if (!__kmp_hwloc_obj_has_PUs(tp, hN)) { | |||||
3487 | hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN); | |||||
3488 | continue; // skip node if all PUs are out of fullMask | |||||
3489 | } | |||||
3490 | ++nN; | |||||
3491 | if (nN <= __kmp_hws_node.offset || | |||||
3492 | nN > __kmp_hws_node.num + __kmp_hws_node.offset) { | |||||
3493 | // skip node as not requested | |||||
3494 | n_old += __kmp_hwloc_skip_PUs_obj(tp, hN); // skip node | |||||
3495 | hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN); | |||||
3496 | continue; // move to next node | |||||
3497 | } | |||||
3498 | // node requested, go down the topology tree | |||||
3499 | if (tile_support) { | |||||
3500 | nL = 0; | |||||
3501 | hL = NULL__null; | |||||
3502 | int NL = __kmp_hwloc_count_children_by_depth(tp, hN, L2depth, &hL); | |||||
3503 | for (int l = 0; l < NL; ++l) { | |||||
3504 | // Check L2 (tile) ------------------------------------ | |||||
3505 | if (!__kmp_hwloc_obj_has_PUs(tp, hL)) { | |||||
3506 | hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL); | |||||
3507 | continue; // skip tile if all PUs are out of fullMask | |||||
3508 | } | |||||
3509 | ++nL; | |||||
3510 | if (nL <= __kmp_hws_tile.offset || | |||||
3511 | nL > __kmp_hws_tile.num + __kmp_hws_tile.offset) { | |||||
3512 | // skip tile as not requested | |||||
3513 | n_old += __kmp_hwloc_skip_PUs_obj(tp, hL); // skip tile | |||||
3514 | hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL); | |||||
3515 | continue; // move to next tile | |||||
3516 | } | |||||
3517 | // tile requested, go down the topology tree | |||||
3518 | nC = 0; | |||||
3519 | hC = NULL__null; | |||||
3520 | // num cores in current tile | |||||
3521 | int NC = __kmp_hwloc_count_children_by_type(tp, hL, | |||||
3522 | HWLOC_OBJ_CORE, &hC); | |||||
3523 | for (int c = 0; c < NC; ++c) { | |||||
3524 | // Check Core --------------------------------------- | |||||
3525 | if (!__kmp_hwloc_obj_has_PUs(tp, hC)) { | |||||
3526 | hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC); | |||||
3527 | continue; // skip core if all PUs are out of fullMask | |||||
3528 | } | |||||
3529 | ++nC; | |||||
3530 | if (nC <= __kmp_hws_core.offset || | |||||
3531 | nC > __kmp_hws_core.num + __kmp_hws_core.offset) { | |||||
3532 | // skip node as not requested | |||||
3533 | n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core | |||||
3534 | hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC); | |||||
3535 | continue; // move to next node | |||||
3536 | } | |||||
3537 | // core requested, go down to PUs | |||||
3538 | nT = 0; | |||||
3539 | nTr = 0; | |||||
3540 | hT = NULL__null; | |||||
3541 | // num procs in current core | |||||
3542 | int NT = __kmp_hwloc_count_children_by_type(tp, hC, | |||||
3543 | HWLOC_OBJ_PU, &hT); | |||||
3544 | for (int t = 0; t < NT; ++t) { | |||||
3545 | // Check PU --------------------------------------- | |||||
3546 | idx = hT->os_index; | |||||
3547 | if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)(__kmp_affin_fullMask)->is_set(idx)) { | |||||
3548 | hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT); | |||||
3549 | continue; // skip PU if not in fullMask | |||||
3550 | } | |||||
3551 | ++nT; | |||||
3552 | if (nT <= __kmp_hws_proc.offset || | |||||
3553 | nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) { | |||||
3554 | // skip PU | |||||
3555 | KMP_CPU_CLR(idx, __kmp_affin_fullMask)(__kmp_affin_fullMask)->clear(idx); | |||||
3556 | ++n_old; | |||||
3557 | KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx))if (kmp_c_debug >= 200) { __kmp_debug_printf ("KMP_HW_SUBSET: skipped proc %d\n" , idx); }; | |||||
3558 | hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT); | |||||
3559 | continue; // move to next node | |||||
3560 | } | |||||
3561 | ++nTr; | |||||
3562 | if (pAddr) // collect requested thread's data | |||||
3563 | newAddr[n_new] = (*pAddr)[n_old]; | |||||
3564 | ++n_new; | |||||
3565 | ++n_old; | |||||
3566 | hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT); | |||||
3567 | } // threads loop | |||||
3568 | if (nTr > 0) { | |||||
3569 | ++nCr; // num cores per socket | |||||
3570 | ++nCo; // total num cores | |||||
3571 | if (nTr > nTpC) | |||||
3572 | nTpC = nTr; // calc max threads per core | |||||
3573 | } | |||||
3574 | hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC); | |||||
3575 | } // cores loop | |||||
3576 | hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL); | |||||
3577 | } // tiles loop | |||||
3578 | } else { // tile_support | |||||
3579 | // no tiles, check cores | |||||
3580 | nC = 0; | |||||
3581 | hC = NULL__null; | |||||
3582 | // num cores in current node | |||||
3583 | int NC = | |||||
3584 | __kmp_hwloc_count_children_by_type(tp, hN, HWLOC_OBJ_CORE, &hC); | |||||
3585 | for (int c = 0; c < NC; ++c) { | |||||
3586 | // Check Core --------------------------------------- | |||||
3587 | if (!__kmp_hwloc_obj_has_PUs(tp, hC)) { | |||||
3588 | hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC); | |||||
3589 | continue; // skip core if all PUs are out of fullMask | |||||
3590 | } | |||||
3591 | ++nC; | |||||
3592 | if (nC <= __kmp_hws_core.offset || | |||||
3593 | nC > __kmp_hws_core.num + __kmp_hws_core.offset) { | |||||
3594 | // skip node as not requested | |||||
3595 | n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core | |||||
3596 | hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC); | |||||
3597 | continue; // move to next node | |||||
3598 | } | |||||
3599 | // core requested, go down to PUs | |||||
3600 | nT = 0; | |||||
3601 | nTr = 0; | |||||
3602 | hT = NULL__null; | |||||
3603 | int NT = | |||||
3604 | __kmp_hwloc_count_children_by_type(tp, hC, HWLOC_OBJ_PU, &hT); | |||||
3605 | for (int t = 0; t < NT; ++t) { | |||||
3606 | // Check PU --------------------------------------- | |||||
3607 | idx = hT->os_index; | |||||
3608 | if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)(__kmp_affin_fullMask)->is_set(idx)) { | |||||
3609 | hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT); | |||||
3610 | continue; // skip PU if not in fullMask | |||||
3611 | } | |||||
3612 | ++nT; | |||||
3613 | if (nT <= __kmp_hws_proc.offset || | |||||
3614 | nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) { | |||||
3615 | // skip PU | |||||
3616 | KMP_CPU_CLR(idx, __kmp_affin_fullMask)(__kmp_affin_fullMask)->clear(idx); | |||||
3617 | ++n_old; | |||||
3618 | KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx))if (kmp_c_debug >= 200) { __kmp_debug_printf ("KMP_HW_SUBSET: skipped proc %d\n" , idx); }; | |||||
3619 | hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT); | |||||
3620 | continue; // move to next node | |||||
3621 | } | |||||
3622 | ++nTr; | |||||
3623 | if (pAddr) // collect requested thread's data | |||||
3624 | newAddr[n_new] = (*pAddr)[n_old]; | |||||
3625 | ++n_new; | |||||
3626 | ++n_old; | |||||
3627 | hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT); | |||||
3628 | } // threads loop | |||||
3629 | if (nTr > 0) { | |||||
3630 | ++nCr; // num cores per socket | |||||
3631 | ++nCo; // total num cores | |||||
3632 | if (nTr > nTpC) | |||||
3633 | nTpC = nTr; // calc max threads per core | |||||
3634 | } | |||||
3635 | hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC); | |||||
3636 | } // cores loop | |||||
3637 | } // tiles support | |||||
3638 | hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN); | |||||
3639 | } // nodes loop | |||||
3640 | } else { // numa_support | |||||
3641 | // no NUMA support | |||||
3642 | if (tile_support) { | |||||
3643 | nL = 0; | |||||
3644 | hL = NULL__null; | |||||
3645 | // num tiles in current socket | |||||
3646 | int NL = __kmp_hwloc_count_children_by_depth(tp, hS, L2depth, &hL); | |||||
3647 | for (int l = 0; l < NL; ++l) { | |||||
3648 | // Check L2 (tile) ------------------------------------ | |||||
3649 | if (!__kmp_hwloc_obj_has_PUs(tp, hL)) { | |||||
3650 | hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL); | |||||
3651 | continue; // skip tile if all PUs are out of fullMask | |||||
3652 | } | |||||
3653 | ++nL; | |||||
3654 | if (nL <= __kmp_hws_tile.offset || | |||||
3655 | nL > __kmp_hws_tile.num + __kmp_hws_tile.offset) { | |||||
3656 | // skip tile as not requested | |||||
3657 | n_old += __kmp_hwloc_skip_PUs_obj(tp, hL); // skip tile | |||||
3658 | hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL); | |||||
3659 | continue; // move to next tile | |||||
3660 | } | |||||
3661 | // tile requested, go down the topology tree | |||||
3662 | nC = 0; | |||||
3663 | hC = NULL__null; | |||||
3664 | // num cores per tile | |||||
3665 | int NC = | |||||
3666 | __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE, &hC); | |||||
3667 | for (int c = 0; c < NC; ++c) { | |||||
3668 | // Check Core --------------------------------------- | |||||
3669 | if (!__kmp_hwloc_obj_has_PUs(tp, hC)) { | |||||
3670 | hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC); | |||||
3671 | continue; // skip core if all PUs are out of fullMask | |||||
3672 | } | |||||
3673 | ++nC; | |||||
3674 | if (nC <= __kmp_hws_core.offset || | |||||
3675 | nC > __kmp_hws_core.num + __kmp_hws_core.offset) { | |||||
3676 | // skip core as not requested | |||||
3677 | n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core | |||||
3678 | hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC); | |||||
3679 | continue; // move to next node | |||||
3680 | } | |||||
3681 | // core requested, go down to PUs | |||||
3682 | nT = 0; | |||||
3683 | nTr = 0; | |||||
3684 | hT = NULL__null; | |||||
3685 | // num procs per core | |||||
3686 | int NT = | |||||
3687 | __kmp_hwloc_count_children_by_type(tp, hC, HWLOC_OBJ_PU, &hT); | |||||
3688 | for (int t = 0; t < NT; ++t) { | |||||
3689 | // Check PU --------------------------------------- | |||||
3690 | idx = hT->os_index; | |||||
3691 | if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)(__kmp_affin_fullMask)->is_set(idx)) { | |||||
3692 | hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT); | |||||
3693 | continue; // skip PU if not in fullMask | |||||
3694 | } | |||||
3695 | ++nT; | |||||
3696 | if (nT <= __kmp_hws_proc.offset || | |||||
3697 | nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) { | |||||
3698 | // skip PU | |||||
3699 | KMP_CPU_CLR(idx, __kmp_affin_fullMask)(__kmp_affin_fullMask)->clear(idx); | |||||
3700 | ++n_old; | |||||
3701 | KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx))if (kmp_c_debug >= 200) { __kmp_debug_printf ("KMP_HW_SUBSET: skipped proc %d\n" , idx); }; | |||||
3702 | hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT); | |||||
3703 | continue; // move to next node | |||||
3704 | } | |||||
3705 | ++nTr; | |||||
3706 | if (pAddr) // collect requested thread's data | |||||
3707 | newAddr[n_new] = (*pAddr)[n_old]; | |||||
3708 | ++n_new; | |||||
3709 | ++n_old; | |||||
3710 | hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT); | |||||
3711 | } // threads loop | |||||
3712 | if (nTr > 0) { | |||||
3713 | ++nCr; // num cores per socket | |||||
3714 | ++nCo; // total num cores | |||||
3715 | if (nTr > nTpC) | |||||
3716 | nTpC = nTr; // calc max threads per core | |||||
3717 | } | |||||
3718 | hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC); | |||||
3719 | } // cores loop | |||||
3720 | hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL); | |||||
3721 | } // tiles loop | |||||
3722 | } else { // tile_support | |||||
3723 | // no tiles, check cores | |||||
3724 | nC = 0; | |||||
3725 | hC = NULL__null; | |||||
3726 | // num cores in socket | |||||
3727 | int NC = | |||||
3728 | __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_CORE, &hC); | |||||
3729 | for (int c = 0; c < NC; ++c) { | |||||
3730 | // Check Core ------------------------------------------- | |||||
3731 | if (!__kmp_hwloc_obj_has_PUs(tp, hC)) { | |||||
3732 | hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC); | |||||
3733 | continue; // skip core if all PUs are out of fullMask | |||||
3734 | } | |||||
3735 | ++nC; | |||||
3736 | if (nC <= __kmp_hws_core.offset || | |||||
3737 | nC > __kmp_hws_core.num + __kmp_hws_core.offset) { | |||||
3738 | // skip core as not requested | |||||
3739 | n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core | |||||
3740 | hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC); | |||||
3741 | continue; // move to next node | |||||
3742 | } | |||||
3743 | // core requested, go down to PUs | |||||
3744 | nT = 0; | |||||
3745 | nTr = 0; | |||||
3746 | hT = NULL__null; | |||||
3747 | // num procs per core | |||||
3748 | int NT = | |||||
3749 | __kmp_hwloc_count_children_by_type(tp, hC, HWLOC_OBJ_PU, &hT); | |||||
3750 | for (int t = 0; t < NT; ++t) { | |||||
3751 | // Check PU --------------------------------------- | |||||
3752 | idx = hT->os_index; | |||||
3753 | if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)(__kmp_affin_fullMask)->is_set(idx)) { | |||||
3754 | hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT); | |||||
3755 | continue; // skip PU if not in fullMask | |||||
3756 | } | |||||
3757 | ++nT; | |||||
3758 | if (nT <= __kmp_hws_proc.offset || | |||||
3759 | nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) { | |||||
3760 | // skip PU | |||||
3761 | KMP_CPU_CLR(idx, __kmp_affin_fullMask)(__kmp_affin_fullMask)->clear(idx); | |||||
3762 | ++n_old; | |||||
3763 | KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx))if (kmp_c_debug >= 200) { __kmp_debug_printf ("KMP_HW_SUBSET: skipped proc %d\n" , idx); }; | |||||
3764 | hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT); | |||||
3765 | continue; // move to next node | |||||
3766 | } | |||||
3767 | ++nTr; | |||||
3768 | if (pAddr) // collect requested thread's data | |||||
3769 | newAddr[n_new] = (*pAddr)[n_old]; | |||||
3770 | ++n_new; | |||||
3771 | ++n_old; | |||||
3772 | hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT); | |||||
3773 | } // threads loop | |||||
3774 | if (nTr > 0) { | |||||
3775 | ++nCr; // num cores per socket | |||||
3776 | ++nCo; // total num cores | |||||
3777 | if (nTr > nTpC) | |||||
3778 | nTpC = nTr; // calc max threads per core | |||||
3779 | } | |||||
3780 | hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC); | |||||
3781 | } // cores loop | |||||
3782 | } // tiles support | |||||
3783 | } // numa_support | |||||
3784 | if (nCr > 0) { // found cores? | |||||
3785 | ++nPkg; // num sockets | |||||
3786 | if (nCr > nCpP) | |||||
3787 | nCpP = nCr; // calc max cores per socket | |||||
3788 | } | |||||
3789 | } // sockets loop | |||||
3790 | ||||||
3791 | // check the subset is valid | |||||
3792 | KMP_DEBUG_ASSERT(n_old == __kmp_avail_proc)if (!(n_old == __kmp_avail_proc)) { __kmp_debug_assert("n_old == __kmp_avail_proc" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 3792); }; | |||||
3793 | KMP_DEBUG_ASSERT(nPkg > 0)if (!(nPkg > 0)) { __kmp_debug_assert("nPkg > 0", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 3793); }; | |||||
3794 | KMP_DEBUG_ASSERT(nCpP > 0)if (!(nCpP > 0)) { __kmp_debug_assert("nCpP > 0", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 3794); }; | |||||
3795 | KMP_DEBUG_ASSERT(nTpC > 0)if (!(nTpC > 0)) { __kmp_debug_assert("nTpC > 0", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 3795); }; | |||||
3796 | KMP_DEBUG_ASSERT(nCo > 0)if (!(nCo > 0)) { __kmp_debug_assert("nCo > 0", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 3796); }; | |||||
3797 | KMP_DEBUG_ASSERT(nPkg <= nPackages)if (!(nPkg <= nPackages)) { __kmp_debug_assert("nPkg <= nPackages" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 3797); }; | |||||
3798 | KMP_DEBUG_ASSERT(nCpP <= nCoresPerPkg)if (!(nCpP <= nCoresPerPkg)) { __kmp_debug_assert("nCpP <= nCoresPerPkg" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 3798); }; | |||||
3799 | KMP_DEBUG_ASSERT(nTpC <= __kmp_nThreadsPerCore)if (!(nTpC <= __kmp_nThreadsPerCore)) { __kmp_debug_assert ("nTpC <= __kmp_nThreadsPerCore", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 3799); }; | |||||
3800 | KMP_DEBUG_ASSERT(nCo <= __kmp_ncores)if (!(nCo <= __kmp_ncores)) { __kmp_debug_assert("nCo <= __kmp_ncores" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 3800); }; | |||||
3801 | ||||||
3802 | nPackages = nPkg; // correct num sockets | |||||
3803 | nCoresPerPkg = nCpP; // correct num cores per socket | |||||
3804 | __kmp_nThreadsPerCore = nTpC; // correct num threads per core | |||||
3805 | __kmp_avail_proc = n_new; // correct num procs | |||||
3806 | __kmp_ncores = nCo; // correct num cores | |||||
3807 | // hwloc topology method end | |||||
3808 | } else | |||||
3809 | #endif // KMP_USE_HWLOC | |||||
3810 | { | |||||
3811 | int n_old = 0, n_new = 0, proc_num = 0; | |||||
3812 | if (__kmp_hws_node.num > 0 || __kmp_hws_tile.num > 0) { | |||||
3813 | KMP_WARNING(AffHWSubsetNoHWLOC)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffHWSubsetNoHWLOC ), __kmp_msg_null); | |||||
3814 | goto _exit; | |||||
3815 | } | |||||
3816 | if (__kmp_hws_socket.num == 0) | |||||
3817 | __kmp_hws_socket.num = nPackages; // use all available sockets | |||||
3818 | if (__kmp_hws_core.num == 0) | |||||
3819 | __kmp_hws_core.num = nCoresPerPkg; // use all available cores | |||||
3820 | if (__kmp_hws_proc.num == 0 || __kmp_hws_proc.num > __kmp_nThreadsPerCore) | |||||
3821 | __kmp_hws_proc.num = __kmp_nThreadsPerCore; // use all HW contexts | |||||
3822 | if (!__kmp_affinity_uniform_topology()) { | |||||
3823 | KMP_WARNING(AffHWSubsetNonUniform)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffHWSubsetNonUniform ), __kmp_msg_null); | |||||
3824 | goto _exit; // don't support non-uniform topology | |||||
3825 | } | |||||
3826 | if (depth > 3) { | |||||
3827 | KMP_WARNING(AffHWSubsetNonThreeLevel)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffHWSubsetNonThreeLevel ), __kmp_msg_null); | |||||
3828 | goto _exit; // don't support not-3-level topology | |||||
3829 | } | |||||
3830 | if (__kmp_hws_socket.offset + __kmp_hws_socket.num > nPackages) { | |||||
3831 | KMP_WARNING(AffHWSubsetManySockets)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffHWSubsetManySockets ), __kmp_msg_null); | |||||
3832 | goto _exit; | |||||
3833 | } | |||||
3834 | if (__kmp_hws_core.offset + __kmp_hws_core.num > nCoresPerPkg) { | |||||
3835 | KMP_WARNING(AffHWSubsetManyCores)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffHWSubsetManyCores ), __kmp_msg_null); | |||||
3836 | goto _exit; | |||||
3837 | } | |||||
3838 | // Form the requested subset | |||||
3839 | if (pAddr) // pAddr is NULL in case of affinity_none | |||||
3840 | newAddr = (AddrUnsPair *)__kmp_allocate(___kmp_allocate((sizeof(AddrUnsPair) * __kmp_hws_socket.num * __kmp_hws_core.num * __kmp_hws_proc.num), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 3842) | |||||
3841 | sizeof(AddrUnsPair) * __kmp_hws_socket.num * __kmp_hws_core.num *___kmp_allocate((sizeof(AddrUnsPair) * __kmp_hws_socket.num * __kmp_hws_core.num * __kmp_hws_proc.num), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 3842) | |||||
3842 | __kmp_hws_proc.num)___kmp_allocate((sizeof(AddrUnsPair) * __kmp_hws_socket.num * __kmp_hws_core.num * __kmp_hws_proc.num), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 3842); | |||||
3843 | for (int i = 0; i < nPackages; ++i) { | |||||
3844 | if (i < __kmp_hws_socket.offset || | |||||
3845 | i >= __kmp_hws_socket.offset + __kmp_hws_socket.num) { | |||||
3846 | // skip not-requested socket | |||||
3847 | n_old += nCoresPerPkg * __kmp_nThreadsPerCore; | |||||
3848 | if (__kmp_pu_os_idx != NULL__null) { | |||||
3849 | // walk through skipped socket | |||||
3850 | for (int j = 0; j < nCoresPerPkg; ++j) { | |||||
3851 | for (int k = 0; k < __kmp_nThreadsPerCore; ++k) { | |||||
3852 | KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask)(__kmp_affin_fullMask)->clear(__kmp_pu_os_idx[proc_num]); | |||||
3853 | ++proc_num; | |||||
3854 | } | |||||
3855 | } | |||||
3856 | } | |||||
3857 | } else { | |||||
3858 | // walk through requested socket | |||||
3859 | for (int j = 0; j < nCoresPerPkg; ++j) { | |||||
3860 | if (j < __kmp_hws_core.offset || | |||||
3861 | j >= __kmp_hws_core.offset + | |||||
3862 | __kmp_hws_core.num) { // skip not-requested core | |||||
3863 | n_old += __kmp_nThreadsPerCore; | |||||
3864 | if (__kmp_pu_os_idx != NULL__null) { | |||||
3865 | for (int k = 0; k < __kmp_nThreadsPerCore; ++k) { | |||||
3866 | KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask)(__kmp_affin_fullMask)->clear(__kmp_pu_os_idx[proc_num]); | |||||
3867 | ++proc_num; | |||||
3868 | } | |||||
3869 | } | |||||
3870 | } else { | |||||
3871 | // walk through requested core | |||||
3872 | for (int k = 0; k < __kmp_nThreadsPerCore; ++k) { | |||||
3873 | if (k < __kmp_hws_proc.num) { | |||||
3874 | if (pAddr) // collect requested thread's data | |||||
3875 | newAddr[n_new] = (*pAddr)[n_old]; | |||||
3876 | n_new++; | |||||
3877 | } else { | |||||
3878 | if (__kmp_pu_os_idx != NULL__null) | |||||
3879 | KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask)(__kmp_affin_fullMask)->clear(__kmp_pu_os_idx[proc_num]); | |||||
3880 | } | |||||
3881 | n_old++; | |||||
3882 | ++proc_num; | |||||
3883 | } | |||||
3884 | } | |||||
3885 | } | |||||
3886 | } | |||||
3887 | } | |||||
3888 | KMP_DEBUG_ASSERT(n_old == nPackages * nCoresPerPkg * __kmp_nThreadsPerCore)if (!(n_old == nPackages * nCoresPerPkg * __kmp_nThreadsPerCore )) { __kmp_debug_assert("n_old == nPackages * nCoresPerPkg * __kmp_nThreadsPerCore" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 3888); }; | |||||
3889 | KMP_DEBUG_ASSERT(n_new ==if (!(n_new == __kmp_hws_socket.num * __kmp_hws_core.num * __kmp_hws_proc .num)) { __kmp_debug_assert("n_new == __kmp_hws_socket.num * __kmp_hws_core.num * __kmp_hws_proc.num" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 3891); } | |||||
3890 | __kmp_hws_socket.num * __kmp_hws_core.num *if (!(n_new == __kmp_hws_socket.num * __kmp_hws_core.num * __kmp_hws_proc .num)) { __kmp_debug_assert("n_new == __kmp_hws_socket.num * __kmp_hws_core.num * __kmp_hws_proc.num" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 3891); } | |||||
3891 | __kmp_hws_proc.num)if (!(n_new == __kmp_hws_socket.num * __kmp_hws_core.num * __kmp_hws_proc .num)) { __kmp_debug_assert("n_new == __kmp_hws_socket.num * __kmp_hws_core.num * __kmp_hws_proc.num" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 3891); }; | |||||
3892 | nPackages = __kmp_hws_socket.num; // correct nPackages | |||||
3893 | nCoresPerPkg = __kmp_hws_core.num; // correct nCoresPerPkg | |||||
3894 | __kmp_nThreadsPerCore = __kmp_hws_proc.num; // correct __kmp_nThreadsPerCore | |||||
3895 | __kmp_avail_proc = n_new; // correct avail_proc | |||||
3896 | __kmp_ncores = nPackages * __kmp_hws_core.num; // correct ncores | |||||
3897 | } // non-hwloc topology method | |||||
3898 | if (pAddr) { | |||||
3899 | __kmp_free(*pAddr)___kmp_free((*pAddr), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 3899); | |||||
3900 | *pAddr = newAddr; // replace old topology with new one | |||||
3901 | } | |||||
3902 | if (__kmp_affinity_verbose) { | |||||
3903 | char m[KMP_AFFIN_MASK_PRINT_LEN1024]; | |||||
3904 | __kmp_affinity_print_mask(m, KMP_AFFIN_MASK_PRINT_LEN1024, | |||||
3905 | __kmp_affin_fullMask); | |||||
3906 | if (__kmp_affinity_respect_mask) { | |||||
3907 | KMP_INFORM(InitOSProcSetRespect, "KMP_HW_SUBSET", m)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_InitOSProcSetRespect , "KMP_HW_SUBSET", m), __kmp_msg_null); | |||||
3908 | } else { | |||||
3909 | KMP_INFORM(InitOSProcSetNotRespect, "KMP_HW_SUBSET", m)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_InitOSProcSetNotRespect , "KMP_HW_SUBSET", m), __kmp_msg_null); | |||||
3910 | } | |||||
3911 | KMP_INFORM(AvailableOSProc, "KMP_HW_SUBSET", __kmp_avail_proc)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AvailableOSProc , "KMP_HW_SUBSET", __kmp_avail_proc), __kmp_msg_null); | |||||
3912 | kmp_str_buf_t buf; | |||||
3913 | __kmp_str_buf_init(&buf){ (&buf)->str = (&buf)->bulk; (&buf)->size = sizeof((&buf)->bulk); (&buf)->used = 0; (& buf)->bulk[0] = 0; }; | |||||
3914 | __kmp_str_buf_print(&buf, "%d", nPackages); | |||||
3915 | KMP_INFORM(TopologyExtra, "KMP_HW_SUBSET", buf.str, nCoresPerPkg,__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_TopologyExtra , "KMP_HW_SUBSET", buf.str, nCoresPerPkg, __kmp_nThreadsPerCore , __kmp_ncores), __kmp_msg_null) | |||||
3916 | __kmp_nThreadsPerCore, __kmp_ncores)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_TopologyExtra , "KMP_HW_SUBSET", buf.str, nCoresPerPkg, __kmp_nThreadsPerCore , __kmp_ncores), __kmp_msg_null); | |||||
3917 | __kmp_str_buf_free(&buf); | |||||
3918 | } | |||||
3919 | _exit: | |||||
3920 | if (__kmp_pu_os_idx != NULL__null) { | |||||
3921 | __kmp_free(__kmp_pu_os_idx)___kmp_free((__kmp_pu_os_idx), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 3921); | |||||
3922 | __kmp_pu_os_idx = NULL__null; | |||||
3923 | } | |||||
3924 | } | |||||
3925 | ||||||
3926 | // This function figures out the deepest level at which there is at least one | |||||
3927 | // cluster/core with more than one processing unit bound to it. | |||||
3928 | static int __kmp_affinity_find_core_level(const AddrUnsPair *address2os, | |||||
3929 | int nprocs, int bottom_level) { | |||||
3930 | int core_level = 0; | |||||
3931 | ||||||
3932 | for (int i = 0; i < nprocs; i++) { | |||||
3933 | for (int j = bottom_level; j > 0; j--) { | |||||
3934 | if (address2os[i].first.labels[j] > 0) { | |||||
3935 | if (core_level < (j - 1)) { | |||||
3936 | core_level = j - 1; | |||||
3937 | } | |||||
3938 | } | |||||
3939 | } | |||||
3940 | } | |||||
3941 | return core_level; | |||||
3942 | } | |||||
3943 | ||||||
3944 | // This function counts number of clusters/cores at given level. | |||||
3945 | static int __kmp_affinity_compute_ncores(const AddrUnsPair *address2os, | |||||
3946 | int nprocs, int bottom_level, | |||||
3947 | int core_level) { | |||||
3948 | int ncores = 0; | |||||
3949 | int i, j; | |||||
3950 | ||||||
3951 | j = bottom_level; | |||||
3952 | for (i = 0; i < nprocs; i++) { | |||||
3953 | for (j = bottom_level; j > core_level; j--) { | |||||
3954 | if ((i + 1) < nprocs) { | |||||
3955 | if (address2os[i + 1].first.labels[j] > 0) { | |||||
3956 | break; | |||||
3957 | } | |||||
3958 | } | |||||
3959 | } | |||||
3960 | if (j == core_level) { | |||||
3961 | ncores++; | |||||
3962 | } | |||||
3963 | } | |||||
3964 | if (j > core_level) { | |||||
3965 | // In case of ( nprocs < __kmp_avail_proc ) we may end too deep and miss one | |||||
3966 | // core. May occur when called from __kmp_affinity_find_core(). | |||||
3967 | ncores++; | |||||
3968 | } | |||||
3969 | return ncores; | |||||
3970 | } | |||||
3971 | ||||||
3972 | // This function finds to which cluster/core given processing unit is bound. | |||||
3973 | static int __kmp_affinity_find_core(const AddrUnsPair *address2os, int proc, | |||||
3974 | int bottom_level, int core_level) { | |||||
3975 | return __kmp_affinity_compute_ncores(address2os, proc + 1, bottom_level, | |||||
3976 | core_level) - | |||||
3977 | 1; | |||||
3978 | } | |||||
3979 | ||||||
3980 | // This function finds maximal number of processing units bound to a | |||||
3981 | // cluster/core at given level. | |||||
3982 | static int __kmp_affinity_max_proc_per_core(const AddrUnsPair *address2os, | |||||
3983 | int nprocs, int bottom_level, | |||||
3984 | int core_level) { | |||||
3985 | int maxprocpercore = 0; | |||||
3986 | ||||||
3987 | if (core_level < bottom_level) { | |||||
3988 | for (int i = 0; i < nprocs; i++) { | |||||
3989 | int percore = address2os[i].first.labels[core_level + 1] + 1; | |||||
3990 | ||||||
3991 | if (percore > maxprocpercore) { | |||||
3992 | maxprocpercore = percore; | |||||
3993 | } | |||||
3994 | } | |||||
3995 | } else { | |||||
3996 | maxprocpercore = 1; | |||||
3997 | } | |||||
3998 | return maxprocpercore; | |||||
3999 | } | |||||
4000 | ||||||
4001 | static AddrUnsPair *address2os = NULL__null; | |||||
4002 | static int *procarr = NULL__null; | |||||
4003 | static int __kmp_aff_depth = 0; | |||||
4004 | ||||||
4005 | #if KMP_USE_HIER_SCHED0 | |||||
4006 | #define KMP_EXIT_AFF_NONE \ | |||||
4007 | KMP_ASSERT(__kmp_affinity_type == affinity_none)if (!(__kmp_affinity_type == affinity_none)) { __kmp_debug_assert ("__kmp_affinity_type == affinity_none", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 4007); }; \ | |||||
4008 | KMP_ASSERT(address2os == NULL)if (!(address2os == __null)) { __kmp_debug_assert("address2os == NULL" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 4008); }; \ | |||||
4009 | __kmp_apply_thread_places(NULL__null, 0); \ | |||||
4010 | __kmp_create_affinity_none_places(); \ | |||||
4011 | __kmp_dispatch_set_hierarchy_values(); \ | |||||
4012 | return; | |||||
4013 | #else | |||||
4014 | #define KMP_EXIT_AFF_NONE \ | |||||
4015 | KMP_ASSERT(__kmp_affinity_type == affinity_none)if (!(__kmp_affinity_type == affinity_none)) { __kmp_debug_assert ("__kmp_affinity_type == affinity_none", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 4015); }; \ | |||||
4016 | KMP_ASSERT(address2os == NULL)if (!(address2os == __null)) { __kmp_debug_assert("address2os == NULL" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 4016); }; \ | |||||
4017 | __kmp_apply_thread_places(NULL__null, 0); \ | |||||
4018 | __kmp_create_affinity_none_places(); \ | |||||
4019 | return; | |||||
4020 | #endif | |||||
4021 | ||||||
4022 | // Create a one element mask array (set of places) which only contains the | |||||
4023 | // initial process's affinity mask | |||||
4024 | static void __kmp_create_affinity_none_places() { | |||||
4025 | KMP_ASSERT(__kmp_affin_fullMask != NULL)if (!(__kmp_affin_fullMask != __null)) { __kmp_debug_assert("__kmp_affin_fullMask != NULL" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 4025); }; | |||||
4026 | KMP_ASSERT(__kmp_affinity_type == affinity_none)if (!(__kmp_affinity_type == affinity_none)) { __kmp_debug_assert ("__kmp_affinity_type == affinity_none", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 4026); }; | |||||
4027 | __kmp_affinity_num_masks = 1; | |||||
4028 | KMP_CPU_ALLOC_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks)(__kmp_affinity_masks = __kmp_affinity_dispatch->allocate_mask_array (__kmp_affinity_num_masks)); | |||||
4029 | kmp_affin_mask_t *dest = KMP_CPU_INDEX(__kmp_affinity_masks, 0)__kmp_affinity_dispatch->index_mask_array(__kmp_affinity_masks , 0); | |||||
4030 | KMP_CPU_COPY(dest, __kmp_affin_fullMask)(dest)->copy(__kmp_affin_fullMask); | |||||
4031 | } | |||||
4032 | ||||||
4033 | static int __kmp_affinity_cmp_Address_child_num(const void *a, const void *b) { | |||||
4034 | const Address *aa = &(((const AddrUnsPair *)a)->first); | |||||
4035 | const Address *bb = &(((const AddrUnsPair *)b)->first); | |||||
4036 | unsigned depth = aa->depth; | |||||
4037 | unsigned i; | |||||
4038 | KMP_DEBUG_ASSERT(depth == bb->depth)if (!(depth == bb->depth)) { __kmp_debug_assert("depth == bb->depth" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 4038); }; | |||||
4039 | KMP_DEBUG_ASSERT((unsigned)__kmp_affinity_compact <= depth)if (!((unsigned)__kmp_affinity_compact <= depth)) { __kmp_debug_assert ("(unsigned)__kmp_affinity_compact <= depth", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 4039); }; | |||||
4040 | KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0)if (!(__kmp_affinity_compact >= 0)) { __kmp_debug_assert("__kmp_affinity_compact >= 0" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 4040); }; | |||||
4041 | for (i = 0; i < (unsigned)__kmp_affinity_compact; i++) { | |||||
4042 | int j = depth - i - 1; | |||||
4043 | if (aa->childNums[j] < bb->childNums[j]) | |||||
4044 | return -1; | |||||
4045 | if (aa->childNums[j] > bb->childNums[j]) | |||||
4046 | return 1; | |||||
4047 | } | |||||
4048 | for (; i < depth; i++) { | |||||
4049 | int j = i - __kmp_affinity_compact; | |||||
4050 | if (aa->childNums[j] < bb->childNums[j]) | |||||
4051 | return -1; | |||||
4052 | if (aa->childNums[j] > bb->childNums[j]) | |||||
4053 | return 1; | |||||
4054 | } | |||||
4055 | return 0; | |||||
4056 | } | |||||
4057 | ||||||
4058 | static void __kmp_aux_affinity_initialize(void) { | |||||
4059 | if (__kmp_affinity_masks != NULL__null) { | |||||
4060 | KMP_ASSERT(__kmp_affin_fullMask != NULL)if (!(__kmp_affin_fullMask != __null)) { __kmp_debug_assert("__kmp_affin_fullMask != NULL" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 4060); }; | |||||
4061 | return; | |||||
4062 | } | |||||
4063 | ||||||
4064 | // Create the "full" mask - this defines all of the processors that we | |||||
4065 | // consider to be in the machine model. If respect is set, then it is the | |||||
4066 | // initialization thread's affinity mask. Otherwise, it is all processors that | |||||
4067 | // we know about on the machine. | |||||
4068 | if (__kmp_affin_fullMask == NULL__null) { | |||||
4069 | KMP_CPU_ALLOC(__kmp_affin_fullMask)(__kmp_affin_fullMask = __kmp_affinity_dispatch->allocate_mask ()); | |||||
4070 | } | |||||
4071 | if (KMP_AFFINITY_CAPABLE()(__kmp_affin_mask_size > 0)) { | |||||
4072 | if (__kmp_affinity_respect_mask) { | |||||
4073 | __kmp_get_system_affinity(__kmp_affin_fullMask, TRUE)(__kmp_affin_fullMask)->get_system_affinity((!0)); | |||||
4074 | ||||||
4075 | // Count the number of available processors. | |||||
4076 | unsigned i; | |||||
4077 | __kmp_avail_proc = 0; | |||||
4078 | KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask)for (i = (__kmp_affin_fullMask)->begin(); (int)i != (__kmp_affin_fullMask )->end(); i = (__kmp_affin_fullMask)->next(i)) { | |||||
4079 | if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)(__kmp_affin_fullMask)->is_set(i)) { | |||||
4080 | continue; | |||||
4081 | } | |||||
4082 | __kmp_avail_proc++; | |||||
4083 | } | |||||
4084 | if (__kmp_avail_proc > __kmp_xproc) { | |||||
4085 | if (__kmp_affinity_verbose || | |||||
4086 | (__kmp_affinity_warnings && | |||||
4087 | (__kmp_affinity_type != affinity_none))) { | |||||
4088 | KMP_WARNING(ErrorInitializeAffinity)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_ErrorInitializeAffinity ), __kmp_msg_null); | |||||
4089 | } | |||||
4090 | __kmp_affinity_type = affinity_none; | |||||
4091 | KMP_AFFINITY_DISABLE()(__kmp_affin_mask_size = 0); | |||||
4092 | return; | |||||
4093 | } | |||||
4094 | } else { | |||||
4095 | __kmp_affinity_entire_machine_mask(__kmp_affin_fullMask); | |||||
4096 | __kmp_avail_proc = __kmp_xproc; | |||||
4097 | } | |||||
4098 | } | |||||
4099 | ||||||
4100 | if (__kmp_affinity_gran == affinity_gran_tile && | |||||
4101 | // check if user's request is valid | |||||
4102 | __kmp_affinity_dispatch->get_api_type() == KMPAffinity::NATIVE_OS) { | |||||
4103 | KMP_WARNING(AffTilesNoHWLOC, "KMP_AFFINITY")__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffTilesNoHWLOC , "KMP_AFFINITY"), __kmp_msg_null); | |||||
4104 | __kmp_affinity_gran = affinity_gran_package; | |||||
4105 | } | |||||
4106 | ||||||
4107 | int depth = -1; | |||||
4108 | kmp_i18n_id_t msg_id = kmp_i18n_null; | |||||
4109 | ||||||
4110 | // For backward compatibility, setting KMP_CPUINFO_FILE => | |||||
4111 | // KMP_TOPOLOGY_METHOD=cpuinfo | |||||
4112 | if ((__kmp_cpuinfo_file != NULL__null) && | |||||
4113 | (__kmp_affinity_top_method == affinity_top_method_all)) { | |||||
4114 | __kmp_affinity_top_method = affinity_top_method_cpuinfo; | |||||
4115 | } | |||||
4116 | ||||||
4117 | if (__kmp_affinity_top_method == affinity_top_method_all) { | |||||
4118 | // In the default code path, errors are not fatal - we just try using | |||||
4119 | // another method. We only emit a warning message if affinity is on, or the | |||||
4120 | // verbose flag is set, and the nowarnings flag was not set. | |||||
4121 | const char *file_name = NULL__null; | |||||
4122 | int line = 0; | |||||
4123 | #if KMP_USE_HWLOC0 | |||||
4124 | if (depth < 0 && | |||||
4125 | __kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) { | |||||
4126 | if (__kmp_affinity_verbose) { | |||||
4127 | KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffUsingHwloc , "KMP_AFFINITY"), __kmp_msg_null); | |||||
4128 | } | |||||
4129 | if (!__kmp_hwloc_error) { | |||||
4130 | depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id); | |||||
4131 | if (depth == 0) { | |||||
4132 | KMP_EXIT_AFF_NONE; | |||||
4133 | } else if (depth < 0 && __kmp_affinity_verbose) { | |||||
4134 | KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffIgnoringHwloc , "KMP_AFFINITY"), __kmp_msg_null); | |||||
4135 | } | |||||
4136 | } else if (__kmp_affinity_verbose) { | |||||
4137 | KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffIgnoringHwloc , "KMP_AFFINITY"), __kmp_msg_null); | |||||
4138 | } | |||||
4139 | } | |||||
4140 | #endif | |||||
4141 | ||||||
4142 | #if KMP_ARCH_X860 || KMP_ARCH_X86_641 | |||||
4143 | ||||||
4144 | if (depth < 0) { | |||||
4145 | if (__kmp_affinity_verbose) { | |||||
4146 | KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC))__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffInfoStr , "KMP_AFFINITY", __kmp_i18n_catgets(kmp_i18n_str_Decodingx2APIC )), __kmp_msg_null); | |||||
4147 | } | |||||
4148 | ||||||
4149 | file_name = NULL__null; | |||||
4150 | depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id); | |||||
4151 | if (depth == 0) { | |||||
4152 | KMP_EXIT_AFF_NONE; | |||||
4153 | } | |||||
4154 | ||||||
4155 | if (depth < 0) { | |||||
4156 | if (__kmp_affinity_verbose) { | |||||
4157 | if (msg_id != kmp_i18n_null) { | |||||
4158 | KMP_INFORM(AffInfoStrStr, "KMP_AFFINITY",__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffInfoStrStr , "KMP_AFFINITY", __kmp_i18n_catgets(msg_id), __kmp_i18n_catgets (kmp_i18n_str_DecodingLegacyAPIC)), __kmp_msg_null) | |||||
4159 | __kmp_i18n_catgets(msg_id),__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffInfoStrStr , "KMP_AFFINITY", __kmp_i18n_catgets(msg_id), __kmp_i18n_catgets (kmp_i18n_str_DecodingLegacyAPIC)), __kmp_msg_null) | |||||
4160 | KMP_I18N_STR(DecodingLegacyAPIC))__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffInfoStrStr , "KMP_AFFINITY", __kmp_i18n_catgets(msg_id), __kmp_i18n_catgets (kmp_i18n_str_DecodingLegacyAPIC)), __kmp_msg_null); | |||||
4161 | } else { | |||||
4162 | KMP_INFORM(AffInfoStr, "KMP_AFFINITY",__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffInfoStr , "KMP_AFFINITY", __kmp_i18n_catgets(kmp_i18n_str_DecodingLegacyAPIC )), __kmp_msg_null) | |||||
4163 | KMP_I18N_STR(DecodingLegacyAPIC))__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffInfoStr , "KMP_AFFINITY", __kmp_i18n_catgets(kmp_i18n_str_DecodingLegacyAPIC )), __kmp_msg_null); | |||||
4164 | } | |||||
4165 | } | |||||
4166 | ||||||
4167 | file_name = NULL__null; | |||||
4168 | depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id); | |||||
4169 | if (depth == 0) { | |||||
4170 | KMP_EXIT_AFF_NONE; | |||||
4171 | } | |||||
4172 | } | |||||
4173 | } | |||||
4174 | ||||||
4175 | #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ | |||||
4176 | ||||||
4177 | #if KMP_OS_LINUX1 | |||||
4178 | ||||||
4179 | if (depth < 0) { | |||||
4180 | if (__kmp_affinity_verbose) { | |||||
4181 | if (msg_id != kmp_i18n_null) { | |||||
4182 | KMP_INFORM(AffStrParseFilename, "KMP_AFFINITY",__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffStrParseFilename , "KMP_AFFINITY", __kmp_i18n_catgets(msg_id), "/proc/cpuinfo" ), __kmp_msg_null) | |||||
4183 | __kmp_i18n_catgets(msg_id), "/proc/cpuinfo")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffStrParseFilename , "KMP_AFFINITY", __kmp_i18n_catgets(msg_id), "/proc/cpuinfo" ), __kmp_msg_null); | |||||
4184 | } else { | |||||
4185 | KMP_INFORM(AffParseFilename, "KMP_AFFINITY", "/proc/cpuinfo")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffParseFilename , "KMP_AFFINITY", "/proc/cpuinfo"), __kmp_msg_null); | |||||
4186 | } | |||||
4187 | } | |||||
4188 | ||||||
4189 | FILE *f = fopen("/proc/cpuinfo", "r"); | |||||
4190 | if (f == NULL__null) { | |||||
4191 | msg_id = kmp_i18n_str_CantOpenCpuinfo; | |||||
4192 | } else { | |||||
4193 | file_name = "/proc/cpuinfo"; | |||||
4194 | depth = | |||||
4195 | __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f); | |||||
4196 | fclose(f); | |||||
4197 | if (depth == 0) { | |||||
4198 | KMP_EXIT_AFF_NONE; | |||||
4199 | } | |||||
4200 | } | |||||
4201 | } | |||||
4202 | ||||||
4203 | #endif /* KMP_OS_LINUX */ | |||||
4204 | ||||||
4205 | #if KMP_GROUP_AFFINITY0 | |||||
4206 | ||||||
4207 | if ((depth < 0) && (__kmp_num_proc_groups > 1)) { | |||||
4208 | if (__kmp_affinity_verbose) { | |||||
4209 | KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffWindowsProcGroupMap , "KMP_AFFINITY"), __kmp_msg_null); | |||||
4210 | } | |||||
4211 | ||||||
4212 | depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id); | |||||
4213 | KMP_ASSERT(depth != 0)if (!(depth != 0)) { __kmp_debug_assert("depth != 0", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 4213); }; | |||||
4214 | } | |||||
4215 | ||||||
4216 | #endif /* KMP_GROUP_AFFINITY */ | |||||
4217 | ||||||
4218 | if (depth < 0) { | |||||
4219 | if (__kmp_affinity_verbose && (msg_id != kmp_i18n_null)) { | |||||
4220 | if (file_name == NULL__null) { | |||||
4221 | KMP_INFORM(UsingFlatOS, __kmp_i18n_catgets(msg_id))__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_UsingFlatOS , __kmp_i18n_catgets(msg_id)), __kmp_msg_null); | |||||
4222 | } else if (line == 0) { | |||||
4223 | KMP_INFORM(UsingFlatOSFile, file_name, __kmp_i18n_catgets(msg_id))__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_UsingFlatOSFile , file_name, __kmp_i18n_catgets(msg_id)), __kmp_msg_null); | |||||
4224 | } else { | |||||
4225 | KMP_INFORM(UsingFlatOSFileLine, file_name, line,__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_UsingFlatOSFileLine , file_name, line, __kmp_i18n_catgets(msg_id)), __kmp_msg_null ) | |||||
4226 | __kmp_i18n_catgets(msg_id))__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_UsingFlatOSFileLine , file_name, line, __kmp_i18n_catgets(msg_id)), __kmp_msg_null ); | |||||
4227 | } | |||||
4228 | } | |||||
4229 | // FIXME - print msg if msg_id = kmp_i18n_null ??? | |||||
4230 | ||||||
4231 | file_name = ""; | |||||
4232 | depth = __kmp_affinity_create_flat_map(&address2os, &msg_id); | |||||
4233 | if (depth == 0) { | |||||
4234 | KMP_EXIT_AFF_NONE; | |||||
4235 | } | |||||
4236 | KMP_ASSERT(depth > 0)if (!(depth > 0)) { __kmp_debug_assert("depth > 0", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 4236); }; | |||||
4237 | KMP_ASSERT(address2os != NULL)if (!(address2os != __null)) { __kmp_debug_assert("address2os != NULL" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 4237); }; | |||||
4238 | } | |||||
4239 | } | |||||
4240 | ||||||
4241 | #if KMP_USE_HWLOC0 | |||||
4242 | else if (__kmp_affinity_top_method == affinity_top_method_hwloc) { | |||||
4243 | KMP_ASSERT(__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC)if (!(__kmp_affinity_dispatch->get_api_type() == KMPAffinity ::HWLOC)) { __kmp_debug_assert("__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 4243); }; | |||||
4244 | if (__kmp_affinity_verbose) { | |||||
4245 | KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffUsingHwloc , "KMP_AFFINITY"), __kmp_msg_null); | |||||
4246 | } | |||||
4247 | depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id); | |||||
4248 | if (depth == 0) { | |||||
4249 | KMP_EXIT_AFF_NONE; | |||||
4250 | } | |||||
4251 | } | |||||
4252 | #endif // KMP_USE_HWLOC | |||||
4253 | ||||||
4254 | // If the user has specified that a particular topology discovery method is to be | |||||
4255 | // used, then we abort if that method fails. The exception is group affinity, | |||||
4256 | // which might have been implicitly set. | |||||
4257 | ||||||
4258 | #if KMP_ARCH_X860 || KMP_ARCH_X86_641 | |||||
4259 | ||||||
4260 | else if (__kmp_affinity_top_method == affinity_top_method_x2apicid) { | |||||
4261 | if (__kmp_affinity_verbose) { | |||||
4262 | KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC))__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffInfoStr , "KMP_AFFINITY", __kmp_i18n_catgets(kmp_i18n_str_Decodingx2APIC )), __kmp_msg_null); | |||||
4263 | } | |||||
4264 | ||||||
4265 | depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id); | |||||
4266 | if (depth == 0) { | |||||
4267 | KMP_EXIT_AFF_NONE; | |||||
4268 | } | |||||
4269 | if (depth < 0) { | |||||
4270 | KMP_ASSERT(msg_id != kmp_i18n_null)if (!(msg_id != kmp_i18n_null)) { __kmp_debug_assert("msg_id != kmp_i18n_null" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 4270); }; | |||||
4271 | KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id))__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_MsgExiting, __kmp_i18n_catgets (msg_id)), __kmp_msg_null); | |||||
4272 | } | |||||
4273 | } else if (__kmp_affinity_top_method == affinity_top_method_apicid) { | |||||
4274 | if (__kmp_affinity_verbose) { | |||||
4275 | KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC))__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffInfoStr , "KMP_AFFINITY", __kmp_i18n_catgets(kmp_i18n_str_DecodingLegacyAPIC )), __kmp_msg_null); | |||||
4276 | } | |||||
4277 | ||||||
4278 | depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id); | |||||
4279 | if (depth == 0) { | |||||
4280 | KMP_EXIT_AFF_NONE; | |||||
4281 | } | |||||
4282 | if (depth < 0) { | |||||
4283 | KMP_ASSERT(msg_id != kmp_i18n_null)if (!(msg_id != kmp_i18n_null)) { __kmp_debug_assert("msg_id != kmp_i18n_null" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 4283); }; | |||||
4284 | KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id))__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_MsgExiting, __kmp_i18n_catgets (msg_id)), __kmp_msg_null); | |||||
4285 | } | |||||
4286 | } | |||||
4287 | ||||||
4288 | #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ | |||||
4289 | ||||||
4290 | else if (__kmp_affinity_top_method == affinity_top_method_cpuinfo) { | |||||
4291 | const char *filename; | |||||
4292 | if (__kmp_cpuinfo_file != NULL__null) { | |||||
4293 | filename = __kmp_cpuinfo_file; | |||||
4294 | } else { | |||||
4295 | filename = "/proc/cpuinfo"; | |||||
4296 | } | |||||
4297 | ||||||
4298 | if (__kmp_affinity_verbose) { | |||||
4299 | KMP_INFORM(AffParseFilename, "KMP_AFFINITY", filename)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffParseFilename , "KMP_AFFINITY", filename), __kmp_msg_null); | |||||
4300 | } | |||||
4301 | ||||||
4302 | FILE *f = fopen(filename, "r"); | |||||
4303 | if (f == NULL__null) { | |||||
4304 | int code = errno(*__errno_location ()); | |||||
4305 | if (__kmp_cpuinfo_file != NULL__null) { | |||||
4306 | __kmp_fatal(KMP_MSG(CantOpenFileForReading, filename)__kmp_msg_format(kmp_i18n_msg_CantOpenFileForReading, filename ), KMP_ERR(code)__kmp_msg_error_code(code), | |||||
4307 | KMP_HNT(NameComesFrom_CPUINFO_FILE)__kmp_msg_format(kmp_i18n_hnt_NameComesFrom_CPUINFO_FILE), __kmp_msg_null); | |||||
4308 | } else { | |||||
4309 | __kmp_fatal(KMP_MSG(CantOpenFileForReading, filename)__kmp_msg_format(kmp_i18n_msg_CantOpenFileForReading, filename ), KMP_ERR(code)__kmp_msg_error_code(code), | |||||
4310 | __kmp_msg_null); | |||||
4311 | } | |||||
4312 | } | |||||
4313 | int line = 0; | |||||
4314 | depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f); | |||||
4315 | fclose(f); | |||||
4316 | if (depth < 0) { | |||||
4317 | KMP_ASSERT(msg_id != kmp_i18n_null)if (!(msg_id != kmp_i18n_null)) { __kmp_debug_assert("msg_id != kmp_i18n_null" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 4317); }; | |||||
4318 | if (line > 0) { | |||||
4319 | KMP_FATAL(FileLineMsgExiting, filename, line,__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_FileLineMsgExiting, filename, line, __kmp_i18n_catgets(msg_id)), __kmp_msg_null) | |||||
4320 | __kmp_i18n_catgets(msg_id))__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_FileLineMsgExiting, filename, line, __kmp_i18n_catgets(msg_id)), __kmp_msg_null); | |||||
4321 | } else { | |||||
4322 | KMP_FATAL(FileMsgExiting, filename, __kmp_i18n_catgets(msg_id))__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_FileMsgExiting, filename , __kmp_i18n_catgets(msg_id)), __kmp_msg_null); | |||||
4323 | } | |||||
4324 | } | |||||
4325 | if (__kmp_affinity_type == affinity_none) { | |||||
4326 | KMP_ASSERT(depth == 0)if (!(depth == 0)) { __kmp_debug_assert("depth == 0", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 4326); }; | |||||
4327 | KMP_EXIT_AFF_NONE; | |||||
4328 | } | |||||
4329 | } | |||||
4330 | ||||||
4331 | #if KMP_GROUP_AFFINITY0 | |||||
4332 | ||||||
4333 | else if (__kmp_affinity_top_method == affinity_top_method_group) { | |||||
4334 | if (__kmp_affinity_verbose) { | |||||
4335 | KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffWindowsProcGroupMap , "KMP_AFFINITY"), __kmp_msg_null); | |||||
4336 | } | |||||
4337 | ||||||
4338 | depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id); | |||||
4339 | KMP_ASSERT(depth != 0)if (!(depth != 0)) { __kmp_debug_assert("depth != 0", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 4339); }; | |||||
4340 | if (depth < 0) { | |||||
4341 | KMP_ASSERT(msg_id != kmp_i18n_null)if (!(msg_id != kmp_i18n_null)) { __kmp_debug_assert("msg_id != kmp_i18n_null" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 4341); }; | |||||
4342 | KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id))__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_MsgExiting, __kmp_i18n_catgets (msg_id)), __kmp_msg_null); | |||||
4343 | } | |||||
4344 | } | |||||
4345 | ||||||
4346 | #endif /* KMP_GROUP_AFFINITY */ | |||||
4347 | ||||||
4348 | else if (__kmp_affinity_top_method == affinity_top_method_flat) { | |||||
4349 | if (__kmp_affinity_verbose) { | |||||
4350 | KMP_INFORM(AffUsingFlatOS, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffUsingFlatOS , "KMP_AFFINITY"), __kmp_msg_null); | |||||
4351 | } | |||||
4352 | ||||||
4353 | depth = __kmp_affinity_create_flat_map(&address2os, &msg_id); | |||||
4354 | if (depth == 0) { | |||||
4355 | KMP_EXIT_AFF_NONE; | |||||
4356 | } | |||||
4357 | // should not fail | |||||
4358 | KMP_ASSERT(depth > 0)if (!(depth > 0)) { __kmp_debug_assert("depth > 0", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 4358); }; | |||||
4359 | KMP_ASSERT(address2os != NULL)if (!(address2os != __null)) { __kmp_debug_assert("address2os != NULL" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 4359); }; | |||||
4360 | } | |||||
4361 | ||||||
4362 | #if KMP_USE_HIER_SCHED0 | |||||
4363 | __kmp_dispatch_set_hierarchy_values(); | |||||
4364 | #endif | |||||
4365 | ||||||
4366 | if (address2os == NULL__null) { | |||||
4367 | if (KMP_AFFINITY_CAPABLE()(__kmp_affin_mask_size > 0) && | |||||
4368 | (__kmp_affinity_verbose || | |||||
4369 | (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none)))) { | |||||
4370 | KMP_WARNING(ErrorInitializeAffinity)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_ErrorInitializeAffinity ), __kmp_msg_null); | |||||
4371 | } | |||||
4372 | __kmp_affinity_type = affinity_none; | |||||
4373 | __kmp_create_affinity_none_places(); | |||||
4374 | KMP_AFFINITY_DISABLE()(__kmp_affin_mask_size = 0); | |||||
4375 | return; | |||||
4376 | } | |||||
4377 | ||||||
4378 | if (__kmp_affinity_gran == affinity_gran_tile | |||||
4379 | #if KMP_USE_HWLOC0 | |||||
4380 | && __kmp_tile_depth == 0 | |||||
4381 | #endif | |||||
4382 | ) { | |||||
4383 | // tiles requested but not detected, warn user on this | |||||
4384 | KMP_WARNING(AffTilesNoTiles, "KMP_AFFINITY")__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffTilesNoTiles , "KMP_AFFINITY"), __kmp_msg_null); | |||||
4385 | } | |||||
4386 | ||||||
4387 | __kmp_apply_thread_places(&address2os, depth); | |||||
4388 | ||||||
4389 | // Create the table of masks, indexed by thread Id. | |||||
4390 | unsigned maxIndex; | |||||
4391 | unsigned numUnique; | |||||
4392 | kmp_affin_mask_t *osId2Mask = | |||||
4393 | __kmp_create_masks(&maxIndex, &numUnique, address2os, __kmp_avail_proc); | |||||
4394 | if (__kmp_affinity_gran_levels == 0) { | |||||
4395 | KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc)if (!((int)numUnique == __kmp_avail_proc)) { __kmp_debug_assert ("(int)numUnique == __kmp_avail_proc", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 4395); }; | |||||
4396 | } | |||||
4397 | ||||||
4398 | // Set the childNums vector in all Address objects. This must be done before | |||||
4399 | // we can sort using __kmp_affinity_cmp_Address_child_num(), which takes into | |||||
4400 | // account the setting of __kmp_affinity_compact. | |||||
4401 | __kmp_affinity_assign_child_nums(address2os, __kmp_avail_proc); | |||||
4402 | ||||||
4403 | switch (__kmp_affinity_type) { | |||||
4404 | ||||||
4405 | case affinity_explicit: | |||||
4406 | KMP_DEBUG_ASSERT(__kmp_affinity_proclist != NULL)if (!(__kmp_affinity_proclist != __null)) { __kmp_debug_assert ("__kmp_affinity_proclist != __null", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 4406); }; | |||||
4407 | #if OMP_40_ENABLED(50 >= 40) | |||||
4408 | if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel) | |||||
4409 | #endif | |||||
4410 | { | |||||
4411 | __kmp_affinity_process_proclist( | |||||
4412 | &__kmp_affinity_masks, &__kmp_affinity_num_masks, | |||||
4413 | __kmp_affinity_proclist, osId2Mask, maxIndex); | |||||
4414 | } | |||||
4415 | #if OMP_40_ENABLED(50 >= 40) | |||||
4416 | else { | |||||
4417 | __kmp_affinity_process_placelist( | |||||
4418 | &__kmp_affinity_masks, &__kmp_affinity_num_masks, | |||||
4419 | __kmp_affinity_proclist, osId2Mask, maxIndex); | |||||
4420 | } | |||||
4421 | #endif | |||||
4422 | if (__kmp_affinity_num_masks == 0) { | |||||
4423 | if (__kmp_affinity_verbose || | |||||
4424 | (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) { | |||||
4425 | KMP_WARNING(AffNoValidProcID)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffNoValidProcID ), __kmp_msg_null); | |||||
4426 | } | |||||
4427 | __kmp_affinity_type = affinity_none; | |||||
4428 | return; | |||||
4429 | } | |||||
4430 | break; | |||||
4431 | ||||||
4432 | // The other affinity types rely on sorting the Addresses according to some | |||||
4433 | // permutation of the machine topology tree. Set __kmp_affinity_compact and | |||||
4434 | // __kmp_affinity_offset appropriately, then jump to a common code fragment | |||||
4435 | // to do the sort and create the array of affinity masks. | |||||
4436 | ||||||
4437 | case affinity_logical: | |||||
4438 | __kmp_affinity_compact = 0; | |||||
4439 | if (__kmp_affinity_offset) { | |||||
4440 | __kmp_affinity_offset = | |||||
4441 | __kmp_nThreadsPerCore * __kmp_affinity_offset % __kmp_avail_proc; | |||||
4442 | } | |||||
4443 | goto sortAddresses; | |||||
4444 | ||||||
4445 | case affinity_physical: | |||||
4446 | if (__kmp_nThreadsPerCore > 1) { | |||||
4447 | __kmp_affinity_compact = 1; | |||||
4448 | if (__kmp_affinity_compact >= depth) { | |||||
4449 | __kmp_affinity_compact = 0; | |||||
4450 | } | |||||
4451 | } else { | |||||
4452 | __kmp_affinity_compact = 0; | |||||
4453 | } | |||||
4454 | if (__kmp_affinity_offset) { | |||||
4455 | __kmp_affinity_offset = | |||||
4456 | __kmp_nThreadsPerCore * __kmp_affinity_offset % __kmp_avail_proc; | |||||
4457 | } | |||||
4458 | goto sortAddresses; | |||||
4459 | ||||||
4460 | case affinity_scatter: | |||||
4461 | if (__kmp_affinity_compact >= depth) { | |||||
4462 | __kmp_affinity_compact = 0; | |||||
4463 | } else { | |||||
4464 | __kmp_affinity_compact = depth - 1 - __kmp_affinity_compact; | |||||
4465 | } | |||||
4466 | goto sortAddresses; | |||||
4467 | ||||||
4468 | case affinity_compact: | |||||
4469 | if (__kmp_affinity_compact >= depth) { | |||||
4470 | __kmp_affinity_compact = depth - 1; | |||||
4471 | } | |||||
4472 | goto sortAddresses; | |||||
4473 | ||||||
4474 | case affinity_balanced: | |||||
4475 | if (depth <= 1) { | |||||
4476 | if (__kmp_affinity_verbose || __kmp_affinity_warnings) { | |||||
4477 | KMP_WARNING(AffBalancedNotAvail, "KMP_AFFINITY")__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffBalancedNotAvail , "KMP_AFFINITY"), __kmp_msg_null); | |||||
4478 | } | |||||
4479 | __kmp_affinity_type = affinity_none; | |||||
4480 | return; | |||||
4481 | } else if (__kmp_affinity_uniform_topology()) { | |||||
4482 | break; | |||||
4483 | } else { // Non-uniform topology | |||||
4484 | ||||||
4485 | // Save the depth for further usage | |||||
4486 | __kmp_aff_depth = depth; | |||||
4487 | ||||||
4488 | int core_level = __kmp_affinity_find_core_level( | |||||
4489 | address2os, __kmp_avail_proc, depth - 1); | |||||
4490 | int ncores = __kmp_affinity_compute_ncores(address2os, __kmp_avail_proc, | |||||
4491 | depth - 1, core_level); | |||||
4492 | int maxprocpercore = __kmp_affinity_max_proc_per_core( | |||||
4493 | address2os, __kmp_avail_proc, depth - 1, core_level); | |||||
4494 | ||||||
4495 | int nproc = ncores * maxprocpercore; | |||||
4496 | if ((nproc < 2) || (nproc < __kmp_avail_proc)) { | |||||
4497 | if (__kmp_affinity_verbose || __kmp_affinity_warnings) { | |||||
4498 | KMP_WARNING(AffBalancedNotAvail, "KMP_AFFINITY")__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffBalancedNotAvail , "KMP_AFFINITY"), __kmp_msg_null); | |||||
4499 | } | |||||
4500 | __kmp_affinity_type = affinity_none; | |||||
4501 | return; | |||||
4502 | } | |||||
4503 | ||||||
4504 | procarr = (int *)__kmp_allocate(sizeof(int) * nproc)___kmp_allocate((sizeof(int) * nproc), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 4504); | |||||
4505 | for (int i = 0; i < nproc; i++) { | |||||
4506 | procarr[i] = -1; | |||||
4507 | } | |||||
4508 | ||||||
4509 | int lastcore = -1; | |||||
4510 | int inlastcore = 0; | |||||
4511 | for (int i = 0; i < __kmp_avail_proc; i++) { | |||||
4512 | int proc = address2os[i].second; | |||||
4513 | int core = | |||||
4514 | __kmp_affinity_find_core(address2os, i, depth - 1, core_level); | |||||
4515 | ||||||
4516 | if (core == lastcore) { | |||||
4517 | inlastcore++; | |||||
4518 | } else { | |||||
4519 | inlastcore = 0; | |||||
4520 | } | |||||
4521 | lastcore = core; | |||||
4522 | ||||||
4523 | procarr[core * maxprocpercore + inlastcore] = proc; | |||||
4524 | } | |||||
4525 | ||||||
4526 | break; | |||||
4527 | } | |||||
4528 | ||||||
4529 | sortAddresses: | |||||
4530 | // Allocate the gtid->affinity mask table. | |||||
4531 | if (__kmp_affinity_dups) { | |||||
4532 | __kmp_affinity_num_masks = __kmp_avail_proc; | |||||
4533 | } else { | |||||
4534 | __kmp_affinity_num_masks = numUnique; | |||||
4535 | } | |||||
4536 | ||||||
4537 | #if OMP_40_ENABLED(50 >= 40) | |||||
4538 | if ((__kmp_nested_proc_bind.bind_types[0] != proc_bind_intel) && | |||||
4539 | (__kmp_affinity_num_places > 0) && | |||||
4540 | ((unsigned)__kmp_affinity_num_places < __kmp_affinity_num_masks)) { | |||||
4541 | __kmp_affinity_num_masks = __kmp_affinity_num_places; | |||||
4542 | } | |||||
4543 | #endif | |||||
4544 | ||||||
4545 | KMP_CPU_ALLOC_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks)(__kmp_affinity_masks = __kmp_affinity_dispatch->allocate_mask_array (__kmp_affinity_num_masks)); | |||||
4546 | ||||||
4547 | // Sort the address2os table according to the current setting of | |||||
4548 | // __kmp_affinity_compact, then fill out __kmp_affinity_masks. | |||||
4549 | qsort(address2os, __kmp_avail_proc, sizeof(*address2os), | |||||
4550 | __kmp_affinity_cmp_Address_child_num); | |||||
4551 | { | |||||
4552 | int i; | |||||
4553 | unsigned j; | |||||
4554 | for (i = 0, j = 0; i < __kmp_avail_proc; i++) { | |||||
4555 | if ((!__kmp_affinity_dups) && (!address2os[i].first.leader)) { | |||||
4556 | continue; | |||||
4557 | } | |||||
4558 | unsigned osId = address2os[i].second; | |||||
4559 | kmp_affin_mask_t *src = KMP_CPU_INDEX(osId2Mask, osId)__kmp_affinity_dispatch->index_mask_array(osId2Mask, osId); | |||||
4560 | kmp_affin_mask_t *dest = KMP_CPU_INDEX(__kmp_affinity_masks, j)__kmp_affinity_dispatch->index_mask_array(__kmp_affinity_masks , j); | |||||
4561 | KMP_ASSERT(KMP_CPU_ISSET(osId, src))if (!((src)->is_set(osId))) { __kmp_debug_assert("KMP_CPU_ISSET(osId, src)" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 4561); }; | |||||
4562 | KMP_CPU_COPY(dest, src)(dest)->copy(src); | |||||
4563 | if (++j >= __kmp_affinity_num_masks) { | |||||
4564 | break; | |||||
4565 | } | |||||
4566 | } | |||||
4567 | KMP_DEBUG_ASSERT(j == __kmp_affinity_num_masks)if (!(j == __kmp_affinity_num_masks)) { __kmp_debug_assert("j == __kmp_affinity_num_masks" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 4567); }; | |||||
4568 | } | |||||
4569 | break; | |||||
4570 | ||||||
4571 | default: | |||||
4572 | KMP_ASSERT2(0, "Unexpected affinity setting")if (!(0)) { __kmp_debug_assert(("Unexpected affinity setting" ), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 4572); }; | |||||
4573 | } | |||||
4574 | ||||||
4575 | KMP_CPU_FREE_ARRAY(osId2Mask, maxIndex + 1)__kmp_affinity_dispatch->deallocate_mask_array(osId2Mask); | |||||
4576 | machine_hierarchy.init(address2os, __kmp_avail_proc); | |||||
4577 | } | |||||
4578 | #undef KMP_EXIT_AFF_NONE | |||||
4579 | ||||||
4580 | void __kmp_affinity_initialize(void) { | |||||
4581 | // Much of the code above was written assumming that if a machine was not | |||||
4582 | // affinity capable, then __kmp_affinity_type == affinity_none. We now | |||||
4583 | // explicitly represent this as __kmp_affinity_type == affinity_disabled. | |||||
4584 | // There are too many checks for __kmp_affinity_type == affinity_none | |||||
4585 | // in this code. Instead of trying to change them all, check if | |||||
4586 | // __kmp_affinity_type == affinity_disabled, and if so, slam it with | |||||
4587 | // affinity_none, call the real initialization routine, then restore | |||||
4588 | // __kmp_affinity_type to affinity_disabled. | |||||
4589 | int disabled = (__kmp_affinity_type == affinity_disabled); | |||||
4590 | if (!KMP_AFFINITY_CAPABLE()(__kmp_affin_mask_size > 0)) { | |||||
4591 | KMP_ASSERT(disabled)if (!(disabled)) { __kmp_debug_assert("disabled", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 4591); }; | |||||
4592 | } | |||||
4593 | if (disabled) { | |||||
4594 | __kmp_affinity_type = affinity_none; | |||||
4595 | } | |||||
4596 | __kmp_aux_affinity_initialize(); | |||||
4597 | if (disabled) { | |||||
4598 | __kmp_affinity_type = affinity_disabled; | |||||
4599 | } | |||||
4600 | } | |||||
4601 | ||||||
4602 | void __kmp_affinity_uninitialize(void) { | |||||
4603 | if (__kmp_affinity_masks != NULL__null) { | |||||
4604 | KMP_CPU_FREE_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks)__kmp_affinity_dispatch->deallocate_mask_array(__kmp_affinity_masks ); | |||||
4605 | __kmp_affinity_masks = NULL__null; | |||||
4606 | } | |||||
4607 | if (__kmp_affin_fullMask != NULL__null) { | |||||
4608 | KMP_CPU_FREE(__kmp_affin_fullMask)__kmp_affinity_dispatch->deallocate_mask(__kmp_affin_fullMask ); | |||||
4609 | __kmp_affin_fullMask = NULL__null; | |||||
4610 | } | |||||
4611 | __kmp_affinity_num_masks = 0; | |||||
4612 | __kmp_affinity_type = affinity_default; | |||||
4613 | #if OMP_40_ENABLED(50 >= 40) | |||||
4614 | __kmp_affinity_num_places = 0; | |||||
4615 | #endif | |||||
4616 | if (__kmp_affinity_proclist != NULL__null) { | |||||
4617 | __kmp_free(__kmp_affinity_proclist)___kmp_free((__kmp_affinity_proclist), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 4617); | |||||
4618 | __kmp_affinity_proclist = NULL__null; | |||||
4619 | } | |||||
4620 | if (address2os != NULL__null) { | |||||
4621 | __kmp_free(address2os)___kmp_free((address2os), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 4621); | |||||
4622 | address2os = NULL__null; | |||||
4623 | } | |||||
4624 | if (procarr != NULL__null) { | |||||
4625 | __kmp_free(procarr)___kmp_free((procarr), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 4625); | |||||
4626 | procarr = NULL__null; | |||||
4627 | } | |||||
4628 | #if KMP_USE_HWLOC0 | |||||
4629 | if (__kmp_hwloc_topology != NULL__null) { | |||||
4630 | hwloc_topology_destroy(__kmp_hwloc_topology); | |||||
4631 | __kmp_hwloc_topology = NULL__null; | |||||
4632 | } | |||||
4633 | #endif | |||||
4634 | KMPAffinity::destroy_api(); | |||||
4635 | } | |||||
4636 | ||||||
4637 | void __kmp_affinity_set_init_mask(int gtid, int isa_root) { | |||||
4638 | if (!KMP_AFFINITY_CAPABLE()(__kmp_affin_mask_size > 0)) { | |||||
4639 | return; | |||||
4640 | } | |||||
4641 | ||||||
4642 | kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid])((void *)(__kmp_threads[gtid])); | |||||
4643 | if (th->th.th_affin_mask == NULL__null) { | |||||
4644 | KMP_CPU_ALLOC(th->th.th_affin_mask)(th->th.th_affin_mask = __kmp_affinity_dispatch->allocate_mask ()); | |||||
4645 | } else { | |||||
4646 | KMP_CPU_ZERO(th->th.th_affin_mask)(th->th.th_affin_mask)->zero(); | |||||
4647 | } | |||||
4648 | ||||||
4649 | // Copy the thread mask to the kmp_info_t strucuture. If | |||||
4650 | // __kmp_affinity_type == affinity_none, copy the "full" mask, i.e. one that | |||||
4651 | // has all of the OS proc ids set, or if __kmp_affinity_respect_mask is set, | |||||
4652 | // then the full mask is the same as the mask of the initialization thread. | |||||
4653 | kmp_affin_mask_t *mask; | |||||
4654 | int i; | |||||
4655 | ||||||
4656 | #if OMP_40_ENABLED(50 >= 40) | |||||
4657 | if (KMP_AFFINITY_NON_PROC_BIND((__kmp_nested_proc_bind.bind_types[0] == proc_bind_false || __kmp_nested_proc_bind .bind_types[0] == proc_bind_intel) && (__kmp_affinity_num_masks > 0 || __kmp_affinity_type == affinity_balanced))) | |||||
4658 | #endif | |||||
4659 | { | |||||
4660 | if ((__kmp_affinity_type == affinity_none) || | |||||
4661 | (__kmp_affinity_type == affinity_balanced)) { | |||||
4662 | #if KMP_GROUP_AFFINITY0 | |||||
4663 | if (__kmp_num_proc_groups > 1) { | |||||
4664 | return; | |||||
4665 | } | |||||
4666 | #endif | |||||
4667 | KMP_ASSERT(__kmp_affin_fullMask != NULL)if (!(__kmp_affin_fullMask != __null)) { __kmp_debug_assert("__kmp_affin_fullMask != NULL" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 4667); }; | |||||
4668 | i = 0; | |||||
4669 | mask = __kmp_affin_fullMask; | |||||
4670 | } else { | |||||
4671 | KMP_DEBUG_ASSERT(__kmp_affinity_num_masks > 0)if (!(__kmp_affinity_num_masks > 0)) { __kmp_debug_assert( "__kmp_affinity_num_masks > 0", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 4671); }; | |||||
4672 | i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks; | |||||
4673 | mask = KMP_CPU_INDEX(__kmp_affinity_masks, i)__kmp_affinity_dispatch->index_mask_array(__kmp_affinity_masks , i); | |||||
4674 | } | |||||
4675 | } | |||||
4676 | #if OMP_40_ENABLED(50 >= 40) | |||||
4677 | else { | |||||
4678 | if ((!isa_root) || | |||||
4679 | (__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) { | |||||
4680 | #if KMP_GROUP_AFFINITY0 | |||||
4681 | if (__kmp_num_proc_groups > 1) { | |||||
4682 | return; | |||||
4683 | } | |||||
4684 | #endif | |||||
4685 | KMP_ASSERT(__kmp_affin_fullMask != NULL)if (!(__kmp_affin_fullMask != __null)) { __kmp_debug_assert("__kmp_affin_fullMask != NULL" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 4685); }; | |||||
4686 | i = KMP_PLACE_ALL(-1); | |||||
4687 | mask = __kmp_affin_fullMask; | |||||
4688 | } else { | |||||
4689 | // int i = some hash function or just a counter that doesn't | |||||
4690 | // always start at 0. Use gtid for now. | |||||
4691 | KMP_DEBUG_ASSERT(__kmp_affinity_num_masks > 0)if (!(__kmp_affinity_num_masks > 0)) { __kmp_debug_assert( "__kmp_affinity_num_masks > 0", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 4691); }; | |||||
4692 | i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks; | |||||
4693 | mask = KMP_CPU_INDEX(__kmp_affinity_masks, i)__kmp_affinity_dispatch->index_mask_array(__kmp_affinity_masks , i); | |||||
4694 | } | |||||
4695 | } | |||||
4696 | #endif | |||||
4697 | ||||||
4698 | #if OMP_40_ENABLED(50 >= 40) | |||||
4699 | th->th.th_current_place = i; | |||||
4700 | if (isa_root) { | |||||
4701 | th->th.th_new_place = i; | |||||
4702 | th->th.th_first_place = 0; | |||||
4703 | th->th.th_last_place = __kmp_affinity_num_masks - 1; | |||||
4704 | } | |||||
4705 | ||||||
4706 | if (i == KMP_PLACE_ALL(-1)) { | |||||
4707 | KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to all places\n",if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_affinity_set_init_mask: binding T#%d to all places\n" , gtid); } | |||||
4708 | gtid))if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_affinity_set_init_mask: binding T#%d to all places\n" , gtid); }; | |||||
4709 | } else { | |||||
4710 | KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n",if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n" , gtid, i); } | |||||
4711 | gtid, i))if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n" , gtid, i); }; | |||||
4712 | } | |||||
4713 | #else | |||||
4714 | if (i == -1) { | |||||
4715 | KA_TRACE(if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_affinity_set_init_mask: binding T#%d to __kmp_affin_fullMask\n" , gtid); } | |||||
4716 | 100,if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_affinity_set_init_mask: binding T#%d to __kmp_affin_fullMask\n" , gtid); } | |||||
4717 | ("__kmp_affinity_set_init_mask: binding T#%d to __kmp_affin_fullMask\n",if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_affinity_set_init_mask: binding T#%d to __kmp_affin_fullMask\n" , gtid); } | |||||
4718 | gtid))if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_affinity_set_init_mask: binding T#%d to __kmp_affin_fullMask\n" , gtid); }; | |||||
4719 | } else { | |||||
4720 | KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to mask %d\n",if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_affinity_set_init_mask: binding T#%d to mask %d\n" , gtid, i); } | |||||
4721 | gtid, i))if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_affinity_set_init_mask: binding T#%d to mask %d\n" , gtid, i); }; | |||||
4722 | } | |||||
4723 | #endif /* OMP_40_ENABLED */ | |||||
4724 | ||||||
4725 | KMP_CPU_COPY(th->th.th_affin_mask, mask)(th->th.th_affin_mask)->copy(mask); | |||||
4726 | ||||||
4727 | if (__kmp_affinity_verbose | |||||
4728 | /* to avoid duplicate printing (will be correctly printed on barrier) */ | |||||
4729 | && (__kmp_affinity_type == affinity_none || | |||||
4730 | (i != KMP_PLACE_ALL(-1) && __kmp_affinity_type != affinity_balanced))) { | |||||
4731 | char buf[KMP_AFFIN_MASK_PRINT_LEN1024]; | |||||
4732 | __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN1024, | |||||
4733 | th->th.th_affin_mask); | |||||
4734 | KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_BoundToOSProcSet , "KMP_AFFINITY", (kmp_int32)getpid(), syscall(186), gtid, buf ), __kmp_msg_null) | |||||
4735 | __kmp_gettid(), gtid, buf)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_BoundToOSProcSet , "KMP_AFFINITY", (kmp_int32)getpid(), syscall(186), gtid, buf ), __kmp_msg_null); | |||||
4736 | } | |||||
4737 | ||||||
4738 | #if KMP_OS_WINDOWS0 | |||||
4739 | // On Windows* OS, the process affinity mask might have changed. If the user | |||||
4740 | // didn't request affinity and this call fails, just continue silently. | |||||
4741 | // See CQ171393. | |||||
4742 | if (__kmp_affinity_type == affinity_none) { | |||||
4743 | __kmp_set_system_affinity(th->th.th_affin_mask, FALSE)(th->th.th_affin_mask)->set_system_affinity(0); | |||||
4744 | } else | |||||
4745 | #endif | |||||
4746 | __kmp_set_system_affinity(th->th.th_affin_mask, TRUE)(th->th.th_affin_mask)->set_system_affinity((!0)); | |||||
4747 | } | |||||
4748 | ||||||
4749 | #if OMP_40_ENABLED(50 >= 40) | |||||
4750 | ||||||
4751 | void __kmp_affinity_set_place(int gtid) { | |||||
4752 | if (!KMP_AFFINITY_CAPABLE()(__kmp_affin_mask_size > 0)) { | |||||
4753 | return; | |||||
4754 | } | |||||
4755 | ||||||
4756 | kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid])((void *)(__kmp_threads[gtid])); | |||||
4757 | ||||||
4758 | KA_TRACE(100, ("__kmp_affinity_set_place: binding T#%d to place %d (current "if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_affinity_set_place: binding T#%d to place %d (current " "place = %d)\n", gtid, th->th.th_new_place, th->th.th_current_place ); } | |||||
4759 | "place = %d)\n",if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_affinity_set_place: binding T#%d to place %d (current " "place = %d)\n", gtid, th->th.th_new_place, th->th.th_current_place ); } | |||||
4760 | gtid, th->th.th_new_place, th->th.th_current_place))if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_affinity_set_place: binding T#%d to place %d (current " "place = %d)\n", gtid, th->th.th_new_place, th->th.th_current_place ); }; | |||||
4761 | ||||||
4762 | // Check that the new place is within this thread's partition. | |||||
4763 | KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL)if (!(th->th.th_affin_mask != __null)) { __kmp_debug_assert ("th->th.th_affin_mask != __null", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 4763); }; | |||||
4764 | KMP_ASSERT(th->th.th_new_place >= 0)if (!(th->th.th_new_place >= 0)) { __kmp_debug_assert("th->th.th_new_place >= 0" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 4764); }; | |||||
4765 | KMP_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity_num_masks)if (!((unsigned)th->th.th_new_place <= __kmp_affinity_num_masks )) { __kmp_debug_assert("(unsigned)th->th.th_new_place <= __kmp_affinity_num_masks" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 4765); }; | |||||
4766 | if (th->th.th_first_place <= th->th.th_last_place) { | |||||
4767 | KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place) &&if (!((th->th.th_new_place >= th->th.th_first_place) && (th->th.th_new_place <= th->th.th_last_place ))) { __kmp_debug_assert("(th->th.th_new_place >= th->th.th_first_place) && (th->th.th_new_place <= th->th.th_last_place)" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 4768); } | |||||
4768 | (th->th.th_new_place <= th->th.th_last_place))if (!((th->th.th_new_place >= th->th.th_first_place) && (th->th.th_new_place <= th->th.th_last_place ))) { __kmp_debug_assert("(th->th.th_new_place >= th->th.th_first_place) && (th->th.th_new_place <= th->th.th_last_place)" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 4768); }; | |||||
4769 | } else { | |||||
4770 | KMP_ASSERT((th->th.th_new_place <= th->th.th_first_place) ||if (!((th->th.th_new_place <= th->th.th_first_place) || (th->th.th_new_place >= th->th.th_last_place))) { __kmp_debug_assert("(th->th.th_new_place <= th->th.th_first_place) || (th->th.th_new_place >= th->th.th_last_place)" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 4771); } | |||||
4771 | (th->th.th_new_place >= th->th.th_last_place))if (!((th->th.th_new_place <= th->th.th_first_place) || (th->th.th_new_place >= th->th.th_last_place))) { __kmp_debug_assert("(th->th.th_new_place <= th->th.th_first_place) || (th->th.th_new_place >= th->th.th_last_place)" , "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 4771); }; | |||||
4772 | } | |||||
4773 | ||||||
4774 | // Copy the thread mask to the kmp_info_t strucuture, | |||||
4775 | // and set this thread's affinity. | |||||
4776 | kmp_affin_mask_t *mask = | |||||
4777 | KMP_CPU_INDEX(__kmp_affinity_masks, th->th.th_new_place)__kmp_affinity_dispatch->index_mask_array(__kmp_affinity_masks , th->th.th_new_place); | |||||
4778 | KMP_CPU_COPY(th->th.th_affin_mask, mask)(th->th.th_affin_mask)->copy(mask); | |||||
4779 | th->th.th_current_place = th->th.th_new_place; | |||||
4780 | ||||||
4781 | if (__kmp_affinity_verbose) { | |||||
4782 | char buf[KMP_AFFIN_MASK_PRINT_LEN1024]; | |||||
4783 | __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN1024, | |||||
4784 | th->th.th_affin_mask); | |||||
4785 | KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", (kmp_int32)getpid(),__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_BoundToOSProcSet , "OMP_PROC_BIND", (kmp_int32)getpid(), syscall(186), gtid, buf ), __kmp_msg_null) | |||||
4786 | __kmp_gettid(), gtid, buf)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_BoundToOSProcSet , "OMP_PROC_BIND", (kmp_int32)getpid(), syscall(186), gtid, buf ), __kmp_msg_null); | |||||
4787 | } | |||||
4788 | __kmp_set_system_affinity(th->th.th_affin_mask, TRUE)(th->th.th_affin_mask)->set_system_affinity((!0)); | |||||
4789 | } | |||||
4790 | ||||||
4791 | #endif /* OMP_40_ENABLED */ | |||||
4792 | ||||||
4793 | int __kmp_aux_set_affinity(void **mask) { | |||||
4794 | int gtid; | |||||
4795 | kmp_info_t *th; | |||||
4796 | int retval; | |||||
4797 | ||||||
4798 | if (!KMP_AFFINITY_CAPABLE()(__kmp_affin_mask_size > 0)) { | |||||
4799 | return -1; | |||||
4800 | } | |||||
4801 | ||||||
4802 | gtid = __kmp_entry_gtid()__kmp_get_global_thread_id_reg(); | |||||
4803 | KA_TRACE(1000, ; {if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { char buf [1024]; __kmp_affinity_print_mask(buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_debug_printf( "kmp_set_affinity: setting affinity mask for thread %d = %s\n" , gtid, buf); }; } | |||||
4804 | char buf[KMP_AFFIN_MASK_PRINT_LEN];if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { char buf [1024]; __kmp_affinity_print_mask(buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_debug_printf( "kmp_set_affinity: setting affinity mask for thread %d = %s\n" , gtid, buf); }; } | |||||
4805 | __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { char buf [1024]; __kmp_affinity_print_mask(buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_debug_printf( "kmp_set_affinity: setting affinity mask for thread %d = %s\n" , gtid, buf); }; } | |||||
4806 | (kmp_affin_mask_t *)(*mask));if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { char buf [1024]; __kmp_affinity_print_mask(buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_debug_printf( "kmp_set_affinity: setting affinity mask for thread %d = %s\n" , gtid, buf); }; } | |||||
4807 | __kmp_debug_printf(if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { char buf [1024]; __kmp_affinity_print_mask(buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_debug_printf( "kmp_set_affinity: setting affinity mask for thread %d = %s\n" , gtid, buf); }; } | |||||
4808 | "kmp_set_affinity: setting affinity mask for thread %d = %s\n", gtid,if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { char buf [1024]; __kmp_affinity_print_mask(buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_debug_printf( "kmp_set_affinity: setting affinity mask for thread %d = %s\n" , gtid, buf); }; } | |||||
4809 | buf);if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { char buf [1024]; __kmp_affinity_print_mask(buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_debug_printf( "kmp_set_affinity: setting affinity mask for thread %d = %s\n" , gtid, buf); }; } | |||||
4810 | })if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { char buf [1024]; __kmp_affinity_print_mask(buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_debug_printf( "kmp_set_affinity: setting affinity mask for thread %d = %s\n" , gtid, buf); }; }; | |||||
4811 | ||||||
4812 | if (__kmp_env_consistency_check) { | |||||
4813 | if ((mask == NULL__null) || (*mask == NULL__null)) { | |||||
4814 | KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity")__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_AffinityInvalidMask , "kmp_set_affinity"), __kmp_msg_null); | |||||
4815 | } else { | |||||
4816 | unsigned proc; | |||||
4817 | int num_procs = 0; | |||||
4818 | ||||||
4819 | KMP_CPU_SET_ITERATE(proc, ((kmp_affin_mask_t *)(*mask)))for (proc = (((kmp_affin_mask_t *)(*mask)))->begin(); (int )proc != (((kmp_affin_mask_t *)(*mask)))->end(); proc = (( (kmp_affin_mask_t *)(*mask)))->next(proc)) { | |||||
4820 | if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)(__kmp_affin_fullMask)->is_set(proc)) { | |||||
4821 | KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity")__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_AffinityInvalidMask , "kmp_set_affinity"), __kmp_msg_null); | |||||
4822 | } | |||||
4823 | if (!KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask))((kmp_affin_mask_t *)(*mask))->is_set(proc)) { | |||||
4824 | continue; | |||||
4825 | } | |||||
4826 | num_procs++; | |||||
4827 | } | |||||
4828 | if (num_procs == 0) { | |||||
4829 | KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity")__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_AffinityInvalidMask , "kmp_set_affinity"), __kmp_msg_null); | |||||
4830 | } | |||||
4831 | ||||||
4832 | #if KMP_GROUP_AFFINITY0 | |||||
4833 | if (__kmp_get_proc_group((kmp_affin_mask_t *)(*mask))((kmp_affin_mask_t *)(*mask))->get_proc_group() < 0) { | |||||
4834 | KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity")__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_AffinityInvalidMask , "kmp_set_affinity"), __kmp_msg_null); | |||||
4835 | } | |||||
4836 | #endif /* KMP_GROUP_AFFINITY */ | |||||
4837 | } | |||||
4838 | } | |||||
4839 | ||||||
4840 | th = __kmp_threads[gtid]; | |||||
4841 | KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL)if (!(th->th.th_affin_mask != __null)) { __kmp_debug_assert ("th->th.th_affin_mask != __null", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 4841); }; | |||||
4842 | retval = __kmp_set_system_affinity((kmp_affin_mask_t *)(*mask), FALSE)((kmp_affin_mask_t *)(*mask))->set_system_affinity(0); | |||||
4843 | if (retval == 0) { | |||||
4844 | KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t *)(*mask))(th->th.th_affin_mask)->copy((kmp_affin_mask_t *)(*mask )); | |||||
4845 | } | |||||
4846 | ||||||
4847 | #if OMP_40_ENABLED(50 >= 40) | |||||
4848 | th->th.th_current_place = KMP_PLACE_UNDEFINED(-2); | |||||
4849 | th->th.th_new_place = KMP_PLACE_UNDEFINED(-2); | |||||
4850 | th->th.th_first_place = 0; | |||||
4851 | th->th.th_last_place = __kmp_affinity_num_masks - 1; | |||||
4852 | ||||||
4853 | // Turn off 4.0 affinity for the current tread at this parallel level. | |||||
4854 | th->th.th_current_task->td_icvs.proc_bind = proc_bind_false; | |||||
4855 | #endif | |||||
4856 | ||||||
4857 | return retval; | |||||
4858 | } | |||||
4859 | ||||||
4860 | int __kmp_aux_get_affinity(void **mask) { | |||||
4861 | int gtid; | |||||
4862 | int retval; | |||||
4863 | kmp_info_t *th; | |||||
4864 | ||||||
4865 | if (!KMP_AFFINITY_CAPABLE()(__kmp_affin_mask_size > 0)) { | |||||
4866 | return -1; | |||||
4867 | } | |||||
4868 | ||||||
4869 | gtid = __kmp_entry_gtid()__kmp_get_global_thread_id_reg(); | |||||
4870 | th = __kmp_threads[gtid]; | |||||
4871 | KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL)if (!(th->th.th_affin_mask != __null)) { __kmp_debug_assert ("th->th.th_affin_mask != __null", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 4871); }; | |||||
4872 | ||||||
4873 | KA_TRACE(1000, ; {if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { char buf [1024]; __kmp_affinity_print_mask(buf, 1024, th->th.th_affin_mask ); __kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n" , gtid, buf); }; } | |||||
4874 | char buf[KMP_AFFIN_MASK_PRINT_LEN];if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { char buf [1024]; __kmp_affinity_print_mask(buf, 1024, th->th.th_affin_mask ); __kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n" , gtid, buf); }; } | |||||
4875 | __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { char buf [1024]; __kmp_affinity_print_mask(buf, 1024, th->th.th_affin_mask ); __kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n" , gtid, buf); }; } | |||||
4876 | th->th.th_affin_mask);if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { char buf [1024]; __kmp_affinity_print_mask(buf, 1024, th->th.th_affin_mask ); __kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n" , gtid, buf); }; } | |||||
4877 | __kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n",if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { char buf [1024]; __kmp_affinity_print_mask(buf, 1024, th->th.th_affin_mask ); __kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n" , gtid, buf); }; } | |||||
4878 | gtid, buf);if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { char buf [1024]; __kmp_affinity_print_mask(buf, 1024, th->th.th_affin_mask ); __kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n" , gtid, buf); }; } | |||||
4879 | })if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { char buf [1024]; __kmp_affinity_print_mask(buf, 1024, th->th.th_affin_mask ); __kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n" , gtid, buf); }; }; | |||||
4880 | ||||||
4881 | if (__kmp_env_consistency_check) { | |||||
4882 | if ((mask == NULL__null) || (*mask == NULL__null)) { | |||||
4883 | KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity")__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_AffinityInvalidMask , "kmp_get_affinity"), __kmp_msg_null); | |||||
4884 | } | |||||
4885 | } | |||||
4886 | ||||||
4887 | #if !KMP_OS_WINDOWS0 | |||||
4888 | ||||||
4889 | retval = __kmp_get_system_affinity((kmp_affin_mask_t *)(*mask), FALSE)((kmp_affin_mask_t *)(*mask))->get_system_affinity(0); | |||||
4890 | KA_TRACE(1000, ; {if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { char buf [1024]; __kmp_affinity_print_mask(buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n" , gtid, buf); }; } | |||||
4891 | char buf[KMP_AFFIN_MASK_PRINT_LEN];if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { char buf [1024]; __kmp_affinity_print_mask(buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n" , gtid, buf); }; } | |||||
4892 | __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { char buf [1024]; __kmp_affinity_print_mask(buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n" , gtid, buf); }; } | |||||
4893 | (kmp_affin_mask_t *)(*mask));if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { char buf [1024]; __kmp_affinity_print_mask(buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n" , gtid, buf); }; } | |||||
4894 | __kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n",if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { char buf [1024]; __kmp_affinity_print_mask(buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n" , gtid, buf); }; } | |||||
4895 | gtid, buf);if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { char buf [1024]; __kmp_affinity_print_mask(buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n" , gtid, buf); }; } | |||||
4896 | })if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { char buf [1024]; __kmp_affinity_print_mask(buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n" , gtid, buf); }; }; | |||||
4897 | return retval; | |||||
4898 | ||||||
4899 | #else | |||||
4900 | ||||||
4901 | KMP_CPU_COPY((kmp_affin_mask_t *)(*mask), th->th.th_affin_mask)((kmp_affin_mask_t *)(*mask))->copy(th->th.th_affin_mask ); | |||||
4902 | return 0; | |||||
4903 | ||||||
4904 | #endif /* KMP_OS_WINDOWS */ | |||||
4905 | } | |||||
4906 | ||||||
4907 | int __kmp_aux_get_affinity_max_proc() { | |||||
4908 | if (!KMP_AFFINITY_CAPABLE()(__kmp_affin_mask_size > 0)) { | |||||
4909 | return 0; | |||||
4910 | } | |||||
4911 | #if KMP_GROUP_AFFINITY0 | |||||
4912 | if (__kmp_num_proc_groups > 1) { | |||||
4913 | return (int)(__kmp_num_proc_groups * sizeof(DWORD_PTR) * CHAR_BIT8); | |||||
4914 | } | |||||
4915 | #endif | |||||
4916 | return __kmp_xproc; | |||||
4917 | } | |||||
4918 | ||||||
4919 | int __kmp_aux_set_affinity_mask_proc(int proc, void **mask) { | |||||
4920 | if (!KMP_AFFINITY_CAPABLE()(__kmp_affin_mask_size > 0)) { | |||||
4921 | return -1; | |||||
4922 | } | |||||
4923 | ||||||
4924 | KA_TRACE(1000, ; {if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { int gtid = __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask (buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_debug_printf( "kmp_set_affinity_mask_proc: setting proc %d in " "affinity mask for thread %d = %s\n" , proc, gtid, buf); }; } | |||||
4925 | int gtid = __kmp_entry_gtid();if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { int gtid = __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask (buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_debug_printf( "kmp_set_affinity_mask_proc: setting proc %d in " "affinity mask for thread %d = %s\n" , proc, gtid, buf); }; } | |||||
4926 | char buf[KMP_AFFIN_MASK_PRINT_LEN];if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { int gtid = __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask (buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_debug_printf( "kmp_set_affinity_mask_proc: setting proc %d in " "affinity mask for thread %d = %s\n" , proc, gtid, buf); }; } | |||||
4927 | __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { int gtid = __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask (buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_debug_printf( "kmp_set_affinity_mask_proc: setting proc %d in " "affinity mask for thread %d = %s\n" , proc, gtid, buf); }; } | |||||
4928 | (kmp_affin_mask_t *)(*mask));if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { int gtid = __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask (buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_debug_printf( "kmp_set_affinity_mask_proc: setting proc %d in " "affinity mask for thread %d = %s\n" , proc, gtid, buf); }; } | |||||
4929 | __kmp_debug_printf("kmp_set_affinity_mask_proc: setting proc %d in "if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { int gtid = __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask (buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_debug_printf( "kmp_set_affinity_mask_proc: setting proc %d in " "affinity mask for thread %d = %s\n" , proc, gtid, buf); }; } | |||||
4930 | "affinity mask for thread %d = %s\n",if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { int gtid = __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask (buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_debug_printf( "kmp_set_affinity_mask_proc: setting proc %d in " "affinity mask for thread %d = %s\n" , proc, gtid, buf); }; } | |||||
4931 | proc, gtid, buf);if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { int gtid = __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask (buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_debug_printf( "kmp_set_affinity_mask_proc: setting proc %d in " "affinity mask for thread %d = %s\n" , proc, gtid, buf); }; } | |||||
4932 | })if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { int gtid = __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask (buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_debug_printf( "kmp_set_affinity_mask_proc: setting proc %d in " "affinity mask for thread %d = %s\n" , proc, gtid, buf); }; }; | |||||
4933 | ||||||
4934 | if (__kmp_env_consistency_check) { | |||||
4935 | if ((mask == NULL__null) || (*mask == NULL__null)) { | |||||
4936 | KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc")__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_AffinityInvalidMask , "kmp_set_affinity_mask_proc"), __kmp_msg_null); | |||||
4937 | } | |||||
4938 | } | |||||
4939 | ||||||
4940 | if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) { | |||||
4941 | return -1; | |||||
4942 | } | |||||
4943 | if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)(__kmp_affin_fullMask)->is_set(proc)) { | |||||
4944 | return -2; | |||||
4945 | } | |||||
4946 | ||||||
4947 | KMP_CPU_SET(proc, (kmp_affin_mask_t *)(*mask))((kmp_affin_mask_t *)(*mask))->set(proc); | |||||
4948 | return 0; | |||||
4949 | } | |||||
4950 | ||||||
4951 | int __kmp_aux_unset_affinity_mask_proc(int proc, void **mask) { | |||||
4952 | if (!KMP_AFFINITY_CAPABLE()(__kmp_affin_mask_size > 0)) { | |||||
4953 | return -1; | |||||
4954 | } | |||||
4955 | ||||||
4956 | KA_TRACE(1000, ; {if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { int gtid = __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask (buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_debug_printf( "kmp_unset_affinity_mask_proc: unsetting proc %d in " "affinity mask for thread %d = %s\n" , proc, gtid, buf); }; } | |||||
4957 | int gtid = __kmp_entry_gtid();if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { int gtid = __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask (buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_debug_printf( "kmp_unset_affinity_mask_proc: unsetting proc %d in " "affinity mask for thread %d = %s\n" , proc, gtid, buf); }; } | |||||
4958 | char buf[KMP_AFFIN_MASK_PRINT_LEN];if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { int gtid = __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask (buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_debug_printf( "kmp_unset_affinity_mask_proc: unsetting proc %d in " "affinity mask for thread %d = %s\n" , proc, gtid, buf); }; } | |||||
4959 | __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { int gtid = __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask (buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_debug_printf( "kmp_unset_affinity_mask_proc: unsetting proc %d in " "affinity mask for thread %d = %s\n" , proc, gtid, buf); }; } | |||||
4960 | (kmp_affin_mask_t *)(*mask));if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { int gtid = __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask (buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_debug_printf( "kmp_unset_affinity_mask_proc: unsetting proc %d in " "affinity mask for thread %d = %s\n" , proc, gtid, buf); }; } | |||||
4961 | __kmp_debug_printf("kmp_unset_affinity_mask_proc: unsetting proc %d in "if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { int gtid = __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask (buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_debug_printf( "kmp_unset_affinity_mask_proc: unsetting proc %d in " "affinity mask for thread %d = %s\n" , proc, gtid, buf); }; } | |||||
4962 | "affinity mask for thread %d = %s\n",if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { int gtid = __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask (buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_debug_printf( "kmp_unset_affinity_mask_proc: unsetting proc %d in " "affinity mask for thread %d = %s\n" , proc, gtid, buf); }; } | |||||
4963 | proc, gtid, buf);if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { int gtid = __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask (buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_debug_printf( "kmp_unset_affinity_mask_proc: unsetting proc %d in " "affinity mask for thread %d = %s\n" , proc, gtid, buf); }; } | |||||
4964 | })if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { int gtid = __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask (buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_debug_printf( "kmp_unset_affinity_mask_proc: unsetting proc %d in " "affinity mask for thread %d = %s\n" , proc, gtid, buf); }; }; | |||||
4965 | ||||||
4966 | if (__kmp_env_consistency_check) { | |||||
4967 | if ((mask == NULL__null) || (*mask == NULL__null)) { | |||||
4968 | KMP_FATAL(AffinityInvalidMask, "kmp_unset_affinity_mask_proc")__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_AffinityInvalidMask , "kmp_unset_affinity_mask_proc"), __kmp_msg_null); | |||||
4969 | } | |||||
4970 | } | |||||
4971 | ||||||
4972 | if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) { | |||||
4973 | return -1; | |||||
4974 | } | |||||
4975 | if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)(__kmp_affin_fullMask)->is_set(proc)) { | |||||
4976 | return -2; | |||||
4977 | } | |||||
4978 | ||||||
4979 | KMP_CPU_CLR(proc, (kmp_affin_mask_t *)(*mask))((kmp_affin_mask_t *)(*mask))->clear(proc); | |||||
4980 | return 0; | |||||
4981 | } | |||||
4982 | ||||||
4983 | int __kmp_aux_get_affinity_mask_proc(int proc, void **mask) { | |||||
4984 | if (!KMP_AFFINITY_CAPABLE()(__kmp_affin_mask_size > 0)) { | |||||
4985 | return -1; | |||||
4986 | } | |||||
4987 | ||||||
4988 | KA_TRACE(1000, ; {if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { int gtid = __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask (buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_debug_printf( "kmp_get_affinity_mask_proc: getting proc %d in " "affinity mask for thread %d = %s\n" , proc, gtid, buf); }; } | |||||
4989 | int gtid = __kmp_entry_gtid();if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { int gtid = __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask (buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_debug_printf( "kmp_get_affinity_mask_proc: getting proc %d in " "affinity mask for thread %d = %s\n" , proc, gtid, buf); }; } | |||||
4990 | char buf[KMP_AFFIN_MASK_PRINT_LEN];if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { int gtid = __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask (buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_debug_printf( "kmp_get_affinity_mask_proc: getting proc %d in " "affinity mask for thread %d = %s\n" , proc, gtid, buf); }; } | |||||
4991 | __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { int gtid = __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask (buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_debug_printf( "kmp_get_affinity_mask_proc: getting proc %d in " "affinity mask for thread %d = %s\n" , proc, gtid, buf); }; } | |||||
4992 | (kmp_affin_mask_t *)(*mask));if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { int gtid = __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask (buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_debug_printf( "kmp_get_affinity_mask_proc: getting proc %d in " "affinity mask for thread %d = %s\n" , proc, gtid, buf); }; } | |||||
4993 | __kmp_debug_printf("kmp_get_affinity_mask_proc: getting proc %d in "if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { int gtid = __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask (buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_debug_printf( "kmp_get_affinity_mask_proc: getting proc %d in " "affinity mask for thread %d = %s\n" , proc, gtid, buf); }; } | |||||
4994 | "affinity mask for thread %d = %s\n",if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { int gtid = __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask (buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_debug_printf( "kmp_get_affinity_mask_proc: getting proc %d in " "affinity mask for thread %d = %s\n" , proc, gtid, buf); }; } | |||||
4995 | proc, gtid, buf);if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { int gtid = __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask (buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_debug_printf( "kmp_get_affinity_mask_proc: getting proc %d in " "affinity mask for thread %d = %s\n" , proc, gtid, buf); }; } | |||||
4996 | })if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { int gtid = __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask (buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_debug_printf( "kmp_get_affinity_mask_proc: getting proc %d in " "affinity mask for thread %d = %s\n" , proc, gtid, buf); }; }; | |||||
4997 | ||||||
4998 | if (__kmp_env_consistency_check) { | |||||
4999 | if ((mask == NULL__null) || (*mask == NULL__null)) { | |||||
5000 | KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity_mask_proc")__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_AffinityInvalidMask , "kmp_get_affinity_mask_proc"), __kmp_msg_null); | |||||
5001 | } | |||||
5002 | } | |||||
5003 | ||||||
5004 | if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) { | |||||
5005 | return -1; | |||||
5006 | } | |||||
5007 | if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)(__kmp_affin_fullMask)->is_set(proc)) { | |||||
5008 | return 0; | |||||
5009 | } | |||||
5010 | ||||||
5011 | return KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask))((kmp_affin_mask_t *)(*mask))->is_set(proc); | |||||
5012 | } | |||||
5013 | ||||||
5014 | // Dynamic affinity settings - Affinity balanced | |||||
5015 | void __kmp_balanced_affinity(kmp_info_t *th, int nthreads) { | |||||
5016 | KMP_DEBUG_ASSERT(th)if (!(th)) { __kmp_debug_assert("th", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 5016); }; | |||||
5017 | bool fine_gran = true; | |||||
5018 | int tid = th->th.th_info.ds.ds_tid; | |||||
5019 | ||||||
5020 | switch (__kmp_affinity_gran) { | |||||
5021 | case affinity_gran_fine: | |||||
5022 | case affinity_gran_thread: | |||||
5023 | break; | |||||
5024 | case affinity_gran_core: | |||||
5025 | if (__kmp_nThreadsPerCore > 1) { | |||||
5026 | fine_gran = false; | |||||
5027 | } | |||||
5028 | break; | |||||
5029 | case affinity_gran_package: | |||||
5030 | if (nCoresPerPkg > 1) { | |||||
5031 | fine_gran = false; | |||||
5032 | } | |||||
5033 | break; | |||||
5034 | default: | |||||
5035 | fine_gran = false; | |||||
5036 | } | |||||
5037 | ||||||
5038 | if (__kmp_affinity_uniform_topology()) { | |||||
5039 | int coreID; | |||||
5040 | int threadID; | |||||
5041 | // Number of hyper threads per core in HT machine | |||||
5042 | int __kmp_nth_per_core = __kmp_avail_proc / __kmp_ncores; | |||||
5043 | // Number of cores | |||||
5044 | int ncores = __kmp_ncores; | |||||
5045 | if ((nPackages > 1) && (__kmp_nth_per_core <= 1)) { | |||||
5046 | __kmp_nth_per_core = __kmp_avail_proc / nPackages; | |||||
5047 | ncores = nPackages; | |||||
5048 | } | |||||
5049 | // How many threads will be bound to each core | |||||
5050 | int chunk = nthreads / ncores; | |||||
5051 | // How many cores will have an additional thread bound to it - "big cores" | |||||
5052 | int big_cores = nthreads % ncores; | |||||
5053 | // Number of threads on the big cores | |||||
5054 | int big_nth = (chunk + 1) * big_cores; | |||||
5055 | if (tid < big_nth) { | |||||
5056 | coreID = tid / (chunk + 1); | |||||
5057 | threadID = (tid % (chunk + 1)) % __kmp_nth_per_core; | |||||
5058 | } else { // tid >= big_nth | |||||
5059 | coreID = (tid - big_cores) / chunk; | |||||
5060 | threadID = ((tid - big_cores) % chunk) % __kmp_nth_per_core; | |||||
5061 | } | |||||
5062 | ||||||
5063 | KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(),if (!((__kmp_affin_mask_size > 0))) { __kmp_debug_assert(( "Illegal set affinity operation when not capable"), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 5064); } | |||||
5064 | "Illegal set affinity operation when not capable")if (!((__kmp_affin_mask_size > 0))) { __kmp_debug_assert(( "Illegal set affinity operation when not capable"), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 5064); }; | |||||
5065 | ||||||
5066 | kmp_affin_mask_t *mask = th->th.th_affin_mask; | |||||
5067 | KMP_CPU_ZERO(mask)(mask)->zero(); | |||||
5068 | ||||||
5069 | if (fine_gran) { | |||||
5070 | int osID = address2os[coreID * __kmp_nth_per_core + threadID].second; | |||||
5071 | KMP_CPU_SET(osID, mask)(mask)->set(osID); | |||||
5072 | } else { | |||||
5073 | for (int i = 0; i < __kmp_nth_per_core; i++) { | |||||
5074 | int osID; | |||||
5075 | osID = address2os[coreID * __kmp_nth_per_core + i].second; | |||||
5076 | KMP_CPU_SET(osID, mask)(mask)->set(osID); | |||||
5077 | } | |||||
5078 | } | |||||
5079 | if (__kmp_affinity_verbose) { | |||||
5080 | char buf[KMP_AFFIN_MASK_PRINT_LEN1024]; | |||||
5081 | __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN1024, mask); | |||||
5082 | KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_BoundToOSProcSet , "KMP_AFFINITY", (kmp_int32)getpid(), syscall(186), tid, buf ), __kmp_msg_null) | |||||
5083 | __kmp_gettid(), tid, buf)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_BoundToOSProcSet , "KMP_AFFINITY", (kmp_int32)getpid(), syscall(186), tid, buf ), __kmp_msg_null); | |||||
5084 | } | |||||
5085 | __kmp_set_system_affinity(mask, TRUE)(mask)->set_system_affinity((!0)); | |||||
5086 | } else { // Non-uniform topology | |||||
5087 | ||||||
5088 | kmp_affin_mask_t *mask = th->th.th_affin_mask; | |||||
5089 | KMP_CPU_ZERO(mask)(mask)->zero(); | |||||
5090 | ||||||
5091 | int core_level = __kmp_affinity_find_core_level( | |||||
5092 | address2os, __kmp_avail_proc, __kmp_aff_depth - 1); | |||||
5093 | int ncores = __kmp_affinity_compute_ncores(address2os, __kmp_avail_proc, | |||||
5094 | __kmp_aff_depth - 1, core_level); | |||||
5095 | int nth_per_core = __kmp_affinity_max_proc_per_core( | |||||
5096 | address2os, __kmp_avail_proc, __kmp_aff_depth - 1, core_level); | |||||
5097 | ||||||
5098 | // For performance gain consider the special case nthreads == | |||||
5099 | // __kmp_avail_proc | |||||
5100 | if (nthreads == __kmp_avail_proc) { | |||||
5101 | if (fine_gran) { | |||||
5102 | int osID = address2os[tid].second; | |||||
5103 | KMP_CPU_SET(osID, mask)(mask)->set(osID); | |||||
5104 | } else { | |||||
5105 | int core = __kmp_affinity_find_core(address2os, tid, | |||||
5106 | __kmp_aff_depth - 1, core_level); | |||||
5107 | for (int i = 0; i < __kmp_avail_proc; i++) { | |||||
5108 | int osID = address2os[i].second; | |||||
5109 | if (__kmp_affinity_find_core(address2os, i, __kmp_aff_depth - 1, | |||||
5110 | core_level) == core) { | |||||
5111 | KMP_CPU_SET(osID, mask)(mask)->set(osID); | |||||
5112 | } | |||||
5113 | } | |||||
5114 | } | |||||
5115 | } else if (nthreads <= ncores) { | |||||
5116 | ||||||
5117 | int core = 0; | |||||
5118 | for (int i = 0; i < ncores; i++) { | |||||
5119 | // Check if this core from procarr[] is in the mask | |||||
5120 | int in_mask = 0; | |||||
5121 | for (int j = 0; j < nth_per_core; j++) { | |||||
5122 | if (procarr[i * nth_per_core + j] != -1) { | |||||
5123 | in_mask = 1; | |||||
5124 | break; | |||||
5125 | } | |||||
5126 | } | |||||
5127 | if (in_mask) { | |||||
5128 | if (tid == core) { | |||||
5129 | for (int j = 0; j < nth_per_core; j++) { | |||||
5130 | int osID = procarr[i * nth_per_core + j]; | |||||
5131 | if (osID != -1) { | |||||
5132 | KMP_CPU_SET(osID, mask)(mask)->set(osID); | |||||
5133 | // For fine granularity it is enough to set the first available | |||||
5134 | // osID for this core | |||||
5135 | if (fine_gran) { | |||||
5136 | break; | |||||
5137 | } | |||||
5138 | } | |||||
5139 | } | |||||
5140 | break; | |||||
5141 | } else { | |||||
5142 | core++; | |||||
5143 | } | |||||
5144 | } | |||||
5145 | } | |||||
5146 | } else { // nthreads > ncores | |||||
5147 | // Array to save the number of processors at each core | |||||
5148 | int *nproc_at_core = (int *)KMP_ALLOCA(sizeof(int) * ncores)__builtin_alloca (sizeof(int) * ncores); | |||||
5149 | // Array to save the number of cores with "x" available processors; | |||||
5150 | int *ncores_with_x_procs = | |||||
5151 | (int *)KMP_ALLOCA(sizeof(int) * (nth_per_core + 1))__builtin_alloca (sizeof(int) * (nth_per_core + 1)); | |||||
5152 | // Array to save the number of cores with # procs from x to nth_per_core | |||||
5153 | int *ncores_with_x_to_max_procs = | |||||
5154 | (int *)KMP_ALLOCA(sizeof(int) * (nth_per_core + 1))__builtin_alloca (sizeof(int) * (nth_per_core + 1)); | |||||
5155 | ||||||
5156 | for (int i = 0; i <= nth_per_core; i++) { | |||||
5157 | ncores_with_x_procs[i] = 0; | |||||
5158 | ncores_with_x_to_max_procs[i] = 0; | |||||
5159 | } | |||||
5160 | ||||||
5161 | for (int i = 0; i < ncores; i++) { | |||||
5162 | int cnt = 0; | |||||
5163 | for (int j = 0; j < nth_per_core; j++) { | |||||
5164 | if (procarr[i * nth_per_core + j] != -1) { | |||||
5165 | cnt++; | |||||
5166 | } | |||||
5167 | } | |||||
5168 | nproc_at_core[i] = cnt; | |||||
5169 | ncores_with_x_procs[cnt]++; | |||||
5170 | } | |||||
5171 | ||||||
5172 | for (int i = 0; i <= nth_per_core; i++) { | |||||
5173 | for (int j = i; j <= nth_per_core; j++) { | |||||
5174 | ncores_with_x_to_max_procs[i] += ncores_with_x_procs[j]; | |||||
5175 | } | |||||
5176 | } | |||||
5177 | ||||||
5178 | // Max number of processors | |||||
5179 | int nproc = nth_per_core * ncores; | |||||
5180 | // An array to keep number of threads per each context | |||||
5181 | int *newarr = (int *)__kmp_allocate(sizeof(int) * nproc)___kmp_allocate((sizeof(int) * nproc), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 5181); | |||||
5182 | for (int i = 0; i < nproc; i++) { | |||||
5183 | newarr[i] = 0; | |||||
5184 | } | |||||
5185 | ||||||
5186 | int nth = nthreads; | |||||
5187 | int flag = 0; | |||||
5188 | while (nth > 0) { | |||||
5189 | for (int j = 1; j <= nth_per_core; j++) { | |||||
5190 | int cnt = ncores_with_x_to_max_procs[j]; | |||||
5191 | for (int i = 0; i < ncores; i++) { | |||||
5192 | // Skip the core with 0 processors | |||||
5193 | if (nproc_at_core[i] == 0) { | |||||
5194 | continue; | |||||
5195 | } | |||||
5196 | for (int k = 0; k < nth_per_core; k++) { | |||||
5197 | if (procarr[i * nth_per_core + k] != -1) { | |||||
5198 | if (newarr[i * nth_per_core + k] == 0) { | |||||
5199 | newarr[i * nth_per_core + k] = 1; | |||||
5200 | cnt--; | |||||
5201 | nth--; | |||||
5202 | break; | |||||
5203 | } else { | |||||
5204 | if (flag != 0) { | |||||
5205 | newarr[i * nth_per_core + k]++; | |||||
5206 | cnt--; | |||||
5207 | nth--; | |||||
5208 | break; | |||||
5209 | } | |||||
5210 | } | |||||
5211 | } | |||||
5212 | } | |||||
5213 | if (cnt == 0 || nth == 0) { | |||||
5214 | break; | |||||
5215 | } | |||||
5216 | } | |||||
5217 | if (nth == 0) { | |||||
5218 | break; | |||||
5219 | } | |||||
5220 | } | |||||
5221 | flag = 1; | |||||
5222 | } | |||||
5223 | int sum = 0; | |||||
5224 | for (int i = 0; i < nproc; i++) { | |||||
5225 | sum += newarr[i]; | |||||
5226 | if (sum > tid) { | |||||
5227 | if (fine_gran) { | |||||
5228 | int osID = procarr[i]; | |||||
5229 | KMP_CPU_SET(osID, mask)(mask)->set(osID); | |||||
5230 | } else { | |||||
5231 | int coreID = i / nth_per_core; | |||||
5232 | for (int ii = 0; ii < nth_per_core; ii++) { | |||||
5233 | int osID = procarr[coreID * nth_per_core + ii]; | |||||
5234 | if (osID != -1) { | |||||
5235 | KMP_CPU_SET(osID, mask)(mask)->set(osID); | |||||
5236 | } | |||||
5237 | } | |||||
5238 | } | |||||
5239 | break; | |||||
5240 | } | |||||
5241 | } | |||||
5242 | __kmp_free(newarr)___kmp_free((newarr), "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_affinity.cpp" , 5242); | |||||
5243 | } | |||||
5244 | ||||||
5245 | if (__kmp_affinity_verbose) { | |||||
5246 | char buf[KMP_AFFIN_MASK_PRINT_LEN1024]; | |||||
5247 | __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN1024, mask); | |||||
5248 | KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_BoundToOSProcSet , "KMP_AFFINITY", (kmp_int32)getpid(), syscall(186), tid, buf ), __kmp_msg_null) | |||||
5249 | __kmp_gettid(), tid, buf)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_BoundToOSProcSet , "KMP_AFFINITY", (kmp_int32)getpid(), syscall(186), tid, buf ), __kmp_msg_null); | |||||
5250 | } | |||||
5251 | __kmp_set_system_affinity(mask, TRUE)(mask)->set_system_affinity((!0)); | |||||
5252 | } | |||||
5253 | } | |||||
5254 | ||||||
#if KMP_OS_LINUX
// Linux-only entry point; Windows does not need it because the
// GetProcessAffinityMask() API is available there.
//
// Binds the calling thread to the full affinity mask
// (__kmp_affin_fullMask). Intended usage:
//   1) The user saves the current affinity mask
//   2) The user calls this function to change the affinity
//   3) The user checks the return value for errors
//   4) Non-OpenMP parallelization is used
//   5) The affinity saved in step 1) is restored
//
// Return value:
//    0  on success,
//   -1  if the thread cannot be bound (non-OpenMP thread, or affinity is not
//       initialized),
//   >0  (errno) if an error happened during binding.
#ifdef __cplusplus
extern "C"
#endif
    int
    kmp_set_thread_affinity_mask_initial() {
  const int gtid = __kmp_get_gtid();

  // Do not touch non-omp threads
  if (gtid < 0) {
    KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: "
                  "non-omp thread, returning\n"));
    return -1;
  }

  // Binding needs both affinity support and a completed middle
  // initialization.
  if (!(KMP_AFFINITY_CAPABLE() && __kmp_init_middle)) {
    KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: "
                  "affinity not initialized, returning\n"));
    return -1;
  }

  KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: "
                "set full mask for thread %d\n",
                gtid));
  KMP_DEBUG_ASSERT(__kmp_affin_fullMask != NULL);

  // FALSE is passed as the second argument, so the result of the call is
  // returned to the caller as the status.
  int status = __kmp_set_system_affinity(__kmp_affin_fullMask, FALSE);
  return status;
}
#endif
5293 | ||||||
5294 | #endif // KMP_AFFINITY_SUPPORTED |