File: | lib/Support/Host.cpp |
Warning: | line 1127, column 18 The left operand of '==' is a garbage value |
1 | //===-- Host.cpp - Implement OS Host Concept --------------------*- C++ -*-===// | |||
2 | // | |||
3 | // The LLVM Compiler Infrastructure | |||
4 | // | |||
5 | // This file is distributed under the University of Illinois Open Source | |||
6 | // License. See LICENSE.TXT for details. | |||
7 | // | |||
8 | //===----------------------------------------------------------------------===// | |||
9 | // | |||
10 | // This file implements the operating system Host concept. | |||
11 | // | |||
12 | //===----------------------------------------------------------------------===// | |||
13 | ||||
14 | #include "llvm/Support/Host.h" | |||
15 | #include "llvm/ADT/SmallSet.h" | |||
16 | #include "llvm/ADT/SmallVector.h" | |||
17 | #include "llvm/ADT/StringRef.h" | |||
18 | #include "llvm/ADT/StringSwitch.h" | |||
19 | #include "llvm/ADT/Triple.h" | |||
20 | #include "llvm/Config/config.h" | |||
21 | #include "llvm/Support/Debug.h" | |||
22 | #include "llvm/Support/FileSystem.h" | |||
23 | #include "llvm/Support/MemoryBuffer.h" | |||
24 | #include "llvm/Support/raw_ostream.h" | |||
25 | #include <assert.h> | |||
26 | #include <string.h> | |||
27 | ||||
28 | // Include the platform-specific parts of this class. | |||
// Pull in the Unix-specific parts of the Host implementation. The guard macro
// is LLVM_ON_UNIX; a trailing "1" (its expansion) must not be part of the name.
#ifdef LLVM_ON_UNIX
#include "Unix/Host.inc"
#endif
32 | #ifdef LLVM_ON_WIN32 | |||
33 | #include "Windows/Host.inc" | |||
34 | #endif | |||
35 | #ifdef _MSC_VER | |||
36 | #include <intrin.h> | |||
37 | #endif | |||
38 | #if defined(__APPLE__) && (defined(__ppc__) || defined(__powerpc__)) | |||
39 | #include <mach/host_info.h> | |||
40 | #include <mach/mach.h> | |||
41 | #include <mach/mach_host.h> | |||
42 | #include <mach/machine.h> | |||
43 | #endif | |||
44 | ||||
// Debug category name for this file; the macro must expand to exactly one
// string literal.
#define DEBUG_TYPE "host-detection"
46 | ||||
47 | //===----------------------------------------------------------------------===// | |||
48 | // | |||
49 | // Implementations of the CPU detection routines | |||
50 | // | |||
51 | //===----------------------------------------------------------------------===// | |||
52 | ||||
53 | using namespace llvm; | |||
54 | ||||
55 | static std::unique_ptr<llvm::MemoryBuffer> | |||
56 | LLVM_ATTRIBUTE_UNUSED__attribute__((__unused__)) getProcCpuinfoContent() { | |||
57 | llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text = | |||
58 | llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo"); | |||
59 | if (std::error_code EC = Text.getError()) { | |||
60 | llvm::errs() << "Can't read " | |||
61 | << "/proc/cpuinfo: " << EC.message() << "\n"; | |||
62 | return nullptr; | |||
63 | } | |||
64 | return std::move(*Text); | |||
65 | } | |||
66 | ||||
67 | StringRef sys::detail::getHostCPUNameForPowerPC( | |||
68 | const StringRef &ProcCpuinfoContent) { | |||
69 | // Access to the Processor Version Register (PVR) on PowerPC is privileged, | |||
70 | // and so we must use an operating-system interface to determine the current | |||
71 | // processor type. On Linux, this is exposed through the /proc/cpuinfo file. | |||
72 | const char *generic = "generic"; | |||
73 | ||||
74 | // The cpu line is second (after the 'processor: 0' line), so if this | |||
75 | // buffer is too small then something has changed (or is wrong). | |||
76 | StringRef::const_iterator CPUInfoStart = ProcCpuinfoContent.begin(); | |||
77 | StringRef::const_iterator CPUInfoEnd = ProcCpuinfoContent.end(); | |||
78 | ||||
79 | StringRef::const_iterator CIP = CPUInfoStart; | |||
80 | ||||
81 | StringRef::const_iterator CPUStart = 0; | |||
82 | size_t CPULen = 0; | |||
83 | ||||
84 | // We need to find the first line which starts with cpu, spaces, and a colon. | |||
85 | // After the colon, there may be some additional spaces and then the cpu type. | |||
86 | while (CIP < CPUInfoEnd && CPUStart == 0) { | |||
87 | if (CIP < CPUInfoEnd && *CIP == '\n') | |||
88 | ++CIP; | |||
89 | ||||
90 | if (CIP < CPUInfoEnd && *CIP == 'c') { | |||
91 | ++CIP; | |||
92 | if (CIP < CPUInfoEnd && *CIP == 'p') { | |||
93 | ++CIP; | |||
94 | if (CIP < CPUInfoEnd && *CIP == 'u') { | |||
95 | ++CIP; | |||
96 | while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t')) | |||
97 | ++CIP; | |||
98 | ||||
99 | if (CIP < CPUInfoEnd && *CIP == ':') { | |||
100 | ++CIP; | |||
101 | while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t')) | |||
102 | ++CIP; | |||
103 | ||||
104 | if (CIP < CPUInfoEnd) { | |||
105 | CPUStart = CIP; | |||
106 | while (CIP < CPUInfoEnd && (*CIP != ' ' && *CIP != '\t' && | |||
107 | *CIP != ',' && *CIP != '\n')) | |||
108 | ++CIP; | |||
109 | CPULen = CIP - CPUStart; | |||
110 | } | |||
111 | } | |||
112 | } | |||
113 | } | |||
114 | } | |||
115 | ||||
116 | if (CPUStart == 0) | |||
117 | while (CIP < CPUInfoEnd && *CIP != '\n') | |||
118 | ++CIP; | |||
119 | } | |||
120 | ||||
121 | if (CPUStart == 0) | |||
122 | return generic; | |||
123 | ||||
124 | return StringSwitch<const char *>(StringRef(CPUStart, CPULen)) | |||
125 | .Case("604e", "604e") | |||
126 | .Case("604", "604") | |||
127 | .Case("7400", "7400") | |||
128 | .Case("7410", "7400") | |||
129 | .Case("7447", "7400") | |||
130 | .Case("7455", "7450") | |||
131 | .Case("G4", "g4") | |||
132 | .Case("POWER4", "970") | |||
133 | .Case("PPC970FX", "970") | |||
134 | .Case("PPC970MP", "970") | |||
135 | .Case("G5", "g5") | |||
136 | .Case("POWER5", "g5") | |||
137 | .Case("A2", "a2") | |||
138 | .Case("POWER6", "pwr6") | |||
139 | .Case("POWER7", "pwr7") | |||
140 | .Case("POWER8", "pwr8") | |||
141 | .Case("POWER8E", "pwr8") | |||
142 | .Case("POWER8NVL", "pwr8") | |||
143 | .Case("POWER9", "pwr9") | |||
144 | .Default(generic); | |||
145 | } | |||
146 | ||||
147 | StringRef sys::detail::getHostCPUNameForARM( | |||
148 | const StringRef &ProcCpuinfoContent) { | |||
149 | // The cpuid register on arm is not accessible from user space. On Linux, | |||
150 | // it is exposed through the /proc/cpuinfo file. | |||
151 | ||||
152 | // Read 32 lines from /proc/cpuinfo, which should contain the CPU part line | |||
153 | // in all cases. | |||
154 | SmallVector<StringRef, 32> Lines; | |||
155 | ProcCpuinfoContent.split(Lines, "\n"); | |||
156 | ||||
157 | // Look for the CPU implementer line. | |||
158 | StringRef Implementer; | |||
159 | StringRef Hardware; | |||
160 | for (unsigned I = 0, E = Lines.size(); I != E; ++I) { | |||
161 | if (Lines[I].startswith("CPU implementer")) | |||
162 | Implementer = Lines[I].substr(15).ltrim("\t :"); | |||
163 | if (Lines[I].startswith("Hardware")) | |||
164 | Hardware = Lines[I].substr(8).ltrim("\t :"); | |||
165 | } | |||
166 | ||||
167 | if (Implementer == "0x41") { // ARM Ltd. | |||
168 | // MSM8992/8994 may give cpu part for the core that the kernel is running on, | |||
169 | // which is undeterministic and wrong. Always return cortex-a53 for these SoC. | |||
170 | if (Hardware.endswith("MSM8994") || Hardware.endswith("MSM8996")) | |||
171 | return "cortex-a53"; | |||
172 | ||||
173 | ||||
174 | // Look for the CPU part line. | |||
175 | for (unsigned I = 0, E = Lines.size(); I != E; ++I) | |||
176 | if (Lines[I].startswith("CPU part")) | |||
177 | // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The | |||
178 | // values correspond to the "Part number" in the CP15/c0 register. The | |||
179 | // contents are specified in the various processor manuals. | |||
180 | return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :")) | |||
181 | .Case("0x926", "arm926ej-s") | |||
182 | .Case("0xb02", "mpcore") | |||
183 | .Case("0xb36", "arm1136j-s") | |||
184 | .Case("0xb56", "arm1156t2-s") | |||
185 | .Case("0xb76", "arm1176jz-s") | |||
186 | .Case("0xc08", "cortex-a8") | |||
187 | .Case("0xc09", "cortex-a9") | |||
188 | .Case("0xc0f", "cortex-a15") | |||
189 | .Case("0xc20", "cortex-m0") | |||
190 | .Case("0xc23", "cortex-m3") | |||
191 | .Case("0xc24", "cortex-m4") | |||
192 | .Case("0xd04", "cortex-a35") | |||
193 | .Case("0xd03", "cortex-a53") | |||
194 | .Case("0xd07", "cortex-a57") | |||
195 | .Case("0xd08", "cortex-a72") | |||
196 | .Case("0xd09", "cortex-a73") | |||
197 | .Default("generic"); | |||
198 | } | |||
199 | ||||
200 | if (Implementer == "0x51") // Qualcomm Technologies, Inc. | |||
201 | // Look for the CPU part line. | |||
202 | for (unsigned I = 0, E = Lines.size(); I != E; ++I) | |||
203 | if (Lines[I].startswith("CPU part")) | |||
204 | // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The | |||
205 | // values correspond to the "Part number" in the CP15/c0 register. The | |||
206 | // contents are specified in the various processor manuals. | |||
207 | return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :")) | |||
208 | .Case("0x06f", "krait") // APQ8064 | |||
209 | .Case("0x201", "kryo") | |||
210 | .Case("0x205", "kryo") | |||
211 | .Default("generic"); | |||
212 | ||||
213 | return "generic"; | |||
214 | } | |||
215 | ||||
216 | StringRef sys::detail::getHostCPUNameForS390x( | |||
217 | const StringRef &ProcCpuinfoContent) { | |||
218 | // STIDP is a privileged operation, so use /proc/cpuinfo instead. | |||
219 | ||||
220 | // The "processor 0:" line comes after a fair amount of other information, | |||
221 | // including a cache breakdown, but this should be plenty. | |||
222 | SmallVector<StringRef, 32> Lines; | |||
223 | ProcCpuinfoContent.split(Lines, "\n"); | |||
224 | ||||
225 | // Look for the CPU features. | |||
226 | SmallVector<StringRef, 32> CPUFeatures; | |||
227 | for (unsigned I = 0, E = Lines.size(); I != E; ++I) | |||
228 | if (Lines[I].startswith("features")) { | |||
229 | size_t Pos = Lines[I].find(":"); | |||
230 | if (Pos != StringRef::npos) { | |||
231 | Lines[I].drop_front(Pos + 1).split(CPUFeatures, ' '); | |||
232 | break; | |||
233 | } | |||
234 | } | |||
235 | ||||
236 | // We need to check for the presence of vector support independently of | |||
237 | // the machine type, since we may only use the vector register set when | |||
238 | // supported by the kernel (and hypervisor). | |||
239 | bool HaveVectorSupport = false; | |||
240 | for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) { | |||
241 | if (CPUFeatures[I] == "vx") | |||
242 | HaveVectorSupport = true; | |||
243 | } | |||
244 | ||||
245 | // Now check the processor machine type. | |||
246 | for (unsigned I = 0, E = Lines.size(); I != E; ++I) { | |||
247 | if (Lines[I].startswith("processor ")) { | |||
248 | size_t Pos = Lines[I].find("machine = "); | |||
249 | if (Pos != StringRef::npos) { | |||
250 | Pos += sizeof("machine = ") - 1; | |||
251 | unsigned int Id; | |||
252 | if (!Lines[I].drop_front(Pos).getAsInteger(10, Id)) { | |||
253 | if (Id >= 2964 && HaveVectorSupport) | |||
254 | return "z13"; | |||
255 | if (Id >= 2827) | |||
256 | return "zEC12"; | |||
257 | if (Id >= 2817) | |||
258 | return "z196"; | |||
259 | } | |||
260 | } | |||
261 | break; | |||
262 | } | |||
263 | } | |||
264 | ||||
265 | return "generic"; | |||
266 | } | |||
267 | ||||
268 | #if defined(__i386__) || defined(_M_IX86) || \ | |||
269 | defined(__x86_64__1) || defined(_M_X64) | |||
270 | ||||
/// Vendor signature words. Read from the least significant byte upwards, each
/// value spells the four ASCII characters given in its comment.
enum VendorSignatures {
  SIG_INTEL = 0x756e6547, // "Genu"
  SIG_AMD = 0x68747541    // "Auth"
};
275 | ||||
/// Processor vendor identifiers. Numbering starts at 1; VENDOR_MAX is one past
/// the last real vendor.
enum ProcessorVendors {
  VENDOR_INTEL = 1,
  VENDOR_AMD = 2,
  VENDOR_OTHER = 3,
  VENDOR_MAX = 4
};
282 | ||||
/// Processor type identifiers, as stored through the Type out-parameter of the
/// get*ProcessorTypeAndSubtype routines. Numbering starts at 1; CPU_TYPE_MAX
/// is one past the last real type.
enum ProcessorTypes {
  INTEL_ATOM = 1,
  INTEL_CORE2 = 2,
  INTEL_COREI7 = 3,
  AMDFAM10H = 4,
  AMDFAM15H = 5,
  INTEL_i386 = 6,
  INTEL_i486 = 7,
  INTEL_PENTIUM = 8,
  INTEL_PENTIUM_PRO = 9,
  INTEL_PENTIUM_II = 10,
  INTEL_PENTIUM_III = 11,
  INTEL_PENTIUM_IV = 12,
  INTEL_PENTIUM_M = 13,
  INTEL_CORE_DUO = 14,
  INTEL_XEONPHI = 15,
  INTEL_X86_64 = 16,
  INTEL_NOCONA = 17,
  INTEL_PRESCOTT = 18,
  AMD_i486 = 19,
  AMDPENTIUM = 20,
  AMDATHLON = 21,
  AMDFAM14H = 22,
  AMDFAM16H = 23,
  AMDFAM17H = 24,
  CPU_TYPE_MAX = 25
};
310 | ||||
/// Processor subtype identifiers, as stored through the Subtype out-parameter
/// of the get*ProcessorTypeAndSubtype routines. Numbering starts at 1;
/// CPU_SUBTYPE_MAX is one past the last real subtype.
enum ProcessorSubtypes {
  INTEL_COREI7_NEHALEM = 1,
  INTEL_COREI7_WESTMERE = 2,
  INTEL_COREI7_SANDYBRIDGE = 3,
  AMDFAM10H_BARCELONA = 4,
  AMDFAM10H_SHANGHAI = 5,
  AMDFAM10H_ISTANBUL = 6,
  AMDFAM15H_BDVER1 = 7,
  AMDFAM15H_BDVER2 = 8,
  INTEL_PENTIUM_MMX = 9,
  INTEL_CORE2_65 = 10,
  INTEL_CORE2_45 = 11,
  INTEL_COREI7_IVYBRIDGE = 12,
  INTEL_COREI7_HASWELL = 13,
  INTEL_COREI7_BROADWELL = 14,
  INTEL_COREI7_SKYLAKE = 15,
  INTEL_COREI7_SKYLAKE_AVX512 = 16,
  INTEL_ATOM_BONNELL = 17,
  INTEL_ATOM_SILVERMONT = 18,
  INTEL_KNIGHTS_LANDING = 19,
  AMDPENTIUM_K6 = 20,
  AMDPENTIUM_K62 = 21,
  AMDPENTIUM_K63 = 22,
  AMDPENTIUM_GEODE = 23,
  AMDATHLON_TBIRD = 24,
  AMDATHLON_MP = 25,
  AMDATHLON_XP = 26,
  AMDATHLON_K8SSE3 = 27,
  AMDATHLON_OPTERON = 28,
  AMDATHLON_FX = 29,
  AMDATHLON_64 = 30,
  AMD_BTVER1 = 31,
  AMD_BTVER2 = 32,
  AMDFAM15H_BDVER3 = 33,
  AMDFAM15H_BDVER4 = 34,
  AMDFAM17H_ZNVER1 = 35,
  CPU_SUBTYPE_MAX = 36
};
349 | ||||
/// Bit positions within the Features mask; a feature is tested with
/// `Features & (1 << FEATURE_xxx)`.
enum ProcessorFeatures {
  FEATURE_CMOV = 0,
  FEATURE_MMX = 1,
  FEATURE_POPCNT = 2,
  FEATURE_SSE = 3,
  FEATURE_SSE2 = 4,
  FEATURE_SSE3 = 5,
  FEATURE_SSSE3 = 6,
  FEATURE_SSE4_1 = 7,
  FEATURE_SSE4_2 = 8,
  FEATURE_AVX = 9,
  FEATURE_AVX2 = 10,
  FEATURE_AVX512 = 11,
  FEATURE_AVX512SAVE = 12,
  FEATURE_MOVBE = 13,
  FEATURE_ADX = 14,
  FEATURE_EM64T = 15
};
368 | ||||
/// Report whether the CPUID instruction may be executed on this host.
///
/// The i386 check was copied from clang's cpuid.h (__get_cpuid_max) and was
/// motivated by bug reports for OpenSSL crashing on CPUs without CPUID
/// support: on i386 the presence of CPUID is checked first via the
/// corresponding eflags bit. The cpuid.h header itself is not used here
/// (removal motivated by PR30384); that header and __get_cpuid_max are not
/// used in llvm, clang, openmp or test-suite, but are used in external
/// projects, e.g. libstdcxx.
///
/// \returns false only when built with GCC/Clang for i386 and the eflags
/// probe fails; true in every other configuration.
static bool isCpuIdSupported() {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  int CpuIdSupported;
  // Toggle bit 0x00200000 of eflags and read eflags back: the output is 0 when
  // the value read back equals the original (the bit did not stick), else 1.
  __asm__("  pushfl\n"
          "  popl   %%eax\n"
          "  movl   %%eax,%%ecx\n"
          "  xorl   $0x00200000,%%eax\n"
          "  pushl  %%eax\n"
          "  popfl\n"
          "  pushfl\n"
          "  popl   %%eax\n"
          "  movl   $0,%0\n"
          "  cmpl   %%eax,%%ecx\n"
          "  je     1f\n"
          "  movl   $1,%0\n"
          "1:"
          : "=r"(CpuIdSupported)
          :
          : "eax", "ecx");
  return CpuIdSupported != 0;
#else
  return true;
#endif
}
403 | ||||
/// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in
/// the specified arguments. If we can't run cpuid on the host, return true.
///
/// \param value the cpuid leaf, loaded into EAX before the instruction.
/// \param rEAX,rEBX,rECX,rEDX receive the four result registers; they are
/// left untouched when true is returned.
/// \returns false when cpuid was executed, true when no implementation is
/// available for the compiler in use.
static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
                               unsigned *rECX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER)
#if defined(__GNUC__) || defined(__clang__)
#if defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
  // FIXME: should we save this for Clang?
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
#elif defined(__i386__)
  // Same ebx-preserving sequence as above, with 32-bit registers.
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
#else
  assert(0 && "This method is defined only for x86.");
#endif
#elif defined(_MSC_VER)
  // The MSVC intrinsic is portable across x86 and x64.
  int registers[4];
  __cpuid(registers, value);
  *rEAX = registers[0];
  *rEBX = registers[1];
  *rECX = registers[2];
  *rEDX = registers[3];
#endif
  return false;
#else
  return true;
#endif
}
441 | ||||
/// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return
/// the 4 values in the specified arguments. If we can't run cpuid on the host,
/// return true.
///
/// \param value the cpuid leaf, loaded into EAX before the instruction.
/// \param subleaf the sub-leaf, loaded into ECX before the instruction.
/// \param rEAX,rEBX,rECX,rEDX receive the four result registers; they are
/// left untouched when true is returned.
/// \returns false when cpuid was executed, true when no implementation is
/// available for the compiler in use.
static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
                                 unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
                                 unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER)
#if defined(__x86_64__) || defined(_M_X64)
#if defined(__GNUC__) || defined(__clang__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
  // FIXME: should we save this for Clang?
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
#elif defined(_MSC_VER)
  int registers[4];
  __cpuidex(registers, value, subleaf);
  *rEAX = registers[0];
  *rEBX = registers[1];
  *rECX = registers[2];
  *rEDX = registers[3];
#endif
#elif defined(__i386__) || defined(_M_IX86)
#if defined(__GNUC__) || defined(__clang__)
  // Same ebx-preserving sequence as above, with 32-bit registers.
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
#elif defined(_MSC_VER)
  // Inline assembly form: run cpuid, then store each result register through
  // the corresponding output pointer.
  __asm {
      mov   eax,value
      mov   ecx,subleaf
      cpuid
      mov   esi,rEAX
      mov   dword ptr [esi],eax
      mov   esi,rEBX
      mov   dword ptr [esi],ebx
      mov   esi,rECX
      mov   dword ptr [esi],ecx
      mov   esi,rEDX
      mov   dword ptr [esi],edx
  }
#endif
#else
  assert(0 && "This method is defined only for x86.");
#endif
  return false;
#else
  return true;
#endif
}
496 | ||||
/// Read extended control register 0 (XCR0) with xgetbv.
///
/// \param rEAX receives the low 32 bits of the result.
/// \param rEDX receives the high 32 bits of the result.
/// \returns false when the register was read, true when no implementation is
/// available for the compiler in use (outputs are then left untouched).
static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
  // Check xgetbv; this uses a .byte sequence instead of the instruction
  // directly because older assemblers do not include support for xgetbv and
  // there is no easy way to conditionally compile based on the assembler used.
  __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0));
  return false;
#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
  unsigned long long Result = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
  *rEAX = Result;
  *rEDX = Result >> 32;
  return false;
#else
  return true;
#endif
}
513 | ||||
/// Decode the family and model numbers from a cpuid EAX signature word.
///
/// \param EAX the signature word to decode.
/// \param Family receives bits 8-11, plus the extended family (bits 20-27)
/// when those bits equal 0xF.
/// \param Model receives bits 4-7, plus the extended model (bits 16-19)
/// shifted left by four when the base family is 6 or 0xF.
static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
                                 unsigned *Model) {
  const unsigned BaseFamily = (EAX >> 8) & 0xf; // Bits 8 - 11
  const unsigned BaseModel = (EAX >> 4) & 0xf;  // Bits 4 - 7
  const unsigned ExtFamily = (EAX >> 20) & 0xff; // Bits 20 - 27
  const unsigned ExtModel = (EAX >> 16) & 0xf;   // Bits 16 - 19

  unsigned FullFamily = BaseFamily;
  unsigned FullModel = BaseModel;

  // The extended family only applies when the base family is F.
  if (BaseFamily == 0xf)
    FullFamily += ExtFamily;
  // The extended model applies when the base family is 6 or F.
  if (BaseFamily == 6 || BaseFamily == 0xf)
    FullModel += ExtModel << 4;

  *Family = FullFamily;
  *Model = FullModel;
}
526 | ||||
527 | static void | |||
528 | getIntelProcessorTypeAndSubtype(unsigned int Family, unsigned int Model, | |||
529 | unsigned int Brand_id, unsigned int Features, | |||
530 | unsigned *Type, unsigned *Subtype) { | |||
531 | if (Brand_id != 0) | |||
532 | return; | |||
533 | switch (Family) { | |||
534 | case 3: | |||
535 | *Type = INTEL_i386; | |||
536 | break; | |||
537 | case 4: | |||
538 | switch (Model) { | |||
539 | case 0: // Intel486 DX processors | |||
540 | case 1: // Intel486 DX processors | |||
541 | case 2: // Intel486 SX processors | |||
542 | case 3: // Intel487 processors, IntelDX2 OverDrive processors, | |||
543 | // IntelDX2 processors | |||
544 | case 4: // Intel486 SL processor | |||
545 | case 5: // IntelSX2 processors | |||
546 | case 7: // Write-Back Enhanced IntelDX2 processors | |||
547 | case 8: // IntelDX4 OverDrive processors, IntelDX4 processors | |||
548 | default: | |||
549 | *Type = INTEL_i486; | |||
550 | break; | |||
551 | } | |||
552 | break; | |||
553 | case 5: | |||
554 | switch (Model) { | |||
555 | case 1: // Pentium OverDrive processor for Pentium processor (60, 66), | |||
556 | // Pentium processors (60, 66) | |||
557 | case 2: // Pentium OverDrive processor for Pentium processor (75, 90, | |||
558 | // 100, 120, 133), Pentium processors (75, 90, 100, 120, 133, | |||
559 | // 150, 166, 200) | |||
560 | case 3: // Pentium OverDrive processors for Intel486 processor-based | |||
561 | // systems | |||
562 | *Type = INTEL_PENTIUM; | |||
563 | break; | |||
564 | case 4: // Pentium OverDrive processor with MMX technology for Pentium | |||
565 | // processor (75, 90, 100, 120, 133), Pentium processor with | |||
566 | // MMX technology (166, 200) | |||
567 | *Type = INTEL_PENTIUM; | |||
568 | *Subtype = INTEL_PENTIUM_MMX; | |||
569 | break; | |||
570 | default: | |||
571 | *Type = INTEL_PENTIUM; | |||
572 | break; | |||
573 | } | |||
574 | break; | |||
575 | case 6: | |||
576 | switch (Model) { | |||
577 | case 0x01: // Pentium Pro processor | |||
578 | *Type = INTEL_PENTIUM_PRO; | |||
579 | break; | |||
580 | case 0x03: // Intel Pentium II OverDrive processor, Pentium II processor, | |||
581 | // model 03 | |||
582 | case 0x05: // Pentium II processor, model 05, Pentium II Xeon processor, | |||
583 | // model 05, and Intel Celeron processor, model 05 | |||
584 | case 0x06: // Celeron processor, model 06 | |||
585 | *Type = INTEL_PENTIUM_II; | |||
586 | break; | |||
587 | case 0x07: // Pentium III processor, model 07, and Pentium III Xeon | |||
588 | // processor, model 07 | |||
589 | case 0x08: // Pentium III processor, model 08, Pentium III Xeon processor, | |||
590 | // model 08, and Celeron processor, model 08 | |||
591 | case 0x0a: // Pentium III Xeon processor, model 0Ah | |||
592 | case 0x0b: // Pentium III processor, model 0Bh | |||
593 | *Type = INTEL_PENTIUM_III; | |||
594 | break; | |||
595 | case 0x09: // Intel Pentium M processor, Intel Celeron M processor model 09. | |||
596 | case 0x0d: // Intel Pentium M processor, Intel Celeron M processor, model | |||
597 | // 0Dh. All processors are manufactured using the 90 nm process. | |||
598 | case 0x15: // Intel EP80579 Integrated Processor and Intel EP80579 | |||
599 | // Integrated Processor with Intel QuickAssist Technology | |||
600 | *Type = INTEL_PENTIUM_M; | |||
601 | break; | |||
602 | case 0x0e: // Intel Core Duo processor, Intel Core Solo processor, model | |||
603 | // 0Eh. All processors are manufactured using the 65 nm process. | |||
604 | *Type = INTEL_CORE_DUO; | |||
605 | break; // yonah | |||
606 | case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile | |||
607 | // processor, Intel Core 2 Quad processor, Intel Core 2 Quad | |||
608 | // mobile processor, Intel Core 2 Extreme processor, Intel | |||
609 | // Pentium Dual-Core processor, Intel Xeon processor, model | |||
610 | // 0Fh. All processors are manufactured using the 65 nm process. | |||
611 | case 0x16: // Intel Celeron processor model 16h. All processors are | |||
612 | // manufactured using the 65 nm process | |||
613 | *Type = INTEL_CORE2; // "core2" | |||
614 | *Subtype = INTEL_CORE2_65; | |||
615 | break; | |||
616 | case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model | |||
617 | // 17h. All processors are manufactured using the 45 nm process. | |||
618 | // | |||
619 | // 45nm: Penryn , Wolfdale, Yorkfield (XE) | |||
620 | case 0x1d: // Intel Xeon processor MP. All processors are manufactured using | |||
621 | // the 45 nm process. | |||
622 | *Type = INTEL_CORE2; // "penryn" | |||
623 | *Subtype = INTEL_CORE2_45; | |||
624 | break; | |||
625 | case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All | |||
626 | // processors are manufactured using the 45 nm process. | |||
627 | case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz. | |||
628 | // As found in a Summer 2010 model iMac. | |||
629 | case 0x1f: | |||
630 | case 0x2e: // Nehalem EX | |||
631 | *Type = INTEL_COREI7; // "nehalem" | |||
632 | *Subtype = INTEL_COREI7_NEHALEM; | |||
633 | break; | |||
634 | case 0x25: // Intel Core i7, laptop version. | |||
635 | case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All | |||
636 | // processors are manufactured using the 32 nm process. | |||
637 | case 0x2f: // Westmere EX | |||
638 | *Type = INTEL_COREI7; // "westmere" | |||
639 | *Subtype = INTEL_COREI7_WESTMERE; | |||
640 | break; | |||
641 | case 0x2a: // Intel Core i7 processor. All processors are manufactured | |||
642 | // using the 32 nm process. | |||
643 | case 0x2d: | |||
644 | *Type = INTEL_COREI7; //"sandybridge" | |||
645 | *Subtype = INTEL_COREI7_SANDYBRIDGE; | |||
646 | break; | |||
647 | case 0x3a: | |||
648 | case 0x3e: // Ivy Bridge EP | |||
649 | *Type = INTEL_COREI7; // "ivybridge" | |||
650 | *Subtype = INTEL_COREI7_IVYBRIDGE; | |||
651 | break; | |||
652 | ||||
653 | // Haswell: | |||
654 | case 0x3c: | |||
655 | case 0x3f: | |||
656 | case 0x45: | |||
657 | case 0x46: | |||
658 | *Type = INTEL_COREI7; // "haswell" | |||
659 | *Subtype = INTEL_COREI7_HASWELL; | |||
660 | break; | |||
661 | ||||
662 | // Broadwell: | |||
663 | case 0x3d: | |||
664 | case 0x47: | |||
665 | case 0x4f: | |||
666 | case 0x56: | |||
667 | *Type = INTEL_COREI7; // "broadwell" | |||
668 | *Subtype = INTEL_COREI7_BROADWELL; | |||
669 | break; | |||
670 | ||||
671 | // Skylake: | |||
672 | case 0x4e: // Skylake mobile | |||
673 | case 0x5e: // Skylake desktop | |||
674 | case 0x8e: // Kaby Lake mobile | |||
675 | case 0x9e: // Kaby Lake desktop | |||
676 | *Type = INTEL_COREI7; // "skylake" | |||
677 | *Subtype = INTEL_COREI7_SKYLAKE; | |||
678 | break; | |||
679 | ||||
680 | // Skylake Xeon: | |||
681 | case 0x55: | |||
682 | *Type = INTEL_COREI7; | |||
683 | // Check that we really have AVX512 | |||
684 | if (Features & (1 << FEATURE_AVX512)) { | |||
685 | *Subtype = INTEL_COREI7_SKYLAKE_AVX512; // "skylake-avx512" | |||
686 | } else { | |||
687 | *Subtype = INTEL_COREI7_SKYLAKE; // "skylake" | |||
688 | } | |||
689 | break; | |||
690 | ||||
691 | case 0x1c: // Most 45 nm Intel Atom processors | |||
692 | case 0x26: // 45 nm Atom Lincroft | |||
693 | case 0x27: // 32 nm Atom Medfield | |||
694 | case 0x35: // 32 nm Atom Midview | |||
695 | case 0x36: // 32 nm Atom Midview | |||
696 | *Type = INTEL_ATOM; | |||
697 | *Subtype = INTEL_ATOM_BONNELL; | |||
698 | break; // "bonnell" | |||
699 | ||||
700 | // Atom Silvermont codes from the Intel software optimization guide. | |||
701 | case 0x37: | |||
702 | case 0x4a: | |||
703 | case 0x4d: | |||
704 | case 0x5a: | |||
705 | case 0x5d: | |||
706 | case 0x4c: // really airmont | |||
707 | *Type = INTEL_ATOM; | |||
708 | *Subtype = INTEL_ATOM_SILVERMONT; | |||
709 | break; // "silvermont" | |||
710 | ||||
711 | case 0x57: | |||
712 | *Type = INTEL_XEONPHI; // knl | |||
713 | *Subtype = INTEL_KNIGHTS_LANDING; | |||
714 | break; | |||
715 | ||||
716 | default: // Unknown family 6 CPU, try to guess. | |||
717 | if (Features & (1 << FEATURE_AVX512)) { | |||
718 | *Type = INTEL_XEONPHI; // knl | |||
719 | *Subtype = INTEL_KNIGHTS_LANDING; | |||
720 | break; | |||
721 | } | |||
722 | if (Features & (1 << FEATURE_ADX)) { | |||
723 | *Type = INTEL_COREI7; | |||
724 | *Subtype = INTEL_COREI7_BROADWELL; | |||
725 | break; | |||
726 | } | |||
727 | if (Features & (1 << FEATURE_AVX2)) { | |||
728 | *Type = INTEL_COREI7; | |||
729 | *Subtype = INTEL_COREI7_HASWELL; | |||
730 | break; | |||
731 | } | |||
732 | if (Features & (1 << FEATURE_AVX)) { | |||
733 | *Type = INTEL_COREI7; | |||
734 | *Subtype = INTEL_COREI7_SANDYBRIDGE; | |||
735 | break; | |||
736 | } | |||
737 | if (Features & (1 << FEATURE_SSE4_2)) { | |||
738 | if (Features & (1 << FEATURE_MOVBE)) { | |||
739 | *Type = INTEL_ATOM; | |||
740 | *Subtype = INTEL_ATOM_SILVERMONT; | |||
741 | } else { | |||
742 | *Type = INTEL_COREI7; | |||
743 | *Subtype = INTEL_COREI7_NEHALEM; | |||
744 | } | |||
745 | break; | |||
746 | } | |||
747 | if (Features & (1 << FEATURE_SSE4_1)) { | |||
748 | *Type = INTEL_CORE2; // "penryn" | |||
749 | *Subtype = INTEL_CORE2_45; | |||
750 | break; | |||
751 | } | |||
752 | if (Features & (1 << FEATURE_SSSE3)) { | |||
753 | if (Features & (1 << FEATURE_MOVBE)) { | |||
754 | *Type = INTEL_ATOM; | |||
755 | *Subtype = INTEL_ATOM_BONNELL; // "bonnell" | |||
756 | } else { | |||
757 | *Type = INTEL_CORE2; // "core2" | |||
758 | *Subtype = INTEL_CORE2_65; | |||
759 | } | |||
760 | break; | |||
761 | } | |||
762 | if (Features & (1 << FEATURE_EM64T)) { | |||
763 | *Type = INTEL_X86_64; | |||
764 | break; // x86-64 | |||
765 | } | |||
766 | if (Features & (1 << FEATURE_SSE2)) { | |||
767 | *Type = INTEL_PENTIUM_M; | |||
768 | break; | |||
769 | } | |||
770 | if (Features & (1 << FEATURE_SSE)) { | |||
771 | *Type = INTEL_PENTIUM_III; | |||
772 | break; | |||
773 | } | |||
774 | if (Features & (1 << FEATURE_MMX)) { | |||
775 | *Type = INTEL_PENTIUM_II; | |||
776 | break; | |||
777 | } | |||
778 | *Type = INTEL_PENTIUM_PRO; | |||
779 | break; | |||
780 | } | |||
781 | break; | |||
782 | case 15: { | |||
783 | switch (Model) { | |||
784 | case 0: // Pentium 4 processor, Intel Xeon processor. All processors are | |||
785 | // model 00h and manufactured using the 0.18 micron process. | |||
786 | case 1: // Pentium 4 processor, Intel Xeon processor, Intel Xeon | |||
787 | // processor MP, and Intel Celeron processor. All processors are | |||
788 | // model 01h and manufactured using the 0.18 micron process. | |||
789 | case 2: // Pentium 4 processor, Mobile Intel Pentium 4 processor - M, | |||
790 | // Intel Xeon processor, Intel Xeon processor MP, Intel Celeron | |||
791 | // processor, and Mobile Intel Celeron processor. All processors | |||
792 | // are model 02h and manufactured using the 0.13 micron process. | |||
793 | *Type = | |||
794 | ((Features & (1 << FEATURE_EM64T)) ? INTEL_X86_64 : INTEL_PENTIUM_IV); | |||
795 | break; | |||
796 | ||||
797 | case 3: // Pentium 4 processor, Intel Xeon processor, Intel Celeron D | |||
798 | // processor. All processors are model 03h and manufactured using | |||
799 | // the 90 nm process. | |||
800 | case 4: // Pentium 4 processor, Pentium 4 processor Extreme Edition, | |||
801 | // Pentium D processor, Intel Xeon processor, Intel Xeon | |||
802 | // processor MP, Intel Celeron D processor. All processors are | |||
803 | // model 04h and manufactured using the 90 nm process. | |||
804 | case 6: // Pentium 4 processor, Pentium D processor, Pentium processor | |||
805 | // Extreme Edition, Intel Xeon processor, Intel Xeon processor | |||
806 | // MP, Intel Celeron D processor. All processors are model 06h | |||
807 | // and manufactured using the 65 nm process. | |||
808 | *Type = | |||
809 | ((Features & (1 << FEATURE_EM64T)) ? INTEL_NOCONA : INTEL_PRESCOTT); | |||
810 | break; | |||
811 | ||||
812 | default: | |||
813 | *Type = | |||
814 | ((Features & (1 << FEATURE_EM64T)) ? INTEL_X86_64 : INTEL_PENTIUM_IV); | |||
815 | break; | |||
816 | } | |||
817 | break; | |||
818 | } | |||
819 | default: | |||
820 | break; /*"generic"*/ | |||
821 | } | |||
822 | } | |||
823 | ||||
824 | static void getAMDProcessorTypeAndSubtype(unsigned int Family, | |||
825 | unsigned int Model, | |||
826 | unsigned int Features, | |||
827 | unsigned *Type, | |||
828 | unsigned *Subtype) { | |||
829 | // FIXME: this poorly matches the generated SubtargetFeatureKV table. There | |||
830 | // appears to be no way to generate the wide variety of AMD-specific targets | |||
831 | // from the information returned from CPUID. | |||
832 | switch (Family) { | |||
833 | case 4: | |||
834 | *Type = AMD_i486; | |||
835 | break; | |||
836 | case 5: | |||
837 | *Type = AMDPENTIUM; | |||
838 | switch (Model) { | |||
839 | case 6: | |||
840 | case 7: | |||
841 | *Subtype = AMDPENTIUM_K6; | |||
842 | break; // "k6" | |||
843 | case 8: | |||
844 | *Subtype = AMDPENTIUM_K62; | |||
845 | break; // "k6-2" | |||
846 | case 9: | |||
847 | case 13: | |||
848 | *Subtype = AMDPENTIUM_K63; | |||
849 | break; // "k6-3" | |||
850 | case 10: | |||
851 | *Subtype = AMDPENTIUM_GEODE; | |||
852 | break; // "geode" | |||
853 | } | |||
854 | break; | |||
855 | case 6: | |||
856 | *Type = AMDATHLON; | |||
857 | switch (Model) { | |||
858 | case 4: | |||
859 | *Subtype = AMDATHLON_TBIRD; | |||
860 | break; // "athlon-tbird" | |||
861 | case 6: | |||
862 | case 7: | |||
863 | case 8: | |||
864 | *Subtype = AMDATHLON_MP; | |||
865 | break; // "athlon-mp" | |||
866 | case 10: | |||
867 | *Subtype = AMDATHLON_XP; | |||
868 | break; // "athlon-xp" | |||
869 | } | |||
870 | break; | |||
871 | case 15: | |||
872 | *Type = AMDATHLON; | |||
873 | if (Features & (1 << FEATURE_SSE3)) { | |||
874 | *Subtype = AMDATHLON_K8SSE3; | |||
875 | break; // "k8-sse3" | |||
876 | } | |||
877 | switch (Model) { | |||
878 | case 1: | |||
879 | *Subtype = AMDATHLON_OPTERON; | |||
880 | break; // "opteron" | |||
881 | case 5: | |||
882 | *Subtype = AMDATHLON_FX; | |||
883 | break; // "athlon-fx"; also opteron | |||
884 | default: | |||
885 | *Subtype = AMDATHLON_64; | |||
886 | break; // "athlon64" | |||
887 | } | |||
888 | break; | |||
889 | case 16: | |||
890 | *Type = AMDFAM10H; // "amdfam10" | |||
891 | switch (Model) { | |||
892 | case 2: | |||
893 | *Subtype = AMDFAM10H_BARCELONA; | |||
894 | break; | |||
895 | case 4: | |||
896 | *Subtype = AMDFAM10H_SHANGHAI; | |||
897 | break; | |||
898 | case 8: | |||
899 | *Subtype = AMDFAM10H_ISTANBUL; | |||
900 | break; | |||
901 | } | |||
902 | break; | |||
903 | case 20: | |||
904 | *Type = AMDFAM14H; | |||
905 | *Subtype = AMD_BTVER1; | |||
906 | break; // "btver1"; | |||
907 | case 21: | |||
908 | *Type = AMDFAM15H; | |||
909 | if (!(Features & | |||
910 | (1 << FEATURE_AVX))) { // If no AVX support, provide a sane fallback. | |||
911 | *Subtype = AMD_BTVER1; | |||
912 | break; // "btver1" | |||
913 | } | |||
914 | if (Model >= 0x50 && Model <= 0x6f) { | |||
915 | *Subtype = AMDFAM15H_BDVER4; | |||
916 | break; // "bdver4"; 50h-6Fh: Excavator | |||
917 | } | |||
918 | if (Model >= 0x30 && Model <= 0x3f) { | |||
919 | *Subtype = AMDFAM15H_BDVER3; | |||
920 | break; // "bdver3"; 30h-3Fh: Steamroller | |||
921 | } | |||
922 | if (Model >= 0x10 && Model <= 0x1f) { | |||
923 | *Subtype = AMDFAM15H_BDVER2; | |||
924 | break; // "bdver2"; 10h-1Fh: Piledriver | |||
925 | } | |||
926 | if (Model <= 0x0f) { | |||
927 | *Subtype = AMDFAM15H_BDVER1; | |||
928 | break; // "bdver1"; 00h-0Fh: Bulldozer | |||
929 | } | |||
930 | break; | |||
931 | case 22: | |||
932 | *Type = AMDFAM16H; | |||
933 | if (!(Features & | |||
934 | (1 << FEATURE_AVX))) { // If no AVX support provide a sane fallback. | |||
935 | *Subtype = AMD_BTVER1; | |||
936 | break; // "btver1"; | |||
937 | } | |||
938 | *Subtype = AMD_BTVER2; | |||
939 | break; // "btver2" | |||
940 | case 23: | |||
941 | *Type = AMDFAM17H; | |||
942 | if (Features & (1 << FEATURE_ADX)) { | |||
943 | *Subtype = AMDFAM17H_ZNVER1; | |||
944 | break; // "znver1" | |||
945 | } | |||
946 | *Subtype = AMD_BTVER1; | |||
947 | break; | |||
948 | default: | |||
949 | break; // "generic" | |||
950 | } | |||
951 | } | |||
952 | ||||
953 | static unsigned getAvailableFeatures(unsigned int ECX, unsigned int EDX, | |||
954 | unsigned MaxLeaf) { | |||
955 | unsigned Features = 0; | |||
956 | unsigned int EAX, EBX; | |||
957 | Features |= (((EDX >> 23) & 1) << FEATURE_MMX); | |||
958 | Features |= (((EDX >> 25) & 1) << FEATURE_SSE); | |||
959 | Features |= (((EDX >> 26) & 1) << FEATURE_SSE2); | |||
960 | Features |= (((ECX >> 0) & 1) << FEATURE_SSE3); | |||
961 | Features |= (((ECX >> 9) & 1) << FEATURE_SSSE3); | |||
962 | Features |= (((ECX >> 19) & 1) << FEATURE_SSE4_1); | |||
963 | Features |= (((ECX >> 20) & 1) << FEATURE_SSE4_2); | |||
964 | Features |= (((ECX >> 22) & 1) << FEATURE_MOVBE); | |||
965 | ||||
966 | // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV | |||
967 | // indicates that the AVX registers will be saved and restored on context | |||
968 | // switch, then we have full AVX support. | |||
969 | const unsigned AVXBits = (1 << 27) | (1 << 28); | |||
970 | bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) && | |||
971 | ((EAX & 0x6) == 0x6); | |||
972 | bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0); | |||
973 | bool HasLeaf7 = | |||
974 | MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX); | |||
975 | bool HasADX = HasLeaf7 && ((EBX >> 19) & 1); | |||
976 | bool HasAVX2 = HasAVX && HasLeaf7 && (EBX & 0x20); | |||
977 | bool HasAVX512 = HasLeaf7 && HasAVX512Save && ((EBX >> 16) & 1); | |||
978 | Features |= (HasAVX << FEATURE_AVX); | |||
979 | Features |= (HasAVX2 << FEATURE_AVX2); | |||
980 | Features |= (HasAVX512 << FEATURE_AVX512); | |||
981 | Features |= (HasAVX512Save << FEATURE_AVX512SAVE); | |||
982 | Features |= (HasADX << FEATURE_ADX); | |||
983 | ||||
984 | getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); | |||
985 | Features |= (((EDX >> 29) & 0x1) << FEATURE_EM64T); | |||
986 | return Features; | |||
987 | } | |||
988 | ||||
989 | StringRef sys::getHostCPUName() { | |||
990 | unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; | |||
991 | unsigned MaxLeaf, Vendor; | |||
992 | ||||
993 | #if defined(__GNUC__4) || defined(__clang__1) | |||
994 | //FIXME: include cpuid.h from clang or copy __get_cpuid_max here | |||
995 | // and simplify it to not invoke __cpuid (like cpu_model.c in | |||
996 | // compiler-rt/lib/builtins/cpu_model.c? | |||
997 | // Opting for the second option. | |||
998 | if(!isCpuIdSupported()) | |||
| ||||
999 | return "generic"; | |||
1000 | #endif | |||
1001 | if (getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX)) | |||
1002 | return "generic"; | |||
1003 | if (getX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX)) | |||
1004 | return "generic"; | |||
1005 | ||||
1006 | unsigned Brand_id = EBX & 0xff; | |||
1007 | unsigned Family = 0, Model = 0; | |||
1008 | unsigned Features = 0; | |||
1009 | detectX86FamilyModel(EAX, &Family, &Model); | |||
1010 | Features = getAvailableFeatures(ECX, EDX, MaxLeaf); | |||
1011 | ||||
1012 | unsigned Type; | |||
1013 | unsigned Subtype; | |||
1014 | ||||
1015 | if (Vendor == SIG_INTEL) { | |||
1016 | getIntelProcessorTypeAndSubtype(Family, Model, Brand_id, Features, &Type, | |||
1017 | &Subtype); | |||
1018 | switch (Type) { | |||
1019 | case INTEL_i386: | |||
1020 | return "i386"; | |||
1021 | case INTEL_i486: | |||
1022 | return "i486"; | |||
1023 | case INTEL_PENTIUM: | |||
1024 | if (Subtype == INTEL_PENTIUM_MMX) | |||
1025 | return "pentium-mmx"; | |||
1026 | return "pentium"; | |||
1027 | case INTEL_PENTIUM_PRO: | |||
1028 | return "pentiumpro"; | |||
1029 | case INTEL_PENTIUM_II: | |||
1030 | return "pentium2"; | |||
1031 | case INTEL_PENTIUM_III: | |||
1032 | return "pentium3"; | |||
1033 | case INTEL_PENTIUM_IV: | |||
1034 | return "pentium4"; | |||
1035 | case INTEL_PENTIUM_M: | |||
1036 | return "pentium-m"; | |||
1037 | case INTEL_CORE_DUO: | |||
1038 | return "yonah"; | |||
1039 | case INTEL_CORE2: | |||
1040 | switch (Subtype) { | |||
1041 | case INTEL_CORE2_65: | |||
1042 | return "core2"; | |||
1043 | case INTEL_CORE2_45: | |||
1044 | return "penryn"; | |||
1045 | default: | |||
1046 | return "core2"; | |||
1047 | } | |||
1048 | case INTEL_COREI7: | |||
1049 | switch (Subtype) { | |||
1050 | case INTEL_COREI7_NEHALEM: | |||
1051 | return "nehalem"; | |||
1052 | case INTEL_COREI7_WESTMERE: | |||
1053 | return "westmere"; | |||
1054 | case INTEL_COREI7_SANDYBRIDGE: | |||
1055 | return "sandybridge"; | |||
1056 | case INTEL_COREI7_IVYBRIDGE: | |||
1057 | return "ivybridge"; | |||
1058 | case INTEL_COREI7_HASWELL: | |||
1059 | return "haswell"; | |||
1060 | case INTEL_COREI7_BROADWELL: | |||
1061 | return "broadwell"; | |||
1062 | case INTEL_COREI7_SKYLAKE: | |||
1063 | return "skylake"; | |||
1064 | case INTEL_COREI7_SKYLAKE_AVX512: | |||
1065 | return "skylake-avx512"; | |||
1066 | default: | |||
1067 | return "corei7"; | |||
1068 | } | |||
1069 | case INTEL_ATOM: | |||
1070 | switch (Subtype) { | |||
1071 | case INTEL_ATOM_BONNELL: | |||
1072 | return "bonnell"; | |||
1073 | case INTEL_ATOM_SILVERMONT: | |||
1074 | return "silvermont"; | |||
1075 | default: | |||
1076 | return "atom"; | |||
1077 | } | |||
1078 | case INTEL_XEONPHI: | |||
1079 | return "knl"; /*update for more variants added*/ | |||
1080 | case INTEL_X86_64: | |||
1081 | return "x86-64"; | |||
1082 | case INTEL_NOCONA: | |||
1083 | return "nocona"; | |||
1084 | case INTEL_PRESCOTT: | |||
1085 | return "prescott"; | |||
1086 | default: | |||
1087 | return "generic"; | |||
1088 | } | |||
1089 | } else if (Vendor == SIG_AMD) { | |||
1090 | getAMDProcessorTypeAndSubtype(Family, Model, Features, &Type, &Subtype); | |||
1091 | switch (Type) { | |||
1092 | case AMD_i486: | |||
1093 | return "i486"; | |||
1094 | case AMDPENTIUM: | |||
1095 | switch (Subtype) { | |||
1096 | case AMDPENTIUM_K6: | |||
1097 | return "k6"; | |||
1098 | case AMDPENTIUM_K62: | |||
1099 | return "k6-2"; | |||
1100 | case AMDPENTIUM_K63: | |||
1101 | return "k6-3"; | |||
1102 | case AMDPENTIUM_GEODE: | |||
1103 | return "geode"; | |||
1104 | default: | |||
1105 | return "pentium"; | |||
1106 | } | |||
1107 | case AMDATHLON: | |||
1108 | switch (Subtype) { | |||
1109 | case AMDATHLON_TBIRD: | |||
1110 | return "athlon-tbird"; | |||
1111 | case AMDATHLON_MP: | |||
1112 | return "athlon-mp"; | |||
1113 | case AMDATHLON_XP: | |||
1114 | return "athlon-xp"; | |||
1115 | case AMDATHLON_K8SSE3: | |||
1116 | return "k8-sse3"; | |||
1117 | case AMDATHLON_OPTERON: | |||
1118 | return "opteron"; | |||
1119 | case AMDATHLON_FX: | |||
1120 | return "athlon-fx"; | |||
1121 | case AMDATHLON_64: | |||
1122 | return "athlon64"; | |||
1123 | default: | |||
1124 | return "athlon"; | |||
1125 | } | |||
1126 | case AMDFAM10H: | |||
1127 | if(Subtype == AMDFAM10H_BARCELONA) | |||
| ||||
1128 | return "barcelona"; | |||
1129 | return "amdfam10"; | |||
1130 | case AMDFAM14H: | |||
1131 | return "btver1"; | |||
1132 | case AMDFAM15H: | |||
1133 | switch (Subtype) { | |||
1134 | case AMDFAM15H_BDVER1: | |||
1135 | return "bdver1"; | |||
1136 | case AMDFAM15H_BDVER2: | |||
1137 | return "bdver2"; | |||
1138 | case AMDFAM15H_BDVER3: | |||
1139 | return "bdver3"; | |||
1140 | case AMDFAM15H_BDVER4: | |||
1141 | return "bdver4"; | |||
1142 | case AMD_BTVER1: | |||
1143 | return "btver1"; | |||
1144 | default: | |||
1145 | return "amdfam15"; | |||
1146 | } | |||
1147 | case AMDFAM16H: | |||
1148 | switch (Subtype) { | |||
1149 | case AMD_BTVER1: | |||
1150 | return "btver1"; | |||
1151 | case AMD_BTVER2: | |||
1152 | return "btver2"; | |||
1153 | default: | |||
1154 | return "amdfam16"; | |||
1155 | } | |||
1156 | case AMDFAM17H: | |||
1157 | switch (Subtype) { | |||
1158 | case AMD_BTVER1: | |||
1159 | return "btver1"; | |||
1160 | case AMDFAM17H_ZNVER1: | |||
1161 | return "znver1"; | |||
1162 | default: | |||
1163 | return "amdfam17"; | |||
1164 | } | |||
1165 | default: | |||
1166 | return "generic"; | |||
1167 | } | |||
1168 | } | |||
1169 | return "generic"; | |||
1170 | } | |||
1171 | ||||
1172 | #elif defined(__APPLE__) && (defined(__ppc__) || defined(__powerpc__)) | |||
1173 | StringRef sys::getHostCPUName() { | |||
1174 | host_basic_info_data_t hostInfo; | |||
1175 | mach_msg_type_number_t infoCount; | |||
1176 | ||||
1177 | infoCount = HOST_BASIC_INFO_COUNT; | |||
1178 | host_info(mach_host_self(), HOST_BASIC_INFO, (host_info_t)&hostInfo, | |||
1179 | &infoCount); | |||
1180 | ||||
1181 | if (hostInfo.cpu_type != CPU_TYPE_POWERPC) | |||
1182 | return "generic"; | |||
1183 | ||||
1184 | switch (hostInfo.cpu_subtype) { | |||
1185 | case CPU_SUBTYPE_POWERPC_601: | |||
1186 | return "601"; | |||
1187 | case CPU_SUBTYPE_POWERPC_602: | |||
1188 | return "602"; | |||
1189 | case CPU_SUBTYPE_POWERPC_603: | |||
1190 | return "603"; | |||
1191 | case CPU_SUBTYPE_POWERPC_603e: | |||
1192 | return "603e"; | |||
1193 | case CPU_SUBTYPE_POWERPC_603ev: | |||
1194 | return "603ev"; | |||
1195 | case CPU_SUBTYPE_POWERPC_604: | |||
1196 | return "604"; | |||
1197 | case CPU_SUBTYPE_POWERPC_604e: | |||
1198 | return "604e"; | |||
1199 | case CPU_SUBTYPE_POWERPC_620: | |||
1200 | return "620"; | |||
1201 | case CPU_SUBTYPE_POWERPC_750: | |||
1202 | return "750"; | |||
1203 | case CPU_SUBTYPE_POWERPC_7400: | |||
1204 | return "7400"; | |||
1205 | case CPU_SUBTYPE_POWERPC_7450: | |||
1206 | return "7450"; | |||
1207 | case CPU_SUBTYPE_POWERPC_970: | |||
1208 | return "970"; | |||
1209 | default:; | |||
1210 | } | |||
1211 | ||||
1212 | return "generic"; | |||
1213 | } | |||
1214 | #elif defined(__linux__1) && (defined(__ppc__) || defined(__powerpc__)) | |||
1215 | StringRef sys::getHostCPUName() { | |||
1216 | std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); | |||
1217 | const StringRef& Content = P ? P->getBuffer() : ""; | |||
1218 | return detail::getHostCPUNameForPowerPC(Content); | |||
1219 | } | |||
1220 | #elif defined(__linux__1) && (defined(__arm__) || defined(__aarch64__)) | |||
1221 | StringRef sys::getHostCPUName() { | |||
1222 | std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); | |||
1223 | const StringRef& Content = P ? P->getBuffer() : ""; | |||
1224 | return detail::getHostCPUNameForARM(Content); | |||
1225 | } | |||
1226 | #elif defined(__linux__1) && defined(__s390x__) | |||
1227 | StringRef sys::getHostCPUName() { | |||
1228 | std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); | |||
1229 | const StringRef& Content = P ? P->getBuffer() : ""; | |||
1230 | return detail::getHostCPUNameForS390x(Content); | |||
1231 | } | |||
1232 | #else | |||
1233 | StringRef sys::getHostCPUName() { return "generic"; } | |||
1234 | #endif | |||
1235 | ||||
1236 | #if defined(__linux__1) && defined(__x86_64__1) | |||
1237 | // On Linux, the number of physical cores can be computed from /proc/cpuinfo, | |||
1238 | // using the number of unique physical/core id pairs. The following | |||
1239 | // implementation reads the /proc/cpuinfo format on an x86_64 system. | |||
1240 | static int computeHostNumPhysicalCores() { | |||
1241 | // Read /proc/cpuinfo as a stream (until EOF reached). It cannot be | |||
1242 | // mmapped because it appears to have 0 size. | |||
1243 | llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text = | |||
1244 | llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo"); | |||
1245 | if (std::error_code EC = Text.getError()) { | |||
1246 | llvm::errs() << "Can't read " | |||
1247 | << "/proc/cpuinfo: " << EC.message() << "\n"; | |||
1248 | return -1; | |||
1249 | } | |||
1250 | SmallVector<StringRef, 8> strs; | |||
1251 | (*Text)->getBuffer().split(strs, "\n", /*MaxSplit=*/-1, | |||
1252 | /*KeepEmpty=*/false); | |||
1253 | int CurPhysicalId = -1; | |||
1254 | int CurCoreId = -1; | |||
1255 | SmallSet<std::pair<int, int>, 32> UniqueItems; | |||
1256 | for (auto &Line : strs) { | |||
1257 | Line = Line.trim(); | |||
1258 | if (!Line.startswith("physical id") && !Line.startswith("core id")) | |||
1259 | continue; | |||
1260 | std::pair<StringRef, StringRef> Data = Line.split(':'); | |||
1261 | auto Name = Data.first.trim(); | |||
1262 | auto Val = Data.second.trim(); | |||
1263 | if (Name == "physical id") { | |||
1264 | assert(CurPhysicalId == -1 &&((CurPhysicalId == -1 && "Expected a core id before seeing another physical id" ) ? static_cast<void> (0) : __assert_fail ("CurPhysicalId == -1 && \"Expected a core id before seeing another physical id\"" , "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn306458/lib/Support/Host.cpp" , 1265, __PRETTY_FUNCTION__)) | |||
1265 | "Expected a core id before seeing another physical id")((CurPhysicalId == -1 && "Expected a core id before seeing another physical id" ) ? static_cast<void> (0) : __assert_fail ("CurPhysicalId == -1 && \"Expected a core id before seeing another physical id\"" , "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn306458/lib/Support/Host.cpp" , 1265, __PRETTY_FUNCTION__)); | |||
1266 | Val.getAsInteger(10, CurPhysicalId); | |||
1267 | } | |||
1268 | if (Name == "core id") { | |||
1269 | assert(CurCoreId == -1 &&((CurCoreId == -1 && "Expected a physical id before seeing another core id" ) ? static_cast<void> (0) : __assert_fail ("CurCoreId == -1 && \"Expected a physical id before seeing another core id\"" , "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn306458/lib/Support/Host.cpp" , 1270, __PRETTY_FUNCTION__)) | |||
1270 | "Expected a physical id before seeing another core id")((CurCoreId == -1 && "Expected a physical id before seeing another core id" ) ? static_cast<void> (0) : __assert_fail ("CurCoreId == -1 && \"Expected a physical id before seeing another core id\"" , "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn306458/lib/Support/Host.cpp" , 1270, __PRETTY_FUNCTION__)); | |||
1271 | Val.getAsInteger(10, CurCoreId); | |||
1272 | } | |||
1273 | if (CurPhysicalId != -1 && CurCoreId != -1) { | |||
1274 | UniqueItems.insert(std::make_pair(CurPhysicalId, CurCoreId)); | |||
1275 | CurPhysicalId = -1; | |||
1276 | CurCoreId = -1; | |||
1277 | } | |||
1278 | } | |||
1279 | return UniqueItems.size(); | |||
1280 | } | |||
1281 | #elif defined(__APPLE__) && defined(__x86_64__1) | |||
1282 | #include <sys/param.h> | |||
1283 | #include <sys/sysctl.h> | |||
1284 | ||||
1285 | // Gets the number of *physical cores* on the machine. | |||
1286 | static int computeHostNumPhysicalCores() { | |||
1287 | uint32_t count; | |||
1288 | size_t len = sizeof(count); | |||
1289 | sysctlbyname("hw.physicalcpu", &count, &len, NULL__null, 0); | |||
1290 | if (count < 1) { | |||
1291 | int nm[2]; | |||
1292 | nm[0] = CTL_HW; | |||
1293 | nm[1] = HW_AVAILCPU; | |||
1294 | sysctl(nm, 2, &count, &len, NULL__null, 0); | |||
1295 | if (count < 1) | |||
1296 | return -1; | |||
1297 | } | |||
1298 | return count; | |||
1299 | } | |||
1300 | #else | |||
// No physical-core probe is implemented for other systems; -1 signals
// "unknown" to the caller.
static int computeHostNumPhysicalCores() {
  return -1;
}
1303 | #endif | |||
1304 | ||||
1305 | int sys::getHostNumPhysicalCores() { | |||
1306 | static int NumCores = computeHostNumPhysicalCores(); | |||
1307 | return NumCores; | |||
1308 | } | |||
1309 | ||||
1310 | #if defined(__i386__) || defined(_M_IX86) || \ | |||
1311 | defined(__x86_64__1) || defined(_M_X64) | |||
1312 | bool sys::getHostCPUFeatures(StringMap<bool> &Features) { | |||
1313 | unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; | |||
1314 | unsigned MaxLevel; | |||
1315 | union { | |||
1316 | unsigned u[3]; | |||
1317 | char c[12]; | |||
1318 | } text; | |||
1319 | ||||
1320 | if (getX86CpuIDAndInfo(0, &MaxLevel, text.u + 0, text.u + 2, text.u + 1) || | |||
1321 | MaxLevel < 1) | |||
1322 | return false; | |||
1323 | ||||
1324 | getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX); | |||
1325 | ||||
1326 | Features["cmov"] = (EDX >> 15) & 1; | |||
1327 | Features["mmx"] = (EDX >> 23) & 1; | |||
1328 | Features["sse"] = (EDX >> 25) & 1; | |||
1329 | Features["sse2"] = (EDX >> 26) & 1; | |||
1330 | Features["sse3"] = (ECX >> 0) & 1; | |||
1331 | Features["ssse3"] = (ECX >> 9) & 1; | |||
1332 | Features["sse4.1"] = (ECX >> 19) & 1; | |||
1333 | Features["sse4.2"] = (ECX >> 20) & 1; | |||
1334 | ||||
1335 | Features["pclmul"] = (ECX >> 1) & 1; | |||
1336 | Features["cx16"] = (ECX >> 13) & 1; | |||
1337 | Features["movbe"] = (ECX >> 22) & 1; | |||
1338 | Features["popcnt"] = (ECX >> 23) & 1; | |||
1339 | Features["aes"] = (ECX >> 25) & 1; | |||
1340 | Features["rdrnd"] = (ECX >> 30) & 1; | |||
1341 | ||||
1342 | // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV | |||
1343 | // indicates that the AVX registers will be saved and restored on context | |||
1344 | // switch, then we have full AVX support. | |||
1345 | bool HasAVXSave = ((ECX >> 27) & 1) && ((ECX >> 28) & 1) && | |||
1346 | !getX86XCR0(&EAX, &EDX) && ((EAX & 0x6) == 0x6); | |||
1347 | Features["avx"] = HasAVXSave; | |||
1348 | Features["fma"] = HasAVXSave && (ECX >> 12) & 1; | |||
1349 | Features["f16c"] = HasAVXSave && (ECX >> 29) & 1; | |||
1350 | ||||
1351 | // Only enable XSAVE if OS has enabled support for saving YMM state. | |||
1352 | Features["xsave"] = HasAVXSave && (ECX >> 26) & 1; | |||
1353 | ||||
1354 | // AVX512 requires additional context to be saved by the OS. | |||
1355 | bool HasAVX512Save = HasAVXSave && ((EAX & 0xe0) == 0xe0); | |||
1356 | ||||
1357 | unsigned MaxExtLevel; | |||
1358 | getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX); | |||
1359 | ||||
1360 | bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 && | |||
1361 | !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); | |||
1362 | Features["lzcnt"] = HasExtLeaf1 && ((ECX >> 5) & 1); | |||
1363 | Features["sse4a"] = HasExtLeaf1 && ((ECX >> 6) & 1); | |||
1364 | Features["prfchw"] = HasExtLeaf1 && ((ECX >> 8) & 1); | |||
1365 | Features["xop"] = HasExtLeaf1 && ((ECX >> 11) & 1) && HasAVXSave; | |||
1366 | Features["lwp"] = HasExtLeaf1 && ((ECX >> 15) & 1); | |||
1367 | Features["fma4"] = HasExtLeaf1 && ((ECX >> 16) & 1) && HasAVXSave; | |||
1368 | Features["tbm"] = HasExtLeaf1 && ((ECX >> 21) & 1); | |||
1369 | Features["mwaitx"] = HasExtLeaf1 && ((ECX >> 29) & 1); | |||
1370 | ||||
1371 | bool HasExtLeaf8 = MaxExtLevel >= 0x80000008 && | |||
1372 | !getX86CpuIDAndInfoEx(0x80000008,0x0, &EAX, &EBX, &ECX, &EDX); | |||
1373 | Features["clzero"] = HasExtLeaf8 && ((EBX >> 0) & 1); | |||
1374 | ||||
1375 | bool HasLeaf7 = | |||
1376 | MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX); | |||
1377 | ||||
1378 | // AVX2 is only supported if we have the OS save support from AVX. | |||
1379 | Features["avx2"] = HasAVXSave && HasLeaf7 && ((EBX >> 5) & 1); | |||
1380 | ||||
1381 | Features["fsgsbase"] = HasLeaf7 && ((EBX >> 0) & 1); | |||
1382 | Features["sgx"] = HasLeaf7 && ((EBX >> 2) & 1); | |||
1383 | Features["bmi"] = HasLeaf7 && ((EBX >> 3) & 1); | |||
1384 | Features["bmi2"] = HasLeaf7 && ((EBX >> 8) & 1); | |||
1385 | Features["rtm"] = HasLeaf7 && ((EBX >> 11) & 1); | |||
1386 | Features["rdseed"] = HasLeaf7 && ((EBX >> 18) & 1); | |||
1387 | Features["adx"] = HasLeaf7 && ((EBX >> 19) & 1); | |||
1388 | Features["clflushopt"] = HasLeaf7 && ((EBX >> 23) & 1); | |||
1389 | Features["clwb"] = HasLeaf7 && ((EBX >> 24) & 1); | |||
1390 | Features["sha"] = HasLeaf7 && ((EBX >> 29) & 1); | |||
1391 | ||||
1392 | // AVX512 is only supported if the OS supports the context save for it. | |||
1393 | Features["avx512f"] = HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save; | |||
1394 | Features["avx512dq"] = HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save; | |||
1395 | Features["avx512ifma"] = HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save; | |||
1396 | Features["avx512pf"] = HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save; | |||
1397 | Features["avx512er"] = HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save; | |||
1398 | Features["avx512cd"] = HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save; | |||
1399 | Features["avx512bw"] = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save; | |||
1400 | Features["avx512vl"] = HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save; | |||
1401 | ||||
1402 | Features["prefetchwt1"] = HasLeaf7 && (ECX & 1); | |||
1403 | Features["avx512vbmi"] = HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save; | |||
1404 | Features["avx512vpopcntdq"] = HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save; | |||
1405 | // Enable protection keys | |||
1406 | Features["pku"] = HasLeaf7 && ((ECX >> 4) & 1); | |||
1407 | ||||
1408 | bool HasLeafD = MaxLevel >= 0xd && | |||
1409 | !getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX); | |||
1410 | ||||
1411 | // Only enable XSAVE if OS has enabled support for saving YMM state. | |||
1412 | Features["xsaveopt"] = HasAVXSave && HasLeafD && ((EAX >> 0) & 1); | |||
1413 | Features["xsavec"] = HasAVXSave && HasLeafD && ((EAX >> 1) & 1); | |||
1414 | Features["xsaves"] = HasAVXSave && HasLeafD && ((EAX >> 3) & 1); | |||
1415 | ||||
1416 | return true; | |||
1417 | } | |||
1418 | #elif defined(__linux__1) && (defined(__arm__) || defined(__aarch64__)) | |||
1419 | bool sys::getHostCPUFeatures(StringMap<bool> &Features) { | |||
1420 | std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); | |||
1421 | if (!P) | |||
1422 | return false; | |||
1423 | ||||
1424 | SmallVector<StringRef, 32> Lines; | |||
1425 | P->getBuffer().split(Lines, "\n"); | |||
1426 | ||||
1427 | SmallVector<StringRef, 32> CPUFeatures; | |||
1428 | ||||
1429 | // Look for the CPU features. | |||
1430 | for (unsigned I = 0, E = Lines.size(); I != E; ++I) | |||
1431 | if (Lines[I].startswith("Features")) { | |||
1432 | Lines[I].split(CPUFeatures, ' '); | |||
1433 | break; | |||
1434 | } | |||
1435 | ||||
1436 | #if defined(__aarch64__) | |||
1437 | // Keep track of which crypto features we have seen | |||
1438 | enum { CAP_AES = 0x1, CAP_PMULL = 0x2, CAP_SHA1 = 0x4, CAP_SHA2 = 0x8 }; | |||
1439 | uint32_t crypto = 0; | |||
1440 | #endif | |||
1441 | ||||
1442 | for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) { | |||
1443 | StringRef LLVMFeatureStr = StringSwitch<StringRef>(CPUFeatures[I]) | |||
1444 | #if defined(__aarch64__) | |||
1445 | .Case("asimd", "neon") | |||
1446 | .Case("fp", "fp-armv8") | |||
1447 | .Case("crc32", "crc") | |||
1448 | #else | |||
1449 | .Case("half", "fp16") | |||
1450 | .Case("neon", "neon") | |||
1451 | .Case("vfpv3", "vfp3") | |||
1452 | .Case("vfpv3d16", "d16") | |||
1453 | .Case("vfpv4", "vfp4") | |||
1454 | .Case("idiva", "hwdiv-arm") | |||
1455 | .Case("idivt", "hwdiv") | |||
1456 | #endif | |||
1457 | .Default(""); | |||
1458 | ||||
1459 | #if defined(__aarch64__) | |||
1460 | // We need to check crypto separately since we need all of the crypto | |||
1461 | // extensions to enable the subtarget feature | |||
1462 | if (CPUFeatures[I] == "aes") | |||
1463 | crypto |= CAP_AES; | |||
1464 | else if (CPUFeatures[I] == "pmull") | |||
1465 | crypto |= CAP_PMULL; | |||
1466 | else if (CPUFeatures[I] == "sha1") | |||
1467 | crypto |= CAP_SHA1; | |||
1468 | else if (CPUFeatures[I] == "sha2") | |||
1469 | crypto |= CAP_SHA2; | |||
1470 | #endif | |||
1471 | ||||
1472 | if (LLVMFeatureStr != "") | |||
1473 | Features[LLVMFeatureStr] = true; | |||
1474 | } | |||
1475 | ||||
1476 | #if defined(__aarch64__) | |||
1477 | // If we have all crypto bits we can add the feature | |||
1478 | if (crypto == (CAP_AES | CAP_PMULL | CAP_SHA1 | CAP_SHA2)) | |||
1479 | Features["crypto"] = true; | |||
1480 | #endif | |||
1481 | ||||
1482 | return true; | |||
1483 | } | |||
1484 | #else | |||
1485 | bool sys::getHostCPUFeatures(StringMap<bool> &Features) { return false; } | |||
1486 | #endif | |||
1487 | ||||
1488 | std::string sys::getProcessTriple() { | |||
1489 | Triple PT(Triple::normalize(LLVM_HOST_TRIPLE"x86_64-pc-linux-gnu")); | |||
1490 | ||||
1491 | if (sizeof(void *) == 8 && PT.isArch32Bit()) | |||
1492 | PT = PT.get64BitArchVariant(); | |||
1493 | if (sizeof(void *) == 4 && PT.isArch64Bit()) | |||
1494 | PT = PT.get32BitArchVariant(); | |||
1495 | ||||
1496 | return PT.str(); | |||
1497 | } |