LLVM  4.0.0
Host.cpp
Go to the documentation of this file.
1 //===-- Host.cpp - Implement OS Host Concept --------------------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the operating system Host concept.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Support/Host.h"
15 #include "llvm/ADT/SmallSet.h"
16 #include "llvm/ADT/SmallVector.h"
17 #include "llvm/ADT/StringRef.h"
18 #include "llvm/ADT/StringSwitch.h"
19 #include "llvm/ADT/Triple.h"
20 #include "llvm/Config/config.h"
21 #include "llvm/Support/Debug.h"
25 #include <assert.h>
26 #include <string.h>
27 
28 // Include the platform-specific parts of this class.
29 #ifdef LLVM_ON_UNIX
30 #include "Unix/Host.inc"
31 #endif
32 #ifdef LLVM_ON_WIN32
33 #include "Windows/Host.inc"
34 #endif
35 #ifdef _MSC_VER
36 #include <intrin.h>
37 #endif
38 #if defined(__APPLE__) && (defined(__ppc__) || defined(__powerpc__))
39 #include <mach/host_info.h>
40 #include <mach/mach.h>
41 #include <mach/mach_host.h>
42 #include <mach/machine.h>
43 #endif
44 
45 #define DEBUG_TYPE "host-detection"
46 
47 //===----------------------------------------------------------------------===//
48 //
49 // Implementations of the CPU detection routines
50 //
51 //===----------------------------------------------------------------------===//
52 
53 using namespace llvm;
54 
55 #if defined(__linux__)
56 static ssize_t LLVM_ATTRIBUTE_UNUSED readCpuInfo(void *Buf, size_t Size) {
57  // Note: We cannot mmap /proc/cpuinfo here and then process the resulting
58  // memory buffer because the 'file' has 0 size (it can be read from only
59  // as a stream).
60 
61  int FD;
62  std::error_code EC = sys::fs::openFileForRead("/proc/cpuinfo", FD);
63  if (EC) {
64  DEBUG(dbgs() << "Unable to open /proc/cpuinfo: " << EC.message() << "\n");
65  return -1;
66  }
67  int Ret = read(FD, Buf, Size);
68  int CloseStatus = close(FD);
69  if (CloseStatus)
70  return -1;
71  return Ret;
72 }
73 #endif
74 
75 #if defined(__i386__) || defined(_M_IX86) || \
76  defined(__x86_64__) || defined(_M_X64)
77 
/// Vendor signature words. Each value is the first four ASCII characters of
/// the vendor string packed little-endian, and is compared against the EBX
/// value produced by CPUID leaf 0.
enum VendorSignatures {
  SIG_INTEL = 0x756e6547, // "Genu"
  SIG_AMD = 0x68747541    // "Auth"
};
82 
/// Coarse vendor identifiers, numbered from 1. VENDOR_MAX is a sentinel that
/// stays one past the last real vendor.
enum ProcessorVendors {
  VENDOR_INTEL = 1,
  VENDOR_AMD = 2,
  VENDOR_OTHER = 3,
  VENDOR_MAX = 4
};
89 
/// Processor families reported through the `Type` out-parameter of the
/// vendor-specific classification routines.
///
/// Values are assigned sequentially starting at 1, so 0 never names a real
/// type. The numeric value of each enumerator depends on its position; append
/// new entries immediately before CPU_TYPE_MAX rather than reordering.
enum ProcessorTypes {
  INTEL_ATOM = 1,
  INTEL_CORE2,
  INTEL_COREI7,
  AMDFAM10H,
  AMDFAM15H,
  INTEL_i386,
  INTEL_i486,
  INTEL_PENTIUM,
  INTEL_PENTIUM_PRO,
  INTEL_PENTIUM_II,
  INTEL_PENTIUM_III,
  INTEL_PENTIUM_IV,
  INTEL_PENTIUM_M,
  INTEL_CORE_DUO,
  INTEL_XEONPHI,
  INTEL_X86_64,
  INTEL_NOCONA,
  INTEL_PRESCOTT,
  AMD_i486,
  AMDPENTIUM,
  AMDATHLON,
  AMDFAM14H,
  AMDFAM16H,
  AMDFAM17H,
  CPU_TYPE_MAX // Sentinel: one past the last real type.
};
117 
/// Micro-architecture refinements reported through the `Subtype`
/// out-parameter of the vendor-specific classification routines.
///
/// Values are assigned sequentially starting at 1, so 0 never names a real
/// subtype. The numeric value of each enumerator depends on its position;
/// append new entries immediately before CPU_SUBTYPE_MAX.
enum ProcessorSubtypes {
  INTEL_COREI7_NEHALEM = 1,
  INTEL_COREI7_WESTMERE,
  INTEL_COREI7_SANDYBRIDGE,
  AMDFAM10H_BARCELONA,
  AMDFAM10H_SHANGHAI,
  AMDFAM10H_ISTANBUL,
  AMDFAM15H_BDVER1,
  AMDFAM15H_BDVER2,
  INTEL_PENTIUM_MMX,
  INTEL_CORE2_65,
  INTEL_CORE2_45,
  INTEL_COREI7_IVYBRIDGE,
  INTEL_COREI7_HASWELL,
  INTEL_COREI7_BROADWELL,
  INTEL_COREI7_SKYLAKE,
  INTEL_COREI7_SKYLAKE_AVX512,
  INTEL_ATOM_BONNELL,
  INTEL_ATOM_SILVERMONT,
  INTEL_KNIGHTS_LANDING,
  AMDPENTIUM_K6,
  AMDPENTIUM_K62,
  AMDPENTIUM_K63,
  AMDPENTIUM_GEODE,
  AMDATHLON_TBIRD,
  AMDATHLON_MP,
  AMDATHLON_XP,
  AMDATHLON_K8SSE3,
  AMDATHLON_OPTERON,
  AMDATHLON_FX,
  AMDATHLON_64,
  AMD_BTVER1,
  AMD_BTVER2,
  AMDFAM15H_BDVER3,
  AMDFAM15H_BDVER4,
  AMDFAM17H_ZNVER1,
  CPU_SUBTYPE_MAX // Sentinel: one past the last real subtype.
};
156 
/// Bit positions inside the `Features` mask that getAvailableFeatures() builds
/// and the classification routines test with `Features & (1 << FEATURE_x)`.
enum ProcessorFeatures {
  FEATURE_CMOV = 0,
  FEATURE_MMX = 1,
  FEATURE_POPCNT = 2,
  FEATURE_SSE = 3,
  FEATURE_SSE2 = 4,
  FEATURE_SSE3 = 5,
  FEATURE_SSSE3 = 6,
  FEATURE_SSE4_1 = 7,
  FEATURE_SSE4_2 = 8,
  FEATURE_AVX = 9,
  FEATURE_AVX2 = 10,
  FEATURE_AVX512 = 11,
  FEATURE_AVX512SAVE = 12,
  FEATURE_MOVBE = 13,
  FEATURE_ADX = 14,
  FEATURE_EM64T = 15
};
175 
// Report whether the CPUID instruction may be executed on this host.
//
// The i386 probe below was copied from clang's cpuid.h (__get_cpuid_max). It
// was motivated by bug reports of OpenSSL crashing on CPUs without CPUID
// support, so on i386 the presence of CPUID is established first via the
// corresponding EFLAGS bit (0x00200000): the bit is flipped, EFLAGS is read
// back, and CPUID is considered present only if the value changed.
//
// The cpuid.h header itself is not included because of PR30384. That header
// and __get_cpuid_max are not used in llvm, clang, openmp or test-suite, but
// are used in external projects, e.g. libstdcxx.
//
// Every configuration other than GCC/Clang targeting i386 simply answers true.
static bool isCpuIdSupported() {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  int HasCpuId;
  __asm__(" pushfl\n"
          " popl %%eax\n"
          " movl %%eax,%%ecx\n"
          " xorl $0x00200000,%%eax\n"
          " pushl %%eax\n"
          " popfl\n"
          " pushfl\n"
          " popl %%eax\n"
          " movl $0,%0\n"
          " cmpl %%eax,%%ecx\n"
          " je 1f\n"
          " movl $1,%0\n"
          "1:"
          : "=r"(HasCpuId)
          :
          : "eax", "ecx");
  return HasCpuId != 0;
#else
  return true;
#endif
}
210 
/// getX86CpuIDAndInfo - Issue CPUID for leaf \p value and store the resulting
/// EAX, EBX, ECX and EDX through the four pointer arguments.
///
/// \returns false once the GCC/Clang or MSVC code path has run, and true when
/// the compiler offers no way to issue CPUID (the outputs are then not
/// written). A GCC/Clang build for a non-x86 target trips the assertion.
static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
                               unsigned *rECX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
  // FIXME: should we save this for Clang?
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
#elif defined(__i386__)
  // Same ebx-preserving sequence, using the 32-bit registers.
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
#else
  assert(0 && "This method is defined only for x86.");
#endif
  return false;
#elif defined(_MSC_VER)
  // The MSVC intrinsic is portable across x86 and x64.
  int Regs[4];
  __cpuid(Regs, value);
  *rEAX = Regs[0];
  *rEBX = Regs[1];
  *rECX = Regs[2];
  *rEDX = Regs[3];
  return false;
#else
  return true;
#endif
}
248 
/// getX86CpuIDAndInfoEx - Issue CPUID for leaf \p value with sub-leaf
/// \p subleaf (passed in ECX) and store the resulting EAX, EBX, ECX and EDX
/// through the four pointer arguments.
///
/// \returns false once the GCC/Clang or MSVC code path has run, and true when
/// the compiler offers no way to issue CPUID (the outputs are then not
/// written). Building for a non-x86 target trips the assertion.
static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
                                 unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
                                 unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__x86_64__) || defined(_M_X64)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
  // FIXME: should we save this for Clang?
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
#elif defined(__i386__) || defined(_M_IX86)
  // Same ebx-preserving sequence, using the 32-bit registers.
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
#else
  assert(0 && "This method is defined only for x86.");
#endif
  return false;
#elif defined(_MSC_VER)
#if defined(__x86_64__) || defined(_M_X64)
  int Regs[4];
  __cpuidex(Regs, value, subleaf);
  *rEAX = Regs[0];
  *rEBX = Regs[1];
  *rECX = Regs[2];
  *rEDX = Regs[3];
#elif defined(__i386__) || defined(_M_IX86)
  // 32-bit MSVC: run cpuid from an inline assembly block and store each
  // result register through the matching pointer argument.
  __asm {
      mov   eax,value
      mov   ecx,subleaf
      cpuid
      mov   esi,rEAX
      mov   dword ptr [esi],eax
      mov   esi,rEBX
      mov   dword ptr [esi],ebx
      mov   esi,rECX
      mov   dword ptr [esi],ecx
      mov   esi,rEDX
      mov   dword ptr [esi],edx
  }
#else
  assert(0 && "This method is defined only for x86.");
#endif
  return false;
#else
  return true;
#endif
}
303 
/// Read extended control register 0 with XGETBV (ECX = 0) and store its low
/// and high 32-bit halves in \p rEAX and \p rEDX.
///
/// \returns false when the register was read, true when the compiler offers
/// no way to issue XGETBV (the outputs are then not written).
static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
  // Check xgetbv; this uses a .byte sequence instead of the instruction
  // directly because older assemblers do not include support for xgetbv and
  // there is no easy way to conditionally compile based on the assembler used.
  __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0));
  return false;
#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
  // The intrinsic yields all 64 bits at once; split them into two halves.
  unsigned long long XCR0 = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
  *rEAX = XCR0;
  *rEDX = XCR0 >> 32;
  return false;
#else
  return true;
#endif
}
320 
/// Decode the family and model numbers from the EAX value of CPUID leaf 1.
///
/// \param EAX     raw EAX register contents.
/// \param Family  receives bits 8-11, plus the extended family (bits 20-27)
///                when the base family is 0xF.
/// \param Model   receives bits 4-7, with the extended model (bits 16-19)
///                placed above them when the base family is 6 or 0xF.
static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
                                 unsigned *Model) {
  const unsigned BaseFamily = (EAX >> 8) & 0xf;  // Bits 8 - 11
  const unsigned BaseModel = (EAX >> 4) & 0xf;   // Bits 4 - 7
  const unsigned ExtFamily = (EAX >> 20) & 0xff; // Bits 20 - 27
  const unsigned ExtModel = (EAX >> 16) & 0xf;   // Bits 16 - 19

  unsigned DecodedFamily = BaseFamily;
  unsigned DecodedModel = BaseModel;

  // The extended family ID only counts when the base family ID is F.
  if (BaseFamily == 0xf)
    DecodedFamily += ExtFamily;

  // The extended model ID only counts when the base family ID is 6 or F.
  if (BaseFamily == 6 || BaseFamily == 0xf)
    DecodedModel += ExtModel << 4;

  *Family = DecodedFamily;
  *Model = DecodedModel;
}
333 
334 static void
335 getIntelProcessorTypeAndSubtype(unsigned int Family, unsigned int Model,
336  unsigned int Brand_id, unsigned int Features,
337  unsigned *Type, unsigned *Subtype) {
338  if (Brand_id != 0)
339  return;
340  switch (Family) {
341  case 3:
342  *Type = INTEL_i386;
343  break;
344  case 4:
345  switch (Model) {
346  case 0: // Intel486 DX processors
347  case 1: // Intel486 DX processors
348  case 2: // Intel486 SX processors
349  case 3: // Intel487 processors, IntelDX2 OverDrive processors,
350  // IntelDX2 processors
351  case 4: // Intel486 SL processor
352  case 5: // IntelSX2 processors
353  case 7: // Write-Back Enhanced IntelDX2 processors
354  case 8: // IntelDX4 OverDrive processors, IntelDX4 processors
355  default:
356  *Type = INTEL_i486;
357  break;
358  }
359  break;
360  case 5:
361  switch (Model) {
362  case 1: // Pentium OverDrive processor for Pentium processor (60, 66),
363  // Pentium processors (60, 66)
364  case 2: // Pentium OverDrive processor for Pentium processor (75, 90,
365  // 100, 120, 133), Pentium processors (75, 90, 100, 120, 133,
366  // 150, 166, 200)
367  case 3: // Pentium OverDrive processors for Intel486 processor-based
368  // systems
369  *Type = INTEL_PENTIUM;
370  break;
371  case 4: // Pentium OverDrive processor with MMX technology for Pentium
372  // processor (75, 90, 100, 120, 133), Pentium processor with
373  // MMX technology (166, 200)
374  *Type = INTEL_PENTIUM;
375  *Subtype = INTEL_PENTIUM_MMX;
376  break;
377  default:
378  *Type = INTEL_PENTIUM;
379  break;
380  }
381  break;
382  case 6:
383  switch (Model) {
384  case 0x01: // Pentium Pro processor
385  *Type = INTEL_PENTIUM_PRO;
386  break;
387  case 0x03: // Intel Pentium II OverDrive processor, Pentium II processor,
388  // model 03
389  case 0x05: // Pentium II processor, model 05, Pentium II Xeon processor,
390  // model 05, and Intel Celeron processor, model 05
391  case 0x06: // Celeron processor, model 06
392  *Type = INTEL_PENTIUM_II;
393  break;
394  case 0x07: // Pentium III processor, model 07, and Pentium III Xeon
395  // processor, model 07
396  case 0x08: // Pentium III processor, model 08, Pentium III Xeon processor,
397  // model 08, and Celeron processor, model 08
398  case 0x0a: // Pentium III Xeon processor, model 0Ah
399  case 0x0b: // Pentium III processor, model 0Bh
400  *Type = INTEL_PENTIUM_III;
401  break;
402  case 0x09: // Intel Pentium M processor, Intel Celeron M processor model 09.
403  case 0x0d: // Intel Pentium M processor, Intel Celeron M processor, model
404  // 0Dh. All processors are manufactured using the 90 nm process.
405  case 0x15: // Intel EP80579 Integrated Processor and Intel EP80579
406  // Integrated Processor with Intel QuickAssist Technology
407  *Type = INTEL_PENTIUM_M;
408  break;
409  case 0x0e: // Intel Core Duo processor, Intel Core Solo processor, model
410  // 0Eh. All processors are manufactured using the 65 nm process.
411  *Type = INTEL_CORE_DUO;
412  break; // yonah
413  case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
414  // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
415  // mobile processor, Intel Core 2 Extreme processor, Intel
416  // Pentium Dual-Core processor, Intel Xeon processor, model
417  // 0Fh. All processors are manufactured using the 65 nm process.
418  case 0x16: // Intel Celeron processor model 16h. All processors are
419  // manufactured using the 65 nm process
420  *Type = INTEL_CORE2; // "core2"
421  *Subtype = INTEL_CORE2_65;
422  break;
423  case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
424  // 17h. All processors are manufactured using the 45 nm process.
425  //
426  // 45nm: Penryn , Wolfdale, Yorkfield (XE)
427  case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
428  // the 45 nm process.
429  *Type = INTEL_CORE2; // "penryn"
430  *Subtype = INTEL_CORE2_45;
431  break;
432  case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
433  // processors are manufactured using the 45 nm process.
434  case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz.
435  // As found in a Summer 2010 model iMac.
436  case 0x1f:
437  case 0x2e: // Nehalem EX
438  *Type = INTEL_COREI7; // "nehalem"
439  *Subtype = INTEL_COREI7_NEHALEM;
440  break;
441  case 0x25: // Intel Core i7, laptop version.
442  case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All
443  // processors are manufactured using the 32 nm process.
444  case 0x2f: // Westmere EX
445  *Type = INTEL_COREI7; // "westmere"
446  *Subtype = INTEL_COREI7_WESTMERE;
447  break;
448  case 0x2a: // Intel Core i7 processor. All processors are manufactured
449  // using the 32 nm process.
450  case 0x2d:
451  *Type = INTEL_COREI7; //"sandybridge"
452  *Subtype = INTEL_COREI7_SANDYBRIDGE;
453  break;
454  case 0x3a:
455  case 0x3e: // Ivy Bridge EP
456  *Type = INTEL_COREI7; // "ivybridge"
457  *Subtype = INTEL_COREI7_IVYBRIDGE;
458  break;
459 
460  // Haswell:
461  case 0x3c:
462  case 0x3f:
463  case 0x45:
464  case 0x46:
465  *Type = INTEL_COREI7; // "haswell"
466  *Subtype = INTEL_COREI7_HASWELL;
467  break;
468 
469  // Broadwell:
470  case 0x3d:
471  case 0x47:
472  case 0x4f:
473  case 0x56:
474  *Type = INTEL_COREI7; // "broadwell"
475  *Subtype = INTEL_COREI7_BROADWELL;
476  break;
477 
478  // Skylake:
479  case 0x4e: // Skylake mobile
480  case 0x5e: // Skylake desktop
481  case 0x8e: // Kaby Lake mobile
482  case 0x9e: // Kaby Lake desktop
483  *Type = INTEL_COREI7; // "skylake"
484  *Subtype = INTEL_COREI7_SKYLAKE;
485  break;
486 
487  // Skylake Xeon:
488  case 0x55:
489  *Type = INTEL_COREI7;
490  // Check that we really have AVX512
491  if (Features & (1 << FEATURE_AVX512)) {
492  *Subtype = INTEL_COREI7_SKYLAKE_AVX512; // "skylake-avx512"
493  } else {
494  *Subtype = INTEL_COREI7_SKYLAKE; // "skylake"
495  }
496  break;
497 
498  case 0x1c: // Most 45 nm Intel Atom processors
499  case 0x26: // 45 nm Atom Lincroft
500  case 0x27: // 32 nm Atom Medfield
501  case 0x35: // 32 nm Atom Midview
502  case 0x36: // 32 nm Atom Midview
503  *Type = INTEL_ATOM;
504  *Subtype = INTEL_ATOM_BONNELL;
505  break; // "bonnell"
506 
507  // Atom Silvermont codes from the Intel software optimization guide.
508  case 0x37:
509  case 0x4a:
510  case 0x4d:
511  case 0x5a:
512  case 0x5d:
513  case 0x4c: // really airmont
514  *Type = INTEL_ATOM;
515  *Subtype = INTEL_ATOM_SILVERMONT;
516  break; // "silvermont"
517 
518  case 0x57:
519  *Type = INTEL_XEONPHI; // knl
520  *Subtype = INTEL_KNIGHTS_LANDING;
521  break;
522 
523  default: // Unknown family 6 CPU, try to guess.
524  if (Features & (1 << FEATURE_AVX512)) {
525  *Type = INTEL_XEONPHI; // knl
526  *Subtype = INTEL_KNIGHTS_LANDING;
527  break;
528  }
529  if (Features & (1 << FEATURE_ADX)) {
530  *Type = INTEL_COREI7;
531  *Subtype = INTEL_COREI7_BROADWELL;
532  break;
533  }
534  if (Features & (1 << FEATURE_AVX2)) {
535  *Type = INTEL_COREI7;
536  *Subtype = INTEL_COREI7_HASWELL;
537  break;
538  }
539  if (Features & (1 << FEATURE_AVX)) {
540  *Type = INTEL_COREI7;
541  *Subtype = INTEL_COREI7_SANDYBRIDGE;
542  break;
543  }
544  if (Features & (1 << FEATURE_SSE4_2)) {
545  if (Features & (1 << FEATURE_MOVBE)) {
546  *Type = INTEL_ATOM;
547  *Subtype = INTEL_ATOM_SILVERMONT;
548  } else {
549  *Type = INTEL_COREI7;
550  *Subtype = INTEL_COREI7_NEHALEM;
551  }
552  break;
553  }
554  if (Features & (1 << FEATURE_SSE4_1)) {
555  *Type = INTEL_CORE2; // "penryn"
556  *Subtype = INTEL_CORE2_45;
557  break;
558  }
559  if (Features & (1 << FEATURE_SSSE3)) {
560  if (Features & (1 << FEATURE_MOVBE)) {
561  *Type = INTEL_ATOM;
562  *Subtype = INTEL_ATOM_BONNELL; // "bonnell"
563  } else {
564  *Type = INTEL_CORE2; // "core2"
565  *Subtype = INTEL_CORE2_65;
566  }
567  break;
568  }
569  if (Features & (1 << FEATURE_EM64T)) {
570  *Type = INTEL_X86_64;
571  break; // x86-64
572  }
573  if (Features & (1 << FEATURE_SSE2)) {
574  *Type = INTEL_PENTIUM_M;
575  break;
576  }
577  if (Features & (1 << FEATURE_SSE)) {
578  *Type = INTEL_PENTIUM_III;
579  break;
580  }
581  if (Features & (1 << FEATURE_MMX)) {
582  *Type = INTEL_PENTIUM_II;
583  break;
584  }
585  *Type = INTEL_PENTIUM_PRO;
586  break;
587  }
588  break;
589  case 15: {
590  switch (Model) {
591  case 0: // Pentium 4 processor, Intel Xeon processor. All processors are
592  // model 00h and manufactured using the 0.18 micron process.
593  case 1: // Pentium 4 processor, Intel Xeon processor, Intel Xeon
594  // processor MP, and Intel Celeron processor. All processors are
595  // model 01h and manufactured using the 0.18 micron process.
596  case 2: // Pentium 4 processor, Mobile Intel Pentium 4 processor - M,
597  // Intel Xeon processor, Intel Xeon processor MP, Intel Celeron
598  // processor, and Mobile Intel Celeron processor. All processors
599  // are model 02h and manufactured using the 0.13 micron process.
600  *Type =
601  ((Features & (1 << FEATURE_EM64T)) ? INTEL_X86_64 : INTEL_PENTIUM_IV);
602  break;
603 
604  case 3: // Pentium 4 processor, Intel Xeon processor, Intel Celeron D
605  // processor. All processors are model 03h and manufactured using
606  // the 90 nm process.
607  case 4: // Pentium 4 processor, Pentium 4 processor Extreme Edition,
608  // Pentium D processor, Intel Xeon processor, Intel Xeon
609  // processor MP, Intel Celeron D processor. All processors are
610  // model 04h and manufactured using the 90 nm process.
611  case 6: // Pentium 4 processor, Pentium D processor, Pentium processor
612  // Extreme Edition, Intel Xeon processor, Intel Xeon processor
613  // MP, Intel Celeron D processor. All processors are model 06h
614  // and manufactured using the 65 nm process.
615  *Type =
616  ((Features & (1 << FEATURE_EM64T)) ? INTEL_NOCONA : INTEL_PRESCOTT);
617  break;
618 
619  default:
620  *Type =
621  ((Features & (1 << FEATURE_EM64T)) ? INTEL_X86_64 : INTEL_PENTIUM_IV);
622  break;
623  }
624  break;
625  }
626  default:
627  break; /*"generic"*/
628  }
629 }
630 
631 static void getAMDProcessorTypeAndSubtype(unsigned int Family,
632  unsigned int Model,
633  unsigned int Features,
634  unsigned *Type,
635  unsigned *Subtype) {
636  // FIXME: this poorly matches the generated SubtargetFeatureKV table. There
637  // appears to be no way to generate the wide variety of AMD-specific targets
638  // from the information returned from CPUID.
639  switch (Family) {
640  case 4:
641  *Type = AMD_i486;
642  break;
643  case 5:
644  *Type = AMDPENTIUM;
645  switch (Model) {
646  case 6:
647  case 7:
648  *Subtype = AMDPENTIUM_K6;
649  break; // "k6"
650  case 8:
651  *Subtype = AMDPENTIUM_K62;
652  break; // "k6-2"
653  case 9:
654  case 13:
655  *Subtype = AMDPENTIUM_K63;
656  break; // "k6-3"
657  case 10:
658  *Subtype = AMDPENTIUM_GEODE;
659  break; // "geode"
660  }
661  break;
662  case 6:
663  *Type = AMDATHLON;
664  switch (Model) {
665  case 4:
666  *Subtype = AMDATHLON_TBIRD;
667  break; // "athlon-tbird"
668  case 6:
669  case 7:
670  case 8:
671  *Subtype = AMDATHLON_MP;
672  break; // "athlon-mp"
673  case 10:
674  *Subtype = AMDATHLON_XP;
675  break; // "athlon-xp"
676  }
677  break;
678  case 15:
679  *Type = AMDATHLON;
680  if (Features & (1 << FEATURE_SSE3)) {
681  *Subtype = AMDATHLON_K8SSE3;
682  break; // "k8-sse3"
683  }
684  switch (Model) {
685  case 1:
686  *Subtype = AMDATHLON_OPTERON;
687  break; // "opteron"
688  case 5:
689  *Subtype = AMDATHLON_FX;
690  break; // "athlon-fx"; also opteron
691  default:
692  *Subtype = AMDATHLON_64;
693  break; // "athlon64"
694  }
695  break;
696  case 16:
697  *Type = AMDFAM10H; // "amdfam10"
698  switch (Model) {
699  case 2:
700  *Subtype = AMDFAM10H_BARCELONA;
701  break;
702  case 4:
703  *Subtype = AMDFAM10H_SHANGHAI;
704  break;
705  case 8:
706  *Subtype = AMDFAM10H_ISTANBUL;
707  break;
708  }
709  break;
710  case 20:
711  *Type = AMDFAM14H;
712  *Subtype = AMD_BTVER1;
713  break; // "btver1";
714  case 21:
715  *Type = AMDFAM15H;
716  if (!(Features &
717  (1 << FEATURE_AVX))) { // If no AVX support, provide a sane fallback.
718  *Subtype = AMD_BTVER1;
719  break; // "btver1"
720  }
721  if (Model >= 0x50 && Model <= 0x6f) {
722  *Subtype = AMDFAM15H_BDVER4;
723  break; // "bdver4"; 50h-6Fh: Excavator
724  }
725  if (Model >= 0x30 && Model <= 0x3f) {
726  *Subtype = AMDFAM15H_BDVER3;
727  break; // "bdver3"; 30h-3Fh: Steamroller
728  }
729  if (Model >= 0x10 && Model <= 0x1f) {
730  *Subtype = AMDFAM15H_BDVER2;
731  break; // "bdver2"; 10h-1Fh: Piledriver
732  }
733  if (Model <= 0x0f) {
734  *Subtype = AMDFAM15H_BDVER1;
735  break; // "bdver1"; 00h-0Fh: Bulldozer
736  }
737  break;
738  case 22:
739  *Type = AMDFAM16H;
740  if (!(Features &
741  (1 << FEATURE_AVX))) { // If no AVX support provide a sane fallback.
742  *Subtype = AMD_BTVER1;
743  break; // "btver1";
744  }
745  *Subtype = AMD_BTVER2;
746  break; // "btver2"
747  case 23:
748  *Type = AMDFAM17H;
749  if (Features & (1 << FEATURE_ADX)) {
750  *Subtype = AMDFAM17H_ZNVER1;
751  break; // "znver1"
752  }
753  *Subtype = AMD_BTVER1;
754  break;
755  default:
756  break; // "generic"
757  }
758 }
759 
760 static unsigned getAvailableFeatures(unsigned int ECX, unsigned int EDX,
761  unsigned MaxLeaf) {
762  unsigned Features = 0;
763  unsigned int EAX, EBX;
764  Features |= (((EDX >> 23) & 1) << FEATURE_MMX);
765  Features |= (((EDX >> 25) & 1) << FEATURE_SSE);
766  Features |= (((EDX >> 26) & 1) << FEATURE_SSE2);
767  Features |= (((ECX >> 0) & 1) << FEATURE_SSE3);
768  Features |= (((ECX >> 9) & 1) << FEATURE_SSSE3);
769  Features |= (((ECX >> 19) & 1) << FEATURE_SSE4_1);
770  Features |= (((ECX >> 20) & 1) << FEATURE_SSE4_2);
771  Features |= (((ECX >> 22) & 1) << FEATURE_MOVBE);
772 
773  // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
774  // indicates that the AVX registers will be saved and restored on context
775  // switch, then we have full AVX support.
776  const unsigned AVXBits = (1 << 27) | (1 << 28);
777  bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
778  ((EAX & 0x6) == 0x6);
779  bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
780  bool HasLeaf7 =
781  MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
782  bool HasADX = HasLeaf7 && ((EBX >> 19) & 1);
783  bool HasAVX2 = HasAVX && HasLeaf7 && (EBX & 0x20);
784  bool HasAVX512 = HasLeaf7 && HasAVX512Save && ((EBX >> 16) & 1);
785  Features |= (HasAVX << FEATURE_AVX);
786  Features |= (HasAVX2 << FEATURE_AVX2);
787  Features |= (HasAVX512 << FEATURE_AVX512);
788  Features |= (HasAVX512Save << FEATURE_AVX512SAVE);
789  Features |= (HasADX << FEATURE_ADX);
790 
791  getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
792  Features |= (((EDX >> 29) & 0x1) << FEATURE_EM64T);
793  return Features;
794 }
795 
797  unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
798  unsigned MaxLeaf, Vendor;
799 
800 #if defined(__GNUC__) || defined(__clang__)
801  //FIXME: include cpuid.h from clang or copy __get_cpuid_max here
802  // and simplify it to not invoke __cpuid (like cpu_model.c in
803  // compiler-rt/lib/builtins/cpu_model.c?
804  // Opting for the second option.
805  if(!isCpuIdSupported())
806  return "generic";
807 #endif
808  if (getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX))
809  return "generic";
810  if (getX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX))
811  return "generic";
812 
813  unsigned Brand_id = EBX & 0xff;
814  unsigned Family = 0, Model = 0;
815  unsigned Features = 0;
816  detectX86FamilyModel(EAX, &Family, &Model);
817  Features = getAvailableFeatures(ECX, EDX, MaxLeaf);
818 
819  unsigned Type;
820  unsigned Subtype;
821 
822  if (Vendor == SIG_INTEL) {
823  getIntelProcessorTypeAndSubtype(Family, Model, Brand_id, Features, &Type,
824  &Subtype);
825  switch (Type) {
826  case INTEL_i386:
827  return "i386";
828  case INTEL_i486:
829  return "i486";
830  case INTEL_PENTIUM:
831  if (Subtype == INTEL_PENTIUM_MMX)
832  return "pentium-mmx";
833  return "pentium";
834  case INTEL_PENTIUM_PRO:
835  return "pentiumpro";
836  case INTEL_PENTIUM_II:
837  return "pentium2";
838  case INTEL_PENTIUM_III:
839  return "pentium3";
840  case INTEL_PENTIUM_IV:
841  return "pentium4";
842  case INTEL_PENTIUM_M:
843  return "pentium-m";
844  case INTEL_CORE_DUO:
845  return "yonah";
846  case INTEL_CORE2:
847  switch (Subtype) {
848  case INTEL_CORE2_65:
849  return "core2";
850  case INTEL_CORE2_45:
851  return "penryn";
852  default:
853  return "core2";
854  }
855  case INTEL_COREI7:
856  switch (Subtype) {
857  case INTEL_COREI7_NEHALEM:
858  return "nehalem";
859  case INTEL_COREI7_WESTMERE:
860  return "westmere";
861  case INTEL_COREI7_SANDYBRIDGE:
862  return "sandybridge";
863  case INTEL_COREI7_IVYBRIDGE:
864  return "ivybridge";
865  case INTEL_COREI7_HASWELL:
866  return "haswell";
867  case INTEL_COREI7_BROADWELL:
868  return "broadwell";
869  case INTEL_COREI7_SKYLAKE:
870  return "skylake";
871  case INTEL_COREI7_SKYLAKE_AVX512:
872  return "skylake-avx512";
873  default:
874  return "corei7";
875  }
876  case INTEL_ATOM:
877  switch (Subtype) {
878  case INTEL_ATOM_BONNELL:
879  return "bonnell";
880  case INTEL_ATOM_SILVERMONT:
881  return "silvermont";
882  default:
883  return "atom";
884  }
885  case INTEL_XEONPHI:
886  return "knl"; /*update for more variants added*/
887  case INTEL_X86_64:
888  return "x86-64";
889  case INTEL_NOCONA:
890  return "nocona";
891  case INTEL_PRESCOTT:
892  return "prescott";
893  default:
894  return "generic";
895  }
896  } else if (Vendor == SIG_AMD) {
897  getAMDProcessorTypeAndSubtype(Family, Model, Features, &Type, &Subtype);
898  switch (Type) {
899  case AMD_i486:
900  return "i486";
901  case AMDPENTIUM:
902  switch (Subtype) {
903  case AMDPENTIUM_K6:
904  return "k6";
905  case AMDPENTIUM_K62:
906  return "k6-2";
907  case AMDPENTIUM_K63:
908  return "k6-3";
909  case AMDPENTIUM_GEODE:
910  return "geode";
911  default:
912  return "pentium";
913  }
914  case AMDATHLON:
915  switch (Subtype) {
916  case AMDATHLON_TBIRD:
917  return "athlon-tbird";
918  case AMDATHLON_MP:
919  return "athlon-mp";
920  case AMDATHLON_XP:
921  return "athlon-xp";
922  case AMDATHLON_K8SSE3:
923  return "k8-sse3";
924  case AMDATHLON_OPTERON:
925  return "opteron";
926  case AMDATHLON_FX:
927  return "athlon-fx";
928  case AMDATHLON_64:
929  return "athlon64";
930  default:
931  return "athlon";
932  }
933  case AMDFAM10H:
934  if(Subtype == AMDFAM10H_BARCELONA)
935  return "barcelona";
936  return "amdfam10";
937  case AMDFAM14H:
938  return "btver1";
939  case AMDFAM15H:
940  switch (Subtype) {
941  case AMDFAM15H_BDVER1:
942  return "bdver1";
943  case AMDFAM15H_BDVER2:
944  return "bdver2";
945  case AMDFAM15H_BDVER3:
946  return "bdver3";
947  case AMDFAM15H_BDVER4:
948  return "bdver4";
949  case AMD_BTVER1:
950  return "btver1";
951  default:
952  return "amdfam15";
953  }
954  case AMDFAM16H:
955  switch (Subtype) {
956  case AMD_BTVER1:
957  return "btver1";
958  case AMD_BTVER2:
959  return "btver2";
960  default:
961  return "amdfam16";
962  }
963  case AMDFAM17H:
964  switch (Subtype) {
965  case AMD_BTVER1:
966  return "btver1";
967  case AMDFAM17H_ZNVER1:
968  return "znver1";
969  default:
970  return "amdfam17";
971  }
972  default:
973  return "generic";
974  }
975  }
976  return "generic";
977 }
978 
979 #elif defined(__APPLE__) && (defined(__ppc__) || defined(__powerpc__))
981  host_basic_info_data_t hostInfo;
982  mach_msg_type_number_t infoCount;
983 
984  infoCount = HOST_BASIC_INFO_COUNT;
985  host_info(mach_host_self(), HOST_BASIC_INFO, (host_info_t)&hostInfo,
986  &infoCount);
987 
988  if (hostInfo.cpu_type != CPU_TYPE_POWERPC)
989  return "generic";
990 
991  switch (hostInfo.cpu_subtype) {
993  return "601";
995  return "602";
997  return "603";
999  return "603e";
1001  return "603ev";
1003  return "604";
1005  return "604e";
1007  return "620";
1009  return "750";
1011  return "7400";
1013  return "7450";
1015  return "970";
1016  default:;
1017  }
1018 
1019  return "generic";
1020 }
1021 #elif defined(__linux__) && (defined(__ppc__) || defined(__powerpc__))
1023  // Access to the Processor Version Register (PVR) on PowerPC is privileged,
1024  // and so we must use an operating-system interface to determine the current
1025  // processor type. On Linux, this is exposed through the /proc/cpuinfo file.
1026  const char *generic = "generic";
1027 
1028  // The cpu line is second (after the 'processor: 0' line), so if this
1029  // buffer is too small then something has changed (or is wrong).
1030  char buffer[1024];
1031  ssize_t CPUInfoSize = readCpuInfo(buffer, sizeof(buffer));
1032  if (CPUInfoSize == -1)
1033  return generic;
1034 
1035  const char *CPUInfoStart = buffer;
1036  const char *CPUInfoEnd = buffer + CPUInfoSize;
1037 
1038  const char *CIP = CPUInfoStart;
1039 
1040  const char *CPUStart = 0;
1041  size_t CPULen = 0;
1042 
1043  // We need to find the first line which starts with cpu, spaces, and a colon.
1044  // After the colon, there may be some additional spaces and then the cpu type.
1045  while (CIP < CPUInfoEnd && CPUStart == 0) {
1046  if (CIP < CPUInfoEnd && *CIP == '\n')
1047  ++CIP;
1048 
1049  if (CIP < CPUInfoEnd && *CIP == 'c') {
1050  ++CIP;
1051  if (CIP < CPUInfoEnd && *CIP == 'p') {
1052  ++CIP;
1053  if (CIP < CPUInfoEnd && *CIP == 'u') {
1054  ++CIP;
1055  while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t'))
1056  ++CIP;
1057 
1058  if (CIP < CPUInfoEnd && *CIP == ':') {
1059  ++CIP;
1060  while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t'))
1061  ++CIP;
1062 
1063  if (CIP < CPUInfoEnd) {
1064  CPUStart = CIP;
1065  while (CIP < CPUInfoEnd && (*CIP != ' ' && *CIP != '\t' &&
1066  *CIP != ',' && *CIP != '\n'))
1067  ++CIP;
1068  CPULen = CIP - CPUStart;
1069  }
1070  }
1071  }
1072  }
1073  }
1074 
1075  if (CPUStart == 0)
1076  while (CIP < CPUInfoEnd && *CIP != '\n')
1077  ++CIP;
1078  }
1079 
1080  if (CPUStart == 0)
1081  return generic;
1082 
1083  return StringSwitch<const char *>(StringRef(CPUStart, CPULen))
1084  .Case("604e", "604e")
1085  .Case("604", "604")
1086  .Case("7400", "7400")
1087  .Case("7410", "7400")
1088  .Case("7447", "7400")
1089  .Case("7455", "7450")
1090  .Case("G4", "g4")
1091  .Case("POWER4", "970")
1092  .Case("PPC970FX", "970")
1093  .Case("PPC970MP", "970")
1094  .Case("G5", "g5")
1095  .Case("POWER5", "g5")
1096  .Case("A2", "a2")
1097  .Case("POWER6", "pwr6")
1098  .Case("POWER7", "pwr7")
1099  .Case("POWER8", "pwr8")
1100  .Case("POWER8E", "pwr8")
1101  .Case("POWER8NVL", "pwr8")
1102  .Case("POWER9", "pwr9")
1103  .Default(generic);
1104 }
1105 #elif defined(__linux__) && defined(__arm__)
1107  // The cpuid register on arm is not accessible from user space. On Linux,
1108  // it is exposed through the /proc/cpuinfo file.
1109 
1110  // Read 1024 bytes from /proc/cpuinfo, which should contain the CPU part line
1111  // in all cases.
1112  char buffer[1024];
1113  ssize_t CPUInfoSize = readCpuInfo(buffer, sizeof(buffer));
1114  if (CPUInfoSize == -1)
1115  return "generic";
1116 
1117  StringRef Str(buffer, CPUInfoSize);
1118 
1120  Str.split(Lines, "\n");
1121 
1122  // Look for the CPU implementer line.
1123  StringRef Implementer;
1124  for (unsigned I = 0, E = Lines.size(); I != E; ++I)
1125  if (Lines[I].startswith("CPU implementer"))
1126  Implementer = Lines[I].substr(15).ltrim("\t :");
1127 
1128  if (Implementer == "0x41") // ARM Ltd.
1129  // Look for the CPU part line.
1130  for (unsigned I = 0, E = Lines.size(); I != E; ++I)
1131  if (Lines[I].startswith("CPU part"))
1132  // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
1133  // values correspond to the "Part number" in the CP15/c0 register. The
1134  // contents are specified in the various processor manuals.
1135  return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :"))
1136  .Case("0x926", "arm926ej-s")
1137  .Case("0xb02", "mpcore")
1138  .Case("0xb36", "arm1136j-s")
1139  .Case("0xb56", "arm1156t2-s")
1140  .Case("0xb76", "arm1176jz-s")
1141  .Case("0xc08", "cortex-a8")
1142  .Case("0xc09", "cortex-a9")
1143  .Case("0xc0f", "cortex-a15")
1144  .Case("0xc20", "cortex-m0")
1145  .Case("0xc23", "cortex-m3")
1146  .Case("0xc24", "cortex-m4")
1147  .Default("generic");
1148 
1149  if (Implementer == "0x51") // Qualcomm Technologies, Inc.
1150  // Look for the CPU part line.
1151  for (unsigned I = 0, E = Lines.size(); I != E; ++I)
1152  if (Lines[I].startswith("CPU part"))
1153  // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
1154  // values correspond to the "Part number" in the CP15/c0 register. The
1155  // contents are specified in the various processor manuals.
1156  return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :"))
1157  .Case("0x06f", "krait") // APQ8064
1158  .Default("generic");
1159 
1160  return "generic";
1161 }
1162 #elif defined(__linux__) && defined(__s390x__)
1164  // STIDP is a privileged operation, so use /proc/cpuinfo instead.
1165 
1166  // The "processor 0:" line comes after a fair amount of other information,
1167  // including a cache breakdown, but this should be plenty.
1168  char buffer[2048];
1169  ssize_t CPUInfoSize = readCpuInfo(buffer, sizeof(buffer));
1170  if (CPUInfoSize == -1)
1171  return "generic";
1172 
1173  StringRef Str(buffer, CPUInfoSize);
1175  Str.split(Lines, "\n");
1176 
1177  // Look for the CPU features.
1178  SmallVector<StringRef, 32> CPUFeatures;
1179  for (unsigned I = 0, E = Lines.size(); I != E; ++I)
1180  if (Lines[I].startswith("features")) {
1181  size_t Pos = Lines[I].find(":");
1182  if (Pos != StringRef::npos) {
1183  Lines[I].drop_front(Pos + 1).split(CPUFeatures, ' ');
1184  break;
1185  }
1186  }
1187 
1188  // We need to check for the presence of vector support independently of
1189  // the machine type, since we may only use the vector register set when
1190  // supported by the kernel (and hypervisor).
1191  bool HaveVectorSupport = false;
1192  for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) {
1193  if (CPUFeatures[I] == "vx")
1194  HaveVectorSupport = true;
1195  }
1196 
1197  // Now check the processor machine type.
1198  for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
1199  if (Lines[I].startswith("processor ")) {
1200  size_t Pos = Lines[I].find("machine = ");
1201  if (Pos != StringRef::npos) {
1202  Pos += sizeof("machine = ") - 1;
1203  unsigned int Id;
1204  if (!Lines[I].drop_front(Pos).getAsInteger(10, Id)) {
1205  if (Id >= 2964 && HaveVectorSupport)
1206  return "z13";
1207  if (Id >= 2827)
1208  return "zEC12";
1209  if (Id >= 2817)
1210  return "z196";
1211  }
1212  }
1213  break;
1214  }
1215  }
1216 
1217  return "generic";
1218 }
1219 #else
1220 StringRef sys::getHostCPUName() { return "generic"; }
1221 #endif
1222 
1223 #if defined(__linux__) && defined(__x86_64__)
1224 // On Linux, the number of physical cores can be computed from /proc/cpuinfo,
1225 // using the number of unique physical/core id pairs. The following
1226 // implementation reads the /proc/cpuinfo format on an x86_64 system.
1227 static int computeHostNumPhysicalCores() {
1228  // Read /proc/cpuinfo as a stream (until EOF reached). It cannot be
1229  // mmapped because it appears to have 0 size.
1231  llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo");
1232  if (std::error_code EC = Text.getError()) {
1233  llvm::errs() << "Can't read "
1234  << "/proc/cpuinfo: " << EC.message() << "\n";
1235  }
1237  (*Text)->getBuffer().split(strs, "\n", /*MaxSplit=*/-1,
1238  /*KeepEmpty=*/false);
1239  int CurPhysicalId = -1;
1240  int CurCoreId = -1;
1241  SmallSet<std::pair<int, int>, 32> UniqueItems;
1242  for (auto &Line : strs) {
1243  Line = Line.trim();
1244  if (!Line.startswith("physical id") && !Line.startswith("core id"))
1245  continue;
1246  std::pair<StringRef, StringRef> Data = Line.split(':');
1247  auto Name = Data.first.trim();
1248  auto Val = Data.second.trim();
1249  if (Name == "physical id") {
1250  assert(CurPhysicalId == -1 &&
1251  "Expected a core id before seeing another physical id");
1252  Val.getAsInteger(10, CurPhysicalId);
1253  }
1254  if (Name == "core id") {
1255  assert(CurCoreId == -1 &&
1256  "Expected a physical id before seeing another core id");
1257  Val.getAsInteger(10, CurCoreId);
1258  }
1259  if (CurPhysicalId != -1 && CurCoreId != -1) {
1260  UniqueItems.insert(std::make_pair(CurPhysicalId, CurCoreId));
1261  CurPhysicalId = -1;
1262  CurCoreId = -1;
1263  }
1264  }
1265  return UniqueItems.size();
1266 }
1267 #elif defined(__APPLE__) && defined(__x86_64__)
1268 #include <sys/param.h>
1269 #include <sys/sysctl.h>
1270 
// Gets the number of *physical cores* on the machine.
//
// Queries the "hw.physicalcpu" sysctl first and falls back to the
// CTL_HW/HW_AVAILCPU sysctl when that does not yield a positive count.
// Returns -1 if neither query produces a usable value.
static int computeHostNumPhysicalCores() {
  // Start from zero so that a failed query never leaves an indeterminate
  // value behind.
  uint32_t count = 0;
  size_t len = sizeof(count);
  if (sysctlbyname("hw.physicalcpu", &count, &len, nullptr, 0) != 0)
    count = 0;
  if (count < 1) {
    int nm[2] = {CTL_HW, HW_AVAILCPU};
    // len is an in/out parameter; reset it before reusing it.
    len = sizeof(count);
    if (sysctl(nm, 2, &count, &len, nullptr, 0) != 0 || count < 1)
      return -1;
  }
  return static_cast<int>(count);
}
1286 #else
// Fallback for systems without a physical-core query: the count is unknown,
// which is reported as -1.
static int computeHostNumPhysicalCores() {
  const int UnknownCoreCount = -1;
  return UnknownCoreCount;
}
1289 #endif
1290 
1292  static int NumCores = computeHostNumPhysicalCores();
1293  return NumCores;
1294 }
1295 
1296 #if defined(__i386__) || defined(_M_IX86) || \
1297  defined(__x86_64__) || defined(_M_X64)
/// Populate \p Features with the x86 host CPU features reported by CPUID.
///
/// Each known feature name is stored with a true/false value. Returns false,
/// leaving \p Features untouched, if CPUID leaf 0 cannot be queried or reports
/// a maximum basic leaf below 1; returns true otherwise.
///
/// NOTE: EAX/EBX/ECX/EDX are reused for successive queries, so the order of
/// the statements below matters: each group of feature bits must be read
/// before the next query overwrites the registers.
bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
  unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
  unsigned MaxLevel;
  // Receives the three extra output words of leaf 0; they are not used
  // afterwards.
  union {
    unsigned u[3];
    char c[12];
  } text;

  if (getX86CpuIDAndInfo(0, &MaxLevel, text.u + 0, text.u + 2, text.u + 1) ||
      MaxLevel < 1)
    return false;

  // Leaf 1: basic feature bits in ECX and EDX.
  getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX);

  Features["cmov"] = (EDX >> 15) & 1;
  Features["mmx"] = (EDX >> 23) & 1;
  Features["sse"] = (EDX >> 25) & 1;
  Features["sse2"] = (EDX >> 26) & 1;
  Features["sse3"] = (ECX >> 0) & 1;
  Features["ssse3"] = (ECX >> 9) & 1;
  Features["sse4.1"] = (ECX >> 19) & 1;
  Features["sse4.2"] = (ECX >> 20) & 1;

  Features["pclmul"] = (ECX >> 1) & 1;
  Features["cx16"] = (ECX >> 13) & 1;
  Features["movbe"] = (ECX >> 22) & 1;
  Features["popcnt"] = (ECX >> 23) & 1;
  Features["aes"] = (ECX >> 25) & 1;
  Features["rdrnd"] = (ECX >> 30) & 1;

  // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
  // indicates that the AVX registers will be saved and restored on context
  // switch, then we have full AVX support.
  // Note that getX86XCR0 is handed EAX and EDX, so from here on those two no
  // longer hold leaf 1 values, while ECX still does.
  bool HasAVXSave = ((ECX >> 27) & 1) && ((ECX >> 28) & 1) &&
                    !getX86XCR0(&EAX, &EDX) && ((EAX & 0x6) == 0x6);
  Features["avx"] = HasAVXSave;
  Features["fma"] = HasAVXSave && (ECX >> 12) & 1;
  Features["f16c"] = HasAVXSave && (ECX >> 29) & 1;

  // Only enable XSAVE if OS has enabled support for saving YMM state.
  Features["xsave"] = HasAVXSave && (ECX >> 26) & 1;

  // AVX512 requires additional context to be saved by the OS.
  // This tests bits 5-7 (mask 0xe0) of the EAX value produced by the
  // getX86XCR0 call above, so it must run before EAX is overwritten again.
  bool HasAVX512Save = HasAVXSave && ((EAX & 0xe0) == 0xe0);

  // Extended leaf 0x80000000 reports the highest supported extended leaf.
  unsigned MaxExtLevel;
  getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);

  // Extended leaf 0x80000001: the feature bits below are read from ECX.
  bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
                     !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
  Features["lzcnt"] = HasExtLeaf1 && ((ECX >> 5) & 1);
  Features["sse4a"] = HasExtLeaf1 && ((ECX >> 6) & 1);
  Features["prfchw"] = HasExtLeaf1 && ((ECX >> 8) & 1);
  Features["xop"] = HasExtLeaf1 && ((ECX >> 11) & 1) && HasAVXSave;
  Features["fma4"] = HasExtLeaf1 && ((ECX >> 16) & 1) && HasAVXSave;
  Features["tbm"] = HasExtLeaf1 && ((ECX >> 21) & 1);
  Features["mwaitx"] = HasExtLeaf1 && ((ECX >> 29) & 1);

  // Leaf 7, subleaf 0: the feature bits below are read from EBX and ECX.
  bool HasLeaf7 =
      MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);

  // AVX2 is only supported if we have the OS save support from AVX.
  Features["avx2"] = HasAVXSave && HasLeaf7 && ((EBX >> 5) & 1);

  Features["fsgsbase"] = HasLeaf7 && ((EBX >> 0) & 1);
  Features["sgx"] = HasLeaf7 && ((EBX >> 2) & 1);
  Features["bmi"] = HasLeaf7 && ((EBX >> 3) & 1);
  Features["hle"] = HasLeaf7 && ((EBX >> 4) & 1);
  Features["bmi2"] = HasLeaf7 && ((EBX >> 8) & 1);
  Features["invpcid"] = HasLeaf7 && ((EBX >> 10) & 1);
  Features["rtm"] = HasLeaf7 && ((EBX >> 11) & 1);
  Features["rdseed"] = HasLeaf7 && ((EBX >> 18) & 1);
  Features["adx"] = HasLeaf7 && ((EBX >> 19) & 1);
  Features["smap"] = HasLeaf7 && ((EBX >> 20) & 1);
  Features["pcommit"] = HasLeaf7 && ((EBX >> 22) & 1);
  Features["clflushopt"] = HasLeaf7 && ((EBX >> 23) & 1);
  Features["clwb"] = HasLeaf7 && ((EBX >> 24) & 1);
  Features["sha"] = HasLeaf7 && ((EBX >> 29) & 1);

  // AVX512 is only supported if the OS supports the context save for it.
  Features["avx512f"] = HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save;
  Features["avx512dq"] = HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save;
  Features["avx512ifma"] = HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save;
  Features["avx512pf"] = HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save;
  Features["avx512er"] = HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save;
  Features["avx512cd"] = HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save;
  Features["avx512bw"] = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save;
  Features["avx512vl"] = HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save;

  Features["prefetchwt1"] = HasLeaf7 && (ECX & 1);
  Features["avx512vbmi"] = HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save;
  // Enable protection keys
  Features["pku"] = HasLeaf7 && ((ECX >> 4) & 1);

  // Leaf 0xd, subleaf 1: the XSAVE variant bits below are read from EAX.
  bool HasLeafD = MaxLevel >= 0xd &&
                  !getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX);

  // Only enable XSAVE if OS has enabled support for saving YMM state.
  Features["xsaveopt"] = HasAVXSave && HasLeafD && ((EAX >> 0) & 1);
  Features["xsavec"] = HasAVXSave && HasLeafD && ((EAX >> 1) & 1);
  Features["xsaves"] = HasAVXSave && HasLeafD && ((EAX >> 3) & 1);

  return true;
}
1402 #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__))
1403 bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
1404  // Read 1024 bytes from /proc/cpuinfo, which should contain the Features line
1405  // in all cases.
1406  char buffer[1024];
1407  ssize_t CPUInfoSize = readCpuInfo(buffer, sizeof(buffer));
1408  if (CPUInfoSize == -1)
1409  return false;
1410 
1411  StringRef Str(buffer, CPUInfoSize);
1412 
1414  Str.split(Lines, "\n");
1415 
1416  SmallVector<StringRef, 32> CPUFeatures;
1417 
1418  // Look for the CPU features.
1419  for (unsigned I = 0, E = Lines.size(); I != E; ++I)
1420  if (Lines[I].startswith("Features")) {
1421  Lines[I].split(CPUFeatures, ' ');
1422  break;
1423  }
1424 
1425 #if defined(__aarch64__)
1426  // Keep track of which crypto features we have seen
1427  enum { CAP_AES = 0x1, CAP_PMULL = 0x2, CAP_SHA1 = 0x4, CAP_SHA2 = 0x8 };
1428  uint32_t crypto = 0;
1429 #endif
1430 
1431  for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) {
1432  StringRef LLVMFeatureStr = StringSwitch<StringRef>(CPUFeatures[I])
1433 #if defined(__aarch64__)
1434  .Case("asimd", "neon")
1435  .Case("fp", "fp-armv8")
1436  .Case("crc32", "crc")
1437 #else
1438  .Case("half", "fp16")
1439  .Case("neon", "neon")
1440  .Case("vfpv3", "vfp3")
1441  .Case("vfpv3d16", "d16")
1442  .Case("vfpv4", "vfp4")
1443  .Case("idiva", "hwdiv-arm")
1444  .Case("idivt", "hwdiv")
1445 #endif
1446  .Default("");
1447 
1448 #if defined(__aarch64__)
1449  // We need to check crypto separately since we need all of the crypto
1450  // extensions to enable the subtarget feature
1451  if (CPUFeatures[I] == "aes")
1452  crypto |= CAP_AES;
1453  else if (CPUFeatures[I] == "pmull")
1454  crypto |= CAP_PMULL;
1455  else if (CPUFeatures[I] == "sha1")
1456  crypto |= CAP_SHA1;
1457  else if (CPUFeatures[I] == "sha2")
1458  crypto |= CAP_SHA2;
1459 #endif
1460 
1461  if (LLVMFeatureStr != "")
1462  Features[LLVMFeatureStr] = true;
1463  }
1464 
1465 #if defined(__aarch64__)
1466  // If we have all crypto bits we can add the feature
1467  if (crypto == (CAP_AES | CAP_PMULL | CAP_SHA1 | CAP_SHA2))
1468  Features["crypto"] = true;
1469 #endif
1470 
1471  return true;
1472 }
1473 #else
1474 bool sys::getHostCPUFeatures(StringMap<bool> &Features) { return false; }
1475 #endif
1476 
1477 std::string sys::getProcessTriple() {
1478  Triple PT(Triple::normalize(LLVM_HOST_TRIPLE));
1479 
1480  if (sizeof(void *) == 8 && PT.isArch32Bit())
1481  PT = PT.get64BitArchVariant();
1482  if (sizeof(void *) == 4 && PT.isArch64Bit())
1483  PT = PT.get32BitArchVariant();
1484 
1485  return PT.str();
1486 }
std::error_code getError() const
Definition: ErrorOr.h:169
Represents either an error or a value T.
Definition: ErrorOr.h:68
raw_ostream & errs()
This returns a reference to a raw_ostream for standard error.
value_type read(const void *memory)
Read a value of a particular endianness from memory.
Definition: Endian.h:48
std::error_code openFileForRead(const Twine &Name, int &ResultFD, SmallVectorImpl< char > *RealPath=nullptr)
const std::string & str() const
Definition: Triple.h:339
int getHostNumPhysicalCores()
Get the number of physical cores (as opposed to logical cores returned from thread::hardware_concurre...
Definition: Host.cpp:1291
LLVM_ATTRIBUTE_ALWAYS_INLINE R Default(const T &Value) const
Definition: StringSwitch.h:244
size_type size() const
Definition: SmallSet.h:59
bool isArch64Bit() const
Test whether the architecture is 64-bit.
Definition: Triple.cpp:1202
LLVM_ATTRIBUTE_ALWAYS_INLINE StringSwitch & Case(const char(&S)[N], const T &Value)
Definition: StringSwitch.h:74
auto count(R &&Range, const E &Element) -> typename std::iterator_traits< decltype(std::begin(Range))>::difference_type
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:791
static GCRegistry::Add< CoreCLRGC > E("coreclr","CoreCLR-compatible GC")
llvm::Triple get32BitArchVariant() const
Form a triple with a 32-bit variant of the current architecture.
Definition: Triple.cpp:1214
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:43
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:36
#define LLVM_ATTRIBUTE_UNUSED
Definition: Compiler.h:150
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:80
std::string normalize() const
Return the normalized form of this triple's string.
Definition: Triple.h:263
std::string getProcessTriple()
getProcessTriple() - Return an appropriate target triple for generating code to be loaded into the cu...
Definition: Host.cpp:1477
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:843
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
static bool startswith(StringRef Magic, const char(&S)[N])
Definition: Path.cpp:994
llvm::Triple get64BitArchVariant() const
Form a triple with a 64-bit variant of the current architecture.
Definition: Triple.cpp:1276
StringRef getHostCPUName()
getHostCPUName - Get the LLVM name for the host CPU.
Definition: Host.cpp:1220
static const size_t npos
Definition: StringRef.h:51
#define I(x, y, z)
Definition: MD5.cpp:54
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
Definition: SmallVector.h:135
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFileAsStream(const Twine &Filename)
Read all of the specified file into a MemoryBuffer as a stream (i.e.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
const FeatureBitset Features
#define DEBUG(X)
Definition: Debug.h:100
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:47
bool getHostCPUFeatures(StringMap< bool > &Features)
getHostCPUFeatures - Get the LLVM names for the host CPU features.
Definition: Host.cpp:1474
bool isArch32Bit() const
Test whether the architecture is 32-bit.
Definition: Triple.cpp:1206
static int computeHostNumPhysicalCores()
Definition: Host.cpp:1288