File: | projects/compiler-rt/lib/xray/xray_x86_64.cc |
Warning: | line 263, column 13 The left operand of '&' is a garbage value |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
#include "cpuid.h"
#include "sanitizer_common/sanitizer_common.h"
#include "xray_defs.h"
#include "xray_interface_internal.h"

#include <atomic>
#include <cstdint>
#include <errno.h>
#include <fcntl.h>
#include <iterator>
#include <limits>
#include <tuple>
#include <unistd.h>
15 | namespace __xray { | |||
16 | ||||
17 | static std::pair<ssize_t, bool> | |||
18 | retryingReadSome(int Fd, char *Begin, char *End) XRAY_NEVER_INSTRUMENT { | |||
19 | auto BytesToRead = std::distance(Begin, End); | |||
20 | ssize_t BytesRead; | |||
21 | ssize_t TotalBytesRead = 0; | |||
22 | while (BytesToRead && (BytesRead = read(Fd, Begin, BytesToRead))) { | |||
23 | if (BytesRead == -1) { | |||
24 | if (errno(*__errno_location ()) == EINTR4) | |||
25 | continue; | |||
26 | Report("Read error; errno = %d\n", errno(*__errno_location ())); | |||
27 | return std::make_pair(TotalBytesRead, false); | |||
28 | } | |||
29 | ||||
30 | TotalBytesRead += BytesRead; | |||
31 | BytesToRead -= BytesRead; | |||
32 | Begin += BytesRead; | |||
33 | } | |||
34 | return std::make_pair(TotalBytesRead, true); | |||
35 | } | |||
36 | ||||
37 | static bool readValueFromFile(const char *Filename, | |||
38 | long long *Value) XRAY_NEVER_INSTRUMENT { | |||
39 | int Fd = open(Filename, O_RDONLY00 | O_CLOEXEC02000000); | |||
40 | if (Fd == -1) | |||
41 | return false; | |||
42 | static constexpr size_t BufSize = 256; | |||
43 | char Line[BufSize] = {}; | |||
44 | ssize_t BytesRead; | |||
45 | bool Success; | |||
46 | std::tie(BytesRead, Success) = retryingReadSome(Fd, Line, Line + BufSize); | |||
47 | close(Fd); | |||
48 | if (!Success) | |||
49 | return false; | |||
50 | char *End = nullptr; | |||
51 | long long Tmp = internal_simple_strtoll(Line, &End, 10); | |||
52 | bool Result = false; | |||
53 | if (Line[0] != '\0' && (*End == '\n' || *End == '\0')) { | |||
54 | *Value = Tmp; | |||
55 | Result = true; | |||
56 | } | |||
57 | return Result; | |||
58 | } | |||
59 | ||||
60 | uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT { | |||
61 | long long TSCFrequency = -1; | |||
62 | if (readValueFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", | |||
63 | &TSCFrequency)) { | |||
64 | TSCFrequency *= 1000; | |||
65 | } else if (readValueFromFile( | |||
66 | "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq", | |||
67 | &TSCFrequency)) { | |||
68 | TSCFrequency *= 1000; | |||
69 | } else { | |||
70 | Report("Unable to determine CPU frequency for TSC accounting.\n"); | |||
71 | } | |||
72 | return TSCFrequency == -1 ? 0 : static_cast<uint64_t>(TSCFrequency); | |||
73 | } | |||
74 | ||||
// x86-64 instruction encodings used when patching sleds. Multi-byte sequences
// are little-endian uint16_t values, so e.g. MovR10Seq 0xba41 is the byte pair
// 0x41 0xba ("mov r10d, imm32" prefix), and Jmp9Seq 0x09eb is 0xeb 0x09
// ("jmp +9").
static constexpr uint8_t CallOpCode = 0xe8;  // call rel32
static constexpr uint16_t MovR10Seq = 0xba41; // 41 ba: mov r10d, imm32
static constexpr uint16_t Jmp9Seq = 0x09eb;   // eb 09: jmp +9 (skip entry sled)
static constexpr uint16_t Jmp20Seq = 0x14eb;  // eb 14: jmp +20 (custom event v0)
static constexpr uint16_t Jmp15Seq = 0x0feb;  // eb 0f: jmp +15 (custom event v1)
static constexpr uint8_t JmpOpCode = 0xe9;   // jmp rel32
static constexpr uint8_t RetOpCode = 0xc3;   // ret
static constexpr uint16_t NopwSeq = 0x9066;  // 66 90: nopw

// Trampolines must be reachable by a rel32 call/jmp from the sled.
static constexpr int64_t MinOffset{std::numeric_limits<int32_t>::min()};
static constexpr int64_t MaxOffset{std::numeric_limits<int32_t>::max()};
86 | ||||
// Patches (Enable) or restores (disable) a function-entry sled in place.
// The statement order below is load-bearing: the jmp at the sled head keeps
// other threads skipping the sled until the final atomic 2-byte store flips
// it to the "mov r10d" prefix, activating the already-written tail.
bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
                        const XRaySledEntry &Sled,
                        void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
  // Here we do the dance of replacing the following sled:
  //
  // xray_sled_n:
  //   jmp +9
  //   <9 byte nop>
  //
  // With the following:
  //
  //   mov r10d, <function id>
  //   call <relative 32bit offset to entry trampoline>
  //
  // We need to do this in the following order:
  //
  // 1. Put the function id first, 2 bytes from the start of the sled (just
  //    after the 2-byte jmp instruction).
  // 2. Put the call opcode 6 bytes from the start of the sled.
  // 3. Put the relative offset 7 bytes from the start of the sled.
  // 4. Do an atomic write over the jmp instruction for the "mov r10d"
  //    opcode and first operand.
  //
  // Prerequisite is to compute the relative offset to the trampoline's address.
  // The +11 is the end of the patched 11-byte sequence (6-byte mov + 5-byte
  // call), the point the rel32 displacement is measured from.
  int64_t TrampolineOffset = reinterpret_cast<int64_t>(Trampoline) -
                             (static_cast<int64_t>(Sled.Address) + 11);
  if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
    Report("XRay Entry trampoline (%p) too far from sled (%p)\n",
           Trampoline, reinterpret_cast<void *>(Sled.Address));
    return false;
  }
  if (Enable) {
    *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId;
    *reinterpret_cast<uint8_t *>(Sled.Address + 6) = CallOpCode;
    *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset;
    // Release store makes the tail writes above visible before the sled goes
    // live.
    std::atomic_store_explicit(
        reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq,
        std::memory_order_release);
  } else {
    // Restore the "jmp +9" so the (now stale) tail bytes are skipped again.
    std::atomic_store_explicit(
        reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp9Seq,
        std::memory_order_release);
    // FIXME: Write out the nops still?
  }
  return true;
}
133 | ||||
// Patches (Enable) or restores (disable) a function-exit sled in place.
// Mirrors patchFunctionEntry, but the sled head is a 1-byte ret and the
// trampoline is reached via jmp (not call) so the trampoline's own return
// returns to the patched function's caller.
bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
                       const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
  // Here we do the dance of replacing the following sled:
  //
  // xray_sled_n:
  //   ret
  //   <10 byte nop>
  //
  // With the following:
  //
  //   mov r10d, <function id>
  //   jmp <relative 32bit offset to exit trampoline>
  //
  // 1. Put the function id first, 2 bytes from the start of the sled (just
  //    after the 1-byte ret instruction).
  // 2. Put the jmp opcode 6 bytes from the start of the sled.
  // 3. Put the relative offset 7 bytes from the start of the sled.
  // 4. Do an atomic write over the jmp instruction for the "mov r10d"
  //    opcode and first operand.
  //
  // Prerequisite is to compute the relative offset fo the
  // __xray_FunctionExit function's address. The +11 is the end of the
  // patched 11-byte mov+jmp sequence, from which the rel32 is measured.
  int64_t TrampolineOffset = reinterpret_cast<int64_t>(__xray_FunctionExit) -
                             (static_cast<int64_t>(Sled.Address) + 11);
  if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
    Report("XRay Exit trampoline (%p) too far from sled (%p)\n",
           __xray_FunctionExit, reinterpret_cast<void *>(Sled.Address));
    return false;
  }
  if (Enable) {
    *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId;
    *reinterpret_cast<uint8_t *>(Sled.Address + 6) = JmpOpCode;
    *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset;
    // Release store publishes the tail writes before the sled goes live.
    std::atomic_store_explicit(
        reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq,
        std::memory_order_release);
  } else {
    // Restore the leading ret; only 1 byte needs to change to disable.
    std::atomic_store_explicit(
        reinterpret_cast<std::atomic<uint8_t> *>(Sled.Address), RetOpCode,
        std::memory_order_release);
    // FIXME: Write out the nops still?
  }
  return true;
}
178 | ||||
179 | bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId, | |||
180 | const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { | |||
181 | // Here we do the dance of replacing the tail call sled with a similar | |||
182 | // sequence as the entry sled, but calls the tail exit sled instead. | |||
183 | int64_t TrampolineOffset = | |||
184 | reinterpret_cast<int64_t>(__xray_FunctionTailExit) - | |||
185 | (static_cast<int64_t>(Sled.Address) + 11); | |||
186 | if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) { | |||
187 | Report("XRay Exit trampoline (%p) too far from sled (%p)\n", | |||
188 | __xray_FunctionExit, reinterpret_cast<void *>(Sled.Address)); | |||
189 | return false; | |||
190 | } | |||
191 | if (Enable) { | |||
192 | *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId; | |||
193 | *reinterpret_cast<uint8_t *>(Sled.Address + 6) = CallOpCode; | |||
194 | *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset; | |||
195 | std::atomic_store_explicit( | |||
196 | reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq, | |||
197 | std::memory_order_release); | |||
198 | } else { | |||
199 | std::atomic_store_explicit( | |||
200 | reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp9Seq, | |||
201 | std::memory_order_release); | |||
202 | // FIXME: Write out the nops still? | |||
203 | } | |||
204 | return true; | |||
205 | } | |||
206 | ||||
// Patches (Enable) or restores (disable) a custom-event sled: a single
// 2-byte atomic swap between a skip-jmp and a nopw that lets execution fall
// through into the event-logging code.
bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
                      const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
  // Here we do the dance of replacing the following sled:
  //
  // In Version 0:
  //
  // xray_sled_n:
  //   jmp +20          // 2 bytes
  //   ...
  //
  // With the following:
  //
  //   nopw             // 2 bytes*
  //   ...
  //
  //
  // The "unpatch" should just turn the 'nopw' back to a 'jmp +20'.
  //
  // ---
  //
  // In Version 1:
  //
  // The jump offset is now 15 bytes (0x0f), so when restoring the nopw back
  // to a jmp, use 15 bytes instead.
  //
  if (Enable) {
    std::atomic_store_explicit(
        reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), NopwSeq,
        std::memory_order_release);
  } else {
    // Restore the jump whose displacement matches the sled's layout version.
    switch (Sled.Version) {
    case 1:
      std::atomic_store_explicit(
          reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp15Seq,
          std::memory_order_release);
      break;
    case 0:
    default:
      std::atomic_store_explicit(
          reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp20Seq,
          std::memory_order_release);
      break;
    }
  }
  // NOTE(review): this returns false even though patching succeeded, unlike
  // the other patch* routines which return true on success — confirm whether
  // callers rely on this or it is a latent bug.
  return false;
}
253 | ||||
254 | // We determine whether the CPU we're running on has the correct features we | |||
255 | // need. In x86_64 this will be rdtscp support. | |||
256 | bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { | |||
257 | unsigned int EAX, EBX, ECX, EDX; | |||
| ||||
258 | ||||
259 | // We check whether rdtscp support is enabled. According to the x86_64 manual, | |||
260 | // level should be set at 0x80000001, and we should have a look at bit 27 in | |||
261 | // EDX. That's 0x8000000 (or 1u << 27). | |||
262 | __get_cpuid(0x80000001, &EAX, &EBX, &ECX, &EDX); | |||
263 | if (!(EDX & (1u << 27))) { | |||
| ||||
264 | Report("Missing rdtscp support.\n"); | |||
265 | return false; | |||
266 | } | |||
267 | // Also check whether we can determine the CPU frequency, since if we cannot, | |||
268 | // we should use the emulated TSC instead. | |||
269 | if (!getTSCFrequency()) { | |||
270 | Report("Unable to determine CPU frequency.\n"); | |||
271 | return false; | |||
272 | } | |||
273 | return true; | |||
274 | } | |||
275 | ||||
276 | } // namespace __xray |
1 | /*===---- cpuid.h - X86 cpu model detection --------------------------------=== |
2 | * |
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy |
4 | * of this software and associated documentation files (the "Software"), to deal |
5 | * in the Software without restriction, including without limitation the rights |
6 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
7 | * copies of the Software, and to permit persons to whom the Software is |
8 | * furnished to do so, subject to the following conditions: |
9 | * |
10 | * The above copyright notice and this permission notice shall be included in |
11 | * all copies or substantial portions of the Software. |
12 | * |
13 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
14 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
15 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
16 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
17 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
18 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
19 | * THE SOFTWARE. |
20 | * |
21 | *===-----------------------------------------------------------------------=== |
22 | */ |
23 | |
/* This header is x86-specific; refuse to compile anywhere else. */
#if !(__x86_64__ || __i386__)
#error this header is for x86 only
#endif

/* Responses identification request with %eax 0 */
/* AMD: "AuthenticAMD" */
#define signature_AMD_ebx 0x68747541
#define signature_AMD_edx 0x69746e65
#define signature_AMD_ecx 0x444d4163
/* CENTAUR: "CentaurHauls" */
#define signature_CENTAUR_ebx 0x746e6543
#define signature_CENTAUR_edx 0x48727561
#define signature_CENTAUR_ecx 0x736c7561
/* CYRIX: "CyrixInstead" */
#define signature_CYRIX_ebx 0x69727943
#define signature_CYRIX_edx 0x736e4978
#define signature_CYRIX_ecx 0x64616574
/* INTEL: "GenuineIntel" */
#define signature_INTEL_ebx 0x756e6547
#define signature_INTEL_edx 0x49656e69
#define signature_INTEL_ecx 0x6c65746e
/* TM1: "TransmetaCPU" */
#define signature_TM1_ebx 0x6e617254
#define signature_TM1_edx 0x74656d73
#define signature_TM1_ecx 0x55504361
/* TM2: "GenuineTMx86" */
#define signature_TM2_ebx 0x756e6547
#define signature_TM2_edx 0x54656e69
#define signature_TM2_ecx 0x3638784d
/* NSC: "Geode by NSC" */
#define signature_NSC_ebx 0x646f6547
#define signature_NSC_edx 0x43534e20
#define signature_NSC_ecx 0x79622065
/* NEXGEN: "NexGenDriven" */
#define signature_NEXGEN_ebx 0x4778654e
#define signature_NEXGEN_edx 0x72446e65
#define signature_NEXGEN_ecx 0x6e657669
/* RISE: "RiseRiseRise" */
#define signature_RISE_ebx 0x65736952
#define signature_RISE_edx 0x65736952
#define signature_RISE_ecx 0x65736952
/* SIS: "SiS SiS SiS " */
#define signature_SIS_ebx 0x20536953
#define signature_SIS_edx 0x20536953
#define signature_SIS_ecx 0x20536953
/* UMC: "UMC UMC UMC " */
#define signature_UMC_ebx 0x20434d55
#define signature_UMC_edx 0x20434d55
#define signature_UMC_ecx 0x20434d55
/* VIA: "VIA VIA VIA " */
#define signature_VIA_ebx 0x20414956
#define signature_VIA_edx 0x20414956
#define signature_VIA_ecx 0x20414956
/* VORTEX: "Vortex86 SoC" */
#define signature_VORTEX_ebx 0x74726f56
#define signature_VORTEX_edx 0x36387865
#define signature_VORTEX_ecx 0x436f5320

/* Features in %ecx for leaf 1 */
#define bit_SSE3 0x00000001
#define bit_PCLMULQDQ 0x00000002
#define bit_PCLMUL bit_PCLMULQDQ /* for gcc compat */
#define bit_DTES64 0x00000004
#define bit_MONITOR 0x00000008
#define bit_DSCPL 0x00000010
#define bit_VMX 0x00000020
#define bit_SMX 0x00000040
#define bit_EIST 0x00000080
#define bit_TM2 0x00000100
#define bit_SSSE3 0x00000200
#define bit_CNXTID 0x00000400
#define bit_FMA 0x00001000
#define bit_CMPXCHG16B 0x00002000
#define bit_xTPR 0x00004000
#define bit_PDCM 0x00008000
#define bit_PCID 0x00020000
#define bit_DCA 0x00040000
#define bit_SSE41 0x00080000
#define bit_SSE4_1 bit_SSE41 /* for gcc compat */
#define bit_SSE42 0x00100000
#define bit_SSE4_2 bit_SSE42 /* for gcc compat */
#define bit_x2APIC 0x00200000
#define bit_MOVBE 0x00400000
#define bit_POPCNT 0x00800000
#define bit_TSCDeadline 0x01000000
#define bit_AESNI 0x02000000
#define bit_AES bit_AESNI /* for gcc compat */
#define bit_XSAVE 0x04000000
#define bit_OSXSAVE 0x08000000
#define bit_AVX 0x10000000
#define bit_F16C 0x20000000
#define bit_RDRND 0x40000000

/* Features in %edx for leaf 1 */
#define bit_FPU 0x00000001
#define bit_VME 0x00000002
#define bit_DE 0x00000004
#define bit_PSE 0x00000008
#define bit_TSC 0x00000010
#define bit_MSR 0x00000020
#define bit_PAE 0x00000040
#define bit_MCE 0x00000080
#define bit_CX8 0x00000100
#define bit_CMPXCHG8B bit_CX8 /* for gcc compat */
#define bit_APIC 0x00000200
#define bit_SEP 0x00000800
#define bit_MTRR 0x00001000
#define bit_PGE 0x00002000
#define bit_MCA 0x00004000
#define bit_CMOV 0x00008000
#define bit_PAT 0x00010000
#define bit_PSE36 0x00020000
#define bit_PSN 0x00040000
#define bit_CLFSH 0x00080000
#define bit_DS 0x00200000
#define bit_ACPI 0x00400000
#define bit_MMX 0x00800000
#define bit_FXSR 0x01000000
#define bit_FXSAVE bit_FXSR /* for gcc compat */
#define bit_SSE 0x02000000
#define bit_SSE2 0x04000000
#define bit_SS 0x08000000
#define bit_HTT 0x10000000
#define bit_TM 0x20000000
#define bit_PBE 0x80000000

/* Features in %ebx for leaf 7 sub-leaf 0 */
#define bit_FSGSBASE 0x00000001
#define bit_SGX 0x00000004
#define bit_BMI 0x00000008
#define bit_HLE 0x00000010
#define bit_AVX2 0x00000020
#define bit_SMEP 0x00000080
#define bit_BMI2 0x00000100
#define bit_ENH_MOVSB 0x00000200
#define bit_RTM 0x00000800
#define bit_MPX 0x00004000
#define bit_AVX512F 0x00010000
#define bit_AVX512DQ 0x00020000
#define bit_RDSEED 0x00040000
#define bit_ADX 0x00080000
#define bit_AVX512IFMA 0x00200000
#define bit_CLFLUSHOPT 0x00800000
#define bit_CLWB 0x01000000
#define bit_AVX512PF 0x04000000
#define bit_AVX512ER 0x08000000
#define bit_AVX512CD 0x10000000
#define bit_SHA 0x20000000
#define bit_AVX512BW 0x40000000
#define bit_AVX512VL 0x80000000

/* Features in %ecx for leaf 7 sub-leaf 0 */
#define bit_PREFTCHWT1 0x00000001
#define bit_AVX512VBMI 0x00000002
#define bit_PKU 0x00000004
#define bit_OSPKE 0x00000010
#define bit_AVX512VBMI2 0x00000040
#define bit_SHSTK 0x00000080
#define bit_GFNI 0x00000100
#define bit_VAES 0x00000200
#define bit_VPCLMULQDQ 0x00000400
#define bit_AVX512VNNI 0x00000800
#define bit_AVX512BITALG 0x00001000
#define bit_AVX512VPOPCNTDQ 0x00004000
#define bit_RDPID 0x00400000

/* Features in %edx for leaf 7 sub-leaf 0 */
#define bit_AVX5124VNNIW 0x00000004
#define bit_AVX5124FMAPS 0x00000008
#define bit_IBT 0x00100000

/* Features in %eax for leaf 13 sub-leaf 1 */
#define bit_XSAVEOPT 0x00000001
#define bit_XSAVEC 0x00000002
#define bit_XSAVES 0x00000008

/* Features in %ecx for leaf 0x80000001 */
#define bit_LAHF_LM 0x00000001
#define bit_ABM 0x00000020
#define bit_LZCNT bit_ABM /* for gcc compat */
#define bit_SSE4a 0x00000040
#define bit_PRFCHW 0x00000100
#define bit_XOP 0x00000800
#define bit_LWP 0x00008000
#define bit_FMA4 0x00010000
#define bit_TBM 0x00200000
#define bit_MWAITX 0x20000000

/* Features in %edx for leaf 0x80000001 */
#define bit_MMXEXT 0x00400000
#define bit_LM 0x20000000
#define bit_3DNOWP 0x40000000
#define bit_3DNOW 0x80000000

/* Features in %ebx for leaf 0x80000001 */
#define bit_CLZERO 0x00000001
221 | |
#if __i386__
/* On i386, %ebx is freely clobberable, so use it directly. */
#define __cpuid(__leaf, __eax, __ebx, __ecx, __edx) \
    __asm("cpuid" : "=a"(__eax), "=b" (__ebx), "=c"(__ecx), "=d"(__edx) \
                  : "0"(__leaf))

#define __cpuid_count(__leaf, __count, __eax, __ebx, __ecx, __edx) \
    __asm("cpuid" : "=a"(__eax), "=b" (__ebx), "=c"(__ecx), "=d"(__edx) \
                  : "0"(__leaf), "2"(__count))
#else
/* x86-64 uses %rbx as the base register, so preserve it. */
#define __cpuid(__leaf, __eax, __ebx, __ecx, __edx) \
    __asm("  xchgq  %%rbx,%q1\n" \
          "  cpuid\n" \
          "  xchgq  %%rbx,%q1" \
        : "=a"(__eax), "=r" (__ebx), "=c"(__ecx), "=d"(__edx) \
        : "0"(__leaf))

#define __cpuid_count(__leaf, __count, __eax, __ebx, __ecx, __edx) \
    __asm("  xchgq  %%rbx,%q1\n" \
          "  cpuid\n" \
          "  xchgq  %%rbx,%q1" \
        : "=a"(__eax), "=r" (__ebx), "=c"(__ecx), "=d"(__edx) \
        : "0"(__leaf), "2"(__count))
#endif
246 | |
/* Returns the maximum supported CPUID leaf for the family selected by the
   high bits of __leaf (0 for standard, 0x80000000 for extended), or 0 when
   CPUID is unavailable. If __sig is non-null, stores the vendor signature
   (%ebx of leaf 0) through it. */
static __inline int __get_cpuid_max (unsigned int __leaf, unsigned int *__sig)
{
  unsigned int __eax, __ebx, __ecx, __edx;
#if __i386__
  int __cpuid_supported;

  /* CPUID exists iff bit 21 (ID) of EFLAGS can be toggled. */
  __asm("  pushfl\n"
        "  popl   %%eax\n"
        "  movl   %%eax,%%ecx\n"
        "  xorl   $0x00200000,%%eax\n"
        "  pushl  %%eax\n"
        "  popfl\n"
        "  pushfl\n"
        "  popl   %%eax\n"
        "  movl   $0,%0\n"
        "  cmpl   %%eax,%%ecx\n"
        "  je     1f\n"
        "  movl   $1,%0\n"
        "1:"
        : "=r" (__cpuid_supported) : : "eax", "ecx");
  if (!__cpuid_supported)
    return 0;
#endif

  __cpuid(__leaf, __eax, __ebx, __ecx, __edx);
  if (__sig)
    *__sig = __ebx;
  return __eax;
}
276 | |
/* Executes CPUID for __leaf, storing the four result registers through the
   output pointers. Returns 1 on success, 0 when the leaf is unsupported —
   in which case the outputs are left untouched, so callers must check the
   return value before reading them. */
static __inline int __get_cpuid (unsigned int __leaf, unsigned int *__eax,
                                 unsigned int *__ebx, unsigned int *__ecx,
                                 unsigned int *__edx)
{
  /* Probe the max leaf within __leaf's family (standard vs. extended). */
  unsigned int __max_leaf = __get_cpuid_max(__leaf & 0x80000000, 0);

  if (__max_leaf == 0 || __max_leaf < __leaf)
    return 0;

  __cpuid(__leaf, *__eax, *__ebx, *__ecx, *__edx);
  return 1;
}
289 | |
/* Like __get_cpuid, but also selects sub-leaf __subleaf (in %ecx) for leaves
   that are sub-leaf indexed (e.g. leaf 7). Returns 1 on success, 0 when the
   leaf is unsupported; outputs are untouched on failure. */
static __inline int __get_cpuid_count (unsigned int __leaf,
                                       unsigned int __subleaf,
                                       unsigned int *__eax, unsigned int *__ebx,
                                       unsigned int *__ecx, unsigned int *__edx)
{
  unsigned int __max_leaf = __get_cpuid_max(__leaf & 0x80000000, 0);

  if (__max_leaf == 0 || __max_leaf < __leaf)
    return 0;

  __cpuid_count(__leaf, __subleaf, *__eax, *__ebx, *__ecx, *__edx);
  return 1;
}